# In [None]:
# Word Frequency Analysis in DeepSeek Technical Text
import string
import unicodedata
from collections import Counter

import matplotlib.pyplot as plt

# Step 1: Common English stopwords to ignore when counting
stopwords = {
    "the", "and", "is", "in", "to", "of", "a", "for", "on", "with", "as",
    "an", "by", "this", "that", "are", "it", "at", "from", "be", "or", "was",
    "which", "these", "their", "but", "has", "have", "can", "will"
}


def punctuation_table(text):
    """Return a ``str.translate`` table that deletes the punctuation in *text*.

    A character is deleted when it is listed in ``string.punctuation`` or
    when its Unicode general category starts with ``"P"`` (curly quotes,
    en/em dashes, ellipsis, ...). Without the second rule, typographic
    punctuation stays glued to tokens and splits the counts of one word.

    Args:
        text: The text whose characters should be inspected.

    Returns:
        A dict mapping the code point of each punctuation character that
        occurs in *text* to ``None``.
    """
    ascii_punctuation = set(string.punctuation)
    # Inspect only the distinct characters that actually occur instead of
    # scanning the whole Unicode range.
    return {
        ord(char): None
        for char in set(text)
        if char in ascii_punctuation
        or unicodedata.category(char).startswith("P")
    }


def format_summary(top_words):
    """Return the one-line summary for a ``Counter.most_common`` result.

    Args:
        top_words: A list of ``(word, count)`` pairs, most frequent first.

    Returns:
        A sentence naming the first pair, or a notice when the list is empty
        (for example when the input file is empty or holds only stopwords).
    """
    if not top_words:
        return "Summary: No words remain after removing punctuation and stopwords."
    word, count = top_words[0]
    return f"Summary: The most frequent word is '{word}' with {count} occurrences."


# The steps below read a file and open a plot window, so they are guarded:
# the helpers above can then be imported without side effects. Running this
# as a script or as a notebook cell (where __name__ is "__main__") still
# executes every step and assigns the same module-level names as before.
if __name__ == "__main__":
    # Step 2: Read the text file
    with open("deepseek.txt", "r", encoding="utf-8") as file:
        text = file.read().lower()  # Convert text to lowercase

    # Step 3: Remove punctuation (ASCII and Unicode)
    translator = punctuation_table(text)
    cleaned_text = text.translate(translator)

    # Step 4: Split text into words
    words = cleaned_text.split()

    # Step 5: Remove stopwords
    filtered_words = [word for word in words if word not in stopwords]

    # Step 6: Count word frequencies
    word_counts = Counter(filtered_words)

    # Step 7: Get the 10 most common words (fewer if the text is short)
    top_10 = word_counts.most_common(10)
    print("Top 10 words with counts:", top_10)

    # Step 8: Separate words and counts for plotting
    words_list = [word for word, _ in top_10]
    counts_list = [count for _, count in top_10]

    # Step 9: Plot the bar graph; skipped when there is nothing to show
    if top_10:
        plt.figure(figsize=(10, 6))
        plt.bar(words_list, counts_list, color='lightgreen')
        plt.title("Top 10 Most Used Words in deepseek.txt (Filtered)")
        plt.xlabel("Words")
        plt.ylabel("Frequency")
        plt.xticks(rotation=45)
        plt.show()

    # Step 10: Easy Summary
    print(format_summary(top_10))
    if top_10:
        print("Observation: After removing common words, the top words highlight the key technical terms and concepts in the text.")
