# In [1]:
import pandas as pd
import snscrape.modules.twitter as sntwitter
import re
import string
import matplotlib.pyplot as plt
from textblob import TextBlob

# Search expression: tweets carrying either campaign hashtag, restricted by
# the since:/until: date filters.
query = "(#30DaysOfLearning OR #NG30DaysOfLearning) until:2022-06-26 since:2022-05-05"
tweets = []
limit = 300  # maximum number of tweets to collect

# TwitterSearchScraper accepts a full search expression (boolean operators,
# since:/until: filters). TwitterHashtagScraper expects a bare hashtag name and
# prepends '#' to whatever it receives, which would corrupt this query.
for tweet in sntwitter.TwitterSearchScraper(query).get_items():
    if len(tweets) >= limit:
        break
    tweets.append([tweet.date, tweet.url, tweet.user.username, tweet.sourceLabel, tweet.user.location,
                   tweet.content, tweet.likeCount, tweet.retweetCount, tweet.quoteCount, tweet.replyCount])


# One row per tweet. NOTE: the 'User ' label carries a trailing space; it is
# kept as-is because the active-user count below looks the column up by that
# exact key.
df = pd.DataFrame(tweets, columns=['Date', 'TweetURL', 'User ', 'Source', 'Location', 'Tweet',
                                    'Likes_Count', 'Retweet_Count', 'Quote_Count', 'Reply_Count'])


# Compiled once at import time instead of on every call.
_URL_RE = re.compile(r'http\S+')
_HASHTAG_RE = re.compile(r'#\w+')
_MENTION_RE = re.compile(r'@\w+')
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def clean_tweet(tweet):
    """Normalise raw tweet text for sentiment analysis.

    URLs, hashtags and @mentions are removed, ASCII punctuation is stripped,
    the text is lower-cased, and whitespace is collapsed so that removed
    tokens do not leave gaps or leading/trailing blanks behind.

    Args:
        tweet: Raw tweet text. Non-string values (for example ``None`` or a
            NaN float from a missing DataFrame cell) are treated as empty.

    Returns:
        The cleaned, lower-case text; ``''`` when nothing usable remains.
    """
    if not isinstance(tweet, str):
        # Missing cells would otherwise make re.sub raise TypeError and abort
        # the whole DataFrame.apply pass.
        return ''

    # Order matters: URLs go first so that '#fragment' or '@' parts inside a
    # link are removed together with the link itself.
    for pattern in (_URL_RE, _HASHTAG_RE, _MENTION_RE):
        tweet = pattern.sub('', tweet)

    tweet = tweet.translate(_PUNCTUATION_TABLE).lower()

    # split()/join collapses every whitespace run to one space and trims ends.
    return ' '.join(tweet.split())

# Store a cleaned copy of each tweet's text next to the raw 'Tweet' column.
df['Cleaned_Tweet'] = df['Tweet'].map(clean_tweet)


def get_sentiment(tweet):
    """Return the polarity component of TextBlob's sentiment for ``tweet``.

    Args:
        tweet: Text to analyse.

    Returns:
        The ``polarity`` value of ``TextBlob(tweet).sentiment``.
    """
    analysis = TextBlob(tweet)
    polarity = analysis.sentiment.polarity
    return polarity

# Score every cleaned tweet and keep the polarity in its own column.
df['Sentiment'] = df['Cleaned_Tweet'].map(get_sentiment)


# Summary statistics for the DataFrame's numeric columns.
summary = df.describe()
print(summary)


# Tweets per account, most frequent first ('User ' is the column's exact key).
active_users = df['User '].value_counts()
top_ten = active_users.iloc[:10]
print(top_ten)

# Bar chart of the ten most frequent posters.
plt.figure(figsize=(10, 6))
axes = top_ten.plot.bar(color='skyblue')
axes.set_title('Top 10 Active Users')
axes.set_xlabel('Users')
axes.set_ylabel('Number of Tweets')
plt.xticks(rotation=45)
plt.show()


# Histogram of the per-tweet polarity scores.
plt.figure(figsize=(10, 6))
# grid=False: Series.hist switches the full grid on by default, and a bare
# plt.grid(axis='y') would then toggle the horizontal lines back OFF, leaving
# only vertical gridlines.
df['Sentiment'].hist(bins=20, color='lightgreen', grid=False)
plt.title('Sentiment Distribution')
plt.xlabel('Sentiment Score')
plt.ylabel('Frequency')
# Explicitly enable horizontal gridlines only, so bar heights are easy to read.
plt.grid(True, axis='y')
plt.show()

# Output: ModuleNotFoundError: No module named 'snscrape'