### Twitter extended

Read Database

In [1]:
import pandas as pd

file_path = "D:\Software proj\sentiment_2024.xlsx"

try:
    print("Loading the Excel file...")
    df = pd.read_excel(file_path)
    print("File loaded successfully!")
    
    print("Preview of the data:")
    print(df.head())
except FileNotFoundError:
    print(f"Error: File '{file_path}' not found. Please check the file path.")
except Exception as e:
    print(f"An error occurred: {e}")

Loading the Excel file...
File loaded successfully!
Preview of the data:
                                 User  \
0  https://twitter.com/chocolatee_ary   
1      https://twitter.com/SKurtzCWSP   
2     https://twitter.com/JamesAithie   
3          https://twitter.com/roqzee   
4        https://twitter.com/bjgunner   

                                                Post                Time  \
0  https://x.com/chocolatee_ary/status/1785819428... 2024-05-01 23:51:45   
1  https://x.com/SKurtzCWSP/status/17858103767911... 2024-05-01 23:15:47   
2  https://x.com/JamesAithie/status/1785802731355... 2024-05-01 22:45:24   
3    https://x.com/roqzee/status/1785801453086396798 2024-05-01 22:40:19   
4  https://x.com/bjgunner/status/1785796031394759027 2024-05-01 22:18:47   

                                             Comment  Likes  Nb.Comments  \
0          Off the radar🛬🌸🌴🤍 https://t.co/I3ZWc9Qxft   1063            6   
1  So this is how it looks…\n\nA £200k donation, ...    120           1

#### SENTIMENT

In [2]:
from transformers import pipeline
import plotly.express as px

df['Comment'] = df['Comment'].fillna('') 

if 'Sentiment' not in df.columns:
    df['Sentiment'] = ''

sentiment_analyzer = pipeline('sentiment-analysis', model="distilbert-base-uncased-finetuned-sst-2-english")

for index, row in df.iterrows():
    text = row['Comment']
    if text.strip(): 
        result = sentiment_analyzer(text)
        sentiment = result[0]['label']  
        df.at[index, 'Sentiment'] = sentiment
        #print(f"Text: {text}\nSentiment: {sentiment}\n")  
        df.at[index, 'Sentiment'] = sentiment

sentiment_counts = df['Sentiment'].value_counts()

chart_data = pd.DataFrame({
    'Sentiment': sentiment_counts.index,
    'Count': sentiment_counts.values
})
fig = px.pie(chart_data, values='Count', names='Sentiment', title='Sentiment Distribution')
fig.show()

#### Term frequency over time

In [5]:
import re

df['Time'] = pd.to_datetime(df['Time'])
df['YearMonth'] = df['Time'].dt.to_period('M')


topics = ["environment", "olimpics", "health", "beauty", "quality", "technology", 
         "price", "cost", "expensive","cheap", "gas", "trump", "kamala", "elections","love","diesel","polution","Joe Biden"]

# Function to clean comments
def clean_comment(text):
    text = str(text).lower() 
    text = re.sub(r"http\S+", "", text) 
    text = re.sub(r"@\S+", "", text)  
    text = re.sub(r"[^a-z\s]", "", text) 
    return text


df['Cleaned_Comment'] = df['Comment'].apply(clean_comment)

trend_data = {topic: [] for topic in topics}
trend_data['Month'] = []  


for period, group in df.groupby('YearMonth'):
    trend_data['Month'].append(str(period))  
    cleaned_comments = group['Cleaned_Comment'].str.cat(sep=' ')  
    for topic in topics:
        trend_data[topic].append(cleaned_comments.count(topic)) 


trend_df = pd.DataFrame(trend_data)

print("Aggregated trends:")
print(trend_df.head())

fig = px.line(
    trend_df,
    x='Month',
    y=[topic for topic in topics],
    title='Custom Topic Trends Over Time',
    labels={'value': 'Frequency', 'variable': 'Topic'},
)
fig.update_layout(xaxis_title='Month', yaxis_title='Frequency of Mentions')
fig.show()


Aggregated trends:
   environment  olimpics  health  beauty  quality  technology  price  cost  \
0            0         0       9       1        1           0      2     0   
1            2         0       4       0        1           0      0     5   
2            0         0       4       2        0           0      1     0   
3            0         0       0       0        0           1      0     1   
4            0         0       1       0        1           0      2     0   

   expensive  cheap  gas  trump  kamala  elections  love  diesel  polution  \
0          0      3    4      4       0          1    21       0         0   
1          0      0    1      4       0          0    20       0         0   
2          0      0    2      5       0          0    19       0         0   
3          0      0    1      1       0          2    19       0         0   
4          0      1    0      7       0          1    26       0         0   

   Joe Biden    Month  
0          0  2024-