In [None]:
%reload_ext autoreload
%autoreload 2
import os
import sys
import seaborn as sns

import pandas as pd
from textblob import TextBlob
import matplotlib.pyplot as plt
# Put the parent of the current working directory at the front of sys.path so
# the `src` package imported below can be resolved. The membership check keeps
# re-running this cell from adding the same entry again.
rpath = os.path.abspath('..')
if rpath not in sys.path:
    sys.path.insert(0, rpath)

from src.loader import SlackDataLoader
import src.utils as utils
from src.preprocessing import Preprocessing
# Location of the anonymized Slack export, relative to the working directory.
# Defined once because both the loader and the parser are given the same path.
DATA_PATH = '../Anonymized_B6SlackExport_25Nov23/anonymized/'

# Initialize DataLoader
sdl = SlackDataLoader(DATA_PATH)

# Parse the export into one DataFrame and reduce each timestamp to its
# calendar date. `unit='s'` makes pandas read the values as seconds since the
# Unix epoch.
# NOTE(review): the dtype of `msg_sent_time` is set by the loader, which is
# not visible here - confirm it is numeric epoch seconds.
df = sdl.slack_channels_parser(DATA_PATH)
df['msg_sent_time'] = pd.to_datetime(df['msg_sent_time'], unit='s').dt.date

# Concatenate messages for each day into one big text.
# Missing messages are dropped before joining. The previous guard tested
# `messages.name` (the label of the group Series, which is never null), so it
# never fired and missing values were joined in as the literal text "nan".
# A day whose messages are all missing now yields an empty string.
df_concat = (
    df.groupby('msg_sent_time')['msg_content']
    .agg(lambda messages: ' '.join(str(message) for message in messages.dropna()))
    .reset_index(name='msg_content')
)
df_concat

In [None]:
# Run the project's preprocessing on the 'msg_content' column of the per-day
# frame; the result is what the sentiment scoring below operates on.
# NOTE(review): the cleaning steps live in src.preprocessing and are not
# visible here. df_concat is handed to both the constructor and the method -
# confirm whether both are required.
preprocess = Preprocessing(df_concat)
processed_df_concat = preprocess.preprocess_messages(df_concat,'msg_content')
def calculate_sentiment(row):
    """Score one row's 'msg_content' text with TextBlob.

    Intended to be used with ``DataFrame.apply(..., axis=1)``.

    Args:
        row: A row exposing ``row['msg_content']`` (the text to score) and
            ``row.name``. Under ``apply(axis=1)`` ``row.name`` is the row's
            index label, so 'Day' carries that label rather than a value
            read from a date column.

    Returns:
        dict: ``{'Day': row.name, 'Sentiment': polarity}``. 'Sentiment' is
        ``None`` when scoring raised; the error is printed, not re-raised.
    """
    day = row.name
    try:
        polarity = TextBlob(row['msg_content']).sentiment.polarity
    except Exception as e:
        # Best effort: one bad row must not abort the whole apply().
        print(f"Error processing row {day}: {e}")
        return {'Day': day, 'Sentiment': None}
    return {'Day': day, 'Sentiment': polarity}

# Score every row, then expand the resulting dicts into a DataFrame with
# 'Day' and 'Sentiment' columns.
sentiment_records = processed_df_concat.apply(calculate_sentiment, axis=1)
senti_df = sentiment_records.apply(pd.Series)

# Line chart of the sentiment score against the 'Day' value, one marker per row.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(senti_df['Day'], senti_df['Sentiment'], marker='o')
ax.set_title('Time Series Change in Sentiment')
ax.set_xlabel('Day')
ax.set_ylabel('Sentiment')
ax.grid(True)
plt.show()