In [19]:
import pandas as pd
from transformers import pipeline
import matplotlib.pyplot as plt
import seaborn as sns

In [20]:
# Path of the chat export that the notebook loads.
CSV_FILE_PATH = "chat.csv"

# Names of the timestamp, sender and text columns in that CSV.
DATE_TIME_COLUMN, PERSON_COLUMN, MESSAGE_COLUMN = "datetime", "person", "message"

In [21]:
# Read the chat export and, in the same chain, replace the timestamp column
# with its parsed pandas datetime values (column order is unchanged).
df = pd.read_csv(CSV_FILE_PATH).assign(
    **{DATE_TIME_COLUMN: lambda frame: pd.to_datetime(frame[DATE_TIME_COLUMN])}
)
print(df.head())

             datetime  person                                   message
0 2025-05-30 10:01:23       0                      Hey! How’s it going?
1 2025-05-30 10:01:45       0                                You there?
2 2025-05-30 10:02:10       1  Hey! I'm good, just busy with work. You?
3 2025-05-30 10:02:45       0      Same here. Deadlines are killing me.
4 2025-05-30 10:03:01       0               Can’t wait for the weekend.


In [22]:
# Collect the distinct participant identifiers, in order of first appearance.
unique_persons = df[PERSON_COLUMN].unique()

# The notebook binds exactly two participants. Fail with a descriptive
# message (still a ValueError, as the bare unpack would raise) when the chat
# has a different number of distinct senders.
if len(unique_persons) != 2:
    raise ValueError(
        f"Expected exactly 2 distinct values in column {PERSON_COLUMN!r}, "
        f"found {len(unique_persons)}: {list(unique_persons)}"
    )
person1, person2 = unique_persons

In [23]:
# TODO: use lighter model
# Build the sentiment classifier once; later cells call it on the messages.
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    framework="pt",
)

In [24]:

# Score every message in a single pipeline call.
# - The text column is looked up via MESSAGE_COLUMN so that the config cell
#   stays the single place where the column name is defined.
# - truncation=True clips messages that exceed the model's maximum input
#   length instead of letting the pipeline raise on them.
# NOTE: no batch_size is passed here, so batching behaviour is whatever the
# pipeline defaults to.
sentiments = sentiment_pipeline(df[MESSAGE_COLUMN].tolist(), truncation=True)

# Each pipeline result is a dict; keep its 'label' and 'score' per message.
df['sentiment_label'] = [s['label'] for s in sentiments]
df['sentiment_score'] = [s['score'] for s in sentiments]

# Map sentiment labels to numerical values (POSITIVE: 1, NEUTRAL: 0, NEGATIVE: -1).
# Ensure this mapping matches the labels produced by the chosen model
# (e.g. 'POSITIVE' / 'NEGATIVE' for SST-2). Adjust it if the model emits other
# labels (e.g. '2 stars', '3 stars', '4 stars').
sentiment_mapping = {'POSITIVE': 1, 'NEUTRAL': 0, 'NEGATIVE': -1}

# Labels missing from the mapping become NaN after .map(); .fillna(0) turns
# them into 0, i.e. they are treated the same as NEUTRAL.
df['sentiment_value'] = df['sentiment_label'].map(sentiment_mapping).fillna(0)


In [25]:
# Show the first five rows, which now include the sentiment columns.
df.head(n=5)

Unnamed: 0,datetime,person,message,sentiment_label,sentiment_score,sentiment_value
0,2025-05-30 10:01:23,0,Hey! How’s it going?,POSITIVE,0.996429,1
1,2025-05-30 10:01:45,0,You there?,POSITIVE,0.994265,1
2,2025-05-30 10:02:10,1,"Hey! I'm good, just busy with work. You?",POSITIVE,0.999327,1
3,2025-05-30 10:02:45,0,Same here. Deadlines are killing me.,NEGATIVE,0.999667,-1
4,2025-05-30 10:03:01,0,Can’t wait for the weekend.,POSITIVE,0.998262,1
