Task 1: Label each employee message as Positive, Negative, or Neutral using VADER
1. Import libraries and Load Dataset

In [22]:
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from datetime import datetime
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

#LOAD test .csv file 
path="C:/Users/ladom/Desktop/TS/ML/AI-project-submission/"
df=pd.read_csv(f"{path}data/test(in).csv")

df.head()

Unnamed: 0,Subject,body,date,from
0,EnronOptions Update!,EnronOptions Announcement\n\n\nWe have updated...,5/10/2010,sally.beck@enron.com
1,(No Subject),"Marc,\n\nUnfortunately, today is not going to ...",7/29/2010,eric.bass@enron.com
2,Phone Screen Interview - Shannon L. Burnham,"When: Wednesday, June 06, 2001 10:00 AM-11:00 ...",7/25/2011,sally.beck@enron.com
3,RE: My new work email,we were thinking papasitos (we can meet somewh...,3/25/2010,johnny.palmer@enron.com
4,Bet,Since you never gave me the $20 for the last t...,5/21/2011,lydia.delgado@enron.com


2.Initialize VADER and Create  Sentiment Labeling Function

In [8]:
# create a Vader sentiment analyzer 
analyzer=SentimentIntensityAnalyzer()

# Define a function to get sentiment score
def get_sentiment_label(text):
    score=analyzer.polarity_scores(text)['compound']
    if score >= 0.05:
        return 'Positive'
    elif score <= -0.05:
        return 'Negative'
    else:
        return 'Neutral'

3. Apply labeling and Save labeled data 

In [None]:
df['sentiment']=df['body'].apply(get_sentiment_label)
# Save labeled dataset for further tasks 
df.to_csv(f"{path}data/labeled_test.csv")

Task 2: Eploratory Data Anlysis (EDA) 
1. Analyze labeled dataset to understand the structure 

In [23]:
# Load labeled dataset 
df=pd.read_csv(f"{path}data/labeled_test.csv")

print(df.info())

df.head()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2191 entries, 0 to 2190
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  2191 non-null   int64 
 1   Subject     2191 non-null   object
 2   body        2191 non-null   object
 3   date        2191 non-null   object
 4   from        2191 non-null   object
 5   sentiment   2191 non-null   object
dtypes: int64(1), object(5)
memory usage: 102.8+ KB
None


Unnamed: 0.1,Unnamed: 0,Subject,body,date,from,sentiment
0,0,EnronOptions Update!,EnronOptions Announcement\n\n\nWe have updated...,5/10/2010,sally.beck@enron.com,Positive
1,1,(No Subject),"Marc,\n\nUnfortunately, today is not going to ...",7/29/2010,eric.bass@enron.com,Positive
2,2,Phone Screen Interview - Shannon L. Burnham,"When: Wednesday, June 06, 2001 10:00 AM-11:00 ...",7/25/2011,sally.beck@enron.com,Neutral
3,3,RE: My new work email,we were thinking papasitos (we can meet somewh...,3/25/2010,johnny.palmer@enron.com,Neutral
4,4,Bet,Since you never gave me the $20 for the last t...,5/21/2011,lydia.delgado@enron.com,Positive


In [None]:
# Check  for missing values 
df.isnull().sum()

# basic counts '
print(f"Total number of messages: {df.shape[0]}")
print(f"Total number of unique employees: {df['from'].nunique()}")



2. Sentiment Distribution 

In [None]:

setiment_counts=df['sentiment'].value_counts()
print(setiment_counts)

plt.figure(figsize=(6,4))
sns.countplot(x='sentiment', data=df, palette={"Positive":"green", "Negative":"red", "Neutral":"blue"})
plt.title("Sentiment Label Distribution")
plt.xlabel("Sentiment")
plt.ylabel("Number of Messages")
plt.show()


3. Convert Data column to datetime , Create month period column

In [None]:
df['date']=pd.to_datetime(df['date'])

# check the date range 
print(f"Date range: {df['date'].min()} to {df['date'].max()}")
df['month']=df['date'].dt.to_period('M') 

# Group by month and sentiment 
monthly_sentiment=df.groupby(['month','sentiment']).size().unstack().fillna(0)
monthly_sentiment

4. Plot monthly sentiment trends (stacked bar chart )

In [None]:
monthly_sentiment.plot(kind='bar', stacked=True, figsize=(12,6), color={
    "Positive":"green",
    "Negative":"red",
    "Neutral":"blue"
})
plt.title("Monthly Sentiment Trends")
plt.xlabel("Month")
plt.ylabel("Number of Messages")
plt.xticks(rotation=45)
plt.legend(title="Sentiment")
plt.show()


5. Plot Negative message trend Seperately 

In [None]:
monthly_sentiment['Negative'].plot(kind='line', marker='o', color='red', figsize=(10,5))
plt.title('Trend of Negative Messages Over Time')
plt.xlabel('Month')
plt.ylabel('Number of Negative Messages')
plt.grid(True)
plt.show()


6. Save EDA summaries 

In [24]:
monthly_sentiment.to_csv(f"{path}reports/monthly_sentiment_summary.csv")
setiment_counts.to_csv(f"{path}reports/sentiment_distribution.csv")
