# 🧠 Employee Sentiment Analysis – Final LLM Assessment Project

## 1. Sentiment Labeling

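Classify each employee message as Positive, Neutral, or Negative using TextBlob's polarity score: above 0.1 is Positive, below -0.1 is Negative, and anything in between is Neutral.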

In [None]:
from textblob import TextBlob
import pandas as pd

# Read the message log and parse the 'date' column into datetimes in one
# chained expression.
df = pd.read_csv('test.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

def get_sentiment(text):
    """Label a message as 'Positive', 'Negative' or 'Neutral'.

    The input is coerced with ``str`` and scored with TextBlob's polarity.
    A polarity above 0.1 is 'Positive', below -0.1 is 'Negative', and
    everything in between (both bounds included) is 'Neutral'.
    """
    score = TextBlob(str(text)).sentiment.polarity
    if score > 0.1:
        return 'Positive'
    if score < -0.1:
        return 'Negative'
    return 'Neutral'

# Label every message, then preview the id, text and label side by side.
df['sentiment'] = df['message'].map(get_sentiment)
df.loc[:, ['employee_id', 'message', 'sentiment']].head()

## 2. Exploratory Data Analysis (EDA)

Inspect the dataset's structure and summary statistics, then plot how many messages fall under each sentiment label.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None".
df.info()
print(df.describe(include='all'))

# One bar per sentiment label, showing how many messages received that label.
sns.countplot(x='sentiment', data=df)
plt.title("Sentiment Distribution")
plt.show()

## 3. Monthly Sentiment Scoring

Assign a numeric score to each message (Positive = 1, Neutral = 0, Negative = -1) and sum the scores per employee for each calendar month.

In [None]:
# Numeric value of each sentiment label; a label missing from this table
# would map to NaN.
score_by_label = {'Positive': 1, 'Negative': -1, 'Neutral': 0}
df['score'] = df['sentiment'].map(score_by_label)

# Calendar month (as a pandas Period) in which each message was sent.
df['month'] = df['date'].dt.to_period('M')

# Net score per employee per month, flattened back into ordinary columns.
scores_by_employee_month = df.groupby(['employee_id', 'month'])['score']
monthly_scores = scores_by_employee_month.sum().reset_index()
monthly_scores.head()

## 4. Employee Ranking

Identify the three employees with the highest and the three with the lowest total sentiment scores across all months.

In [None]:
# Total score per employee across all months.
overall_scores = monthly_scores.groupby('employee_id')['score'].sum().reset_index()

# Sorting on score alone leaves the relative order of tied employees up to
# the sort algorithm, so which rows survive head(3) is not guaranteed to be
# the same every time. employee_id is added as a secondary key so ties are
# always broken the same way (ascending id).
top3_positive = overall_scores.sort_values(
    by=['score', 'employee_id'], ascending=[False, True]
).head(3)
# NOTE: these are the three lowest totals; nothing here requires them to be
# below zero.
top3_negative = overall_scores.sort_values(by=['score', 'employee_id']).head(3)

print("Top 3 Positive Employees:")
display(top3_positive)

print("Top 3 Negative Employees:")
display(top3_negative)

# Same chart for both rankings; only the data, title and palette differ.
for ranking, chart_title, palette_name in (
    (top3_positive, 'Top 3 Positive Employees', 'Greens_r'),
    (top3_negative, 'Top 3 Negative Employees', 'Reds'),
):
    sns.barplot(x='employee_id', y='score', data=ranking, palette=palette_name)
    plt.title(chart_title)
    plt.show()

## 5. Flight Risk Detection

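Flag employees with 4 or more negative messages in any 30-day window. Each window starts at one of the employee's negative messages and runs 30 days forward, with both endpoints included.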

In [None]:
# Window length and the number of negative messages that must fall inside it.
# The comparison below is inclusive, so two messages exactly 30 days apart
# still share a window.
RISK_WINDOW = pd.Timedelta(days=30)
RISK_MIN_NEGATIVES = 4

negative_msgs = df[df['sentiment'] == 'Negative'].copy()
negative_msgs = negative_msgs.sort_values(by=['employee_id', 'date'])

# With each employee's negative messages in date order, "at least N messages
# between some message and 30 days after it" holds exactly when some message
# and the one N-1 positions before it are at most 30 days apart. A grouped
# shift does that in one pass instead of re-filtering the frame for every row.
# Rows with fewer than N-1 predecessors get NaT, which compares as False.
earlier_date = negative_msgs.groupby('employee_id')['date'].shift(RISK_MIN_NEGATIVES - 1)
within_window = (negative_msgs['date'] - earlier_date) <= RISK_WINDOW
flight_risk_employees = set(negative_msgs.loc[within_window, 'employee_id'])

if flight_risk_employees:
    print("Flight Risk Employees:", flight_risk_employees)
else:
    print("No flight risks found in this dataset.")

## 6. Predictive Modeling

Build a linear regression model to predict sentiment scores based on time.

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

model_data = df[['employee_id', 'date', 'score']].copy()
model_data['month'] = model_data['date'].dt.to_period('M')

# Month-of-year alone (1-12) maps e.g. January of two different years to the
# same feature value, which hides any trend across years. Adding 12 per year
# elapsed since the earliest year keeps the feature increasing with time;
# when all data falls in one year the values are the plain 1-12 month numbers.
first_year = model_data['month'].dt.year.min()
model_data['month_num'] = (
    (model_data['month'].dt.year - first_year) * 12 + model_data['month'].dt.month
)

# Single feature (month number) predicting the per-message score.
X = model_data[['month_num']]
y = model_data['score']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

# Score on the held-out rows only.
r2_score = model.score(X_test, y_test)
print("Model R² Score:", round(r2_score, 3))

# Overlay actual and predicted scores for the held-out rows.
y_pred = model.predict(X_test)
plt.scatter(X_test, y_test, color='blue', label='Actual')
plt.scatter(X_test, y_pred, color='red', label='Predicted')
plt.title("Actual vs Predicted Sentiment Scores")
plt.xlabel("Month Number")
plt.ylabel("Sentiment Score")
plt.legend()
plt.grid(True)
plt.show()