In [None]:
!pip install pandas numpy matplotlib seaborn scikit-learn transformers torch


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from scipy.special import softmax
from tqdm import tqdm


In [None]:
# Synthetic employee-message dataset: six addresses and six canned messages,
# each list cycled N_REPEATS times and paired with one calendar day per row.
N_REPEATS = 20

employee_emails = [
    'aarti@company.com',
    'ravi@company.com',
    'fatima@company.com',
    'john@company.com',
    'megha@company.com',
    'rohan@company.com',
]

sample_messages = [
    'Loving the new team structure!',
    'Workload feels too heavy lately.',
    'Team meeting went well, feeling motivated!',
    'Deadlines are stressing me out.',
    'Happy with the management and support.',
    'Need more clarity on project expectations.',
]

# One row per day: 6 entries * 20 repeats = 120 rows.
n_rows = len(employee_emails) * N_REPEATS

data = {
    'employee_email': employee_emails * N_REPEATS,
    'date': pd.date_range(start='2024-01-01', periods=n_rows, freq='D'),
    'message': sample_messages * N_REPEATS,
}

df = pd.DataFrame(data)

# Keep a copy on disk (row index omitted) so the raw inputs can be inspected later.
df.to_csv('employee_messages.csv', index=False)
print("✅ Dataset created and saved as employee_messages.csv")
df.head()


In [None]:
# Checkpoint identifier handed to both from_pretrained calls below, so the
# tokenizer and the classifier always come from the same checkpoint.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# Sequence-classification model; its output logits are consumed by get_sentiment.
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

# Matching tokenizer that turns raw text into the tensors the model expects.
tokenizer = AutoTokenizer.from_pretrained(MODEL)


In [None]:
def get_sentiment(text, max_length=512):
    """Classify one message as negative, neutral or positive.

    Relies on the module-level ``tokenizer`` and ``model`` objects.

    Args:
        text: Message to score; passed unchanged to ``tokenizer``.
        max_length: Maximum number of tokens kept after truncation. Passing
            it explicitly makes ``truncation=True`` effective even when the
            tokenizer configuration defines no maximum length of its own.

    Returns:
        tuple[str, float]: The label with the highest softmax probability and
        that probability as a Python float.
    """
    encoded_text = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )

    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        output = model(**encoded_text)

    # output[0] holds the logits for the batch; [0] selects the single input row.
    scores = softmax(output[0][0].detach().numpy())

    # Index order is assumed to match the checkpoint's class ids —
    # TODO confirm against model.config.id2label if MODEL is changed.
    labels = ('negative', 'neutral', 'positive')
    best = int(np.argmax(scores))
    return labels[best], float(scores[best])

# Score every message with a progress bar, then attach both result columns.
# get_sentiment returns a (label, confidence) tuple per message, so the tuples
# are expanded with a single DataFrame construction instead of allocating a
# pd.Series for every row.
tqdm.pandas()
scored = df['message'].progress_apply(get_sentiment)
df[['sentiment', 'confidence']] = pd.DataFrame(
    scored.tolist(),
    index=df.index,  # keep rows aligned with df
    columns=['sentiment', 'confidence'],
)

df.head()


In [None]:
# Bar chart of how many messages received each predicted sentiment label.
# NOTE(review): recent seaborn releases reportedly warn when `palette` is given
# without `hue` — confirm against the installed version before changing the call.
fig, ax = plt.subplots()
sns.countplot(data=df, x='sentiment', palette='Set2', ax=ax)
ax.set_title("Employee Sentiment Distribution")
ax.set_xlabel("Sentiment Type")
ax.set_ylabel("Count of Messages")
plt.show()


In [None]:
# Write df to a CSV at a relative path; index=False leaves the row index out
# of the file.
df.to_csv('employee_sentiment_results.csv', index=False)
print("✅ Sentiment results saved as employee_sentiment_results.csv")


In [None]:
# Offer the results file as a browser download when running on Google Colab.
# The google.colab package is only importable on Colab, so on any other
# runtime the download is skipped instead of aborting the notebook run.
try:
    from google.colab import files
except ImportError:
    print("Not running on Google Colab; results file is at employee_sentiment_results.csv")
else:
    files.download("employee_sentiment_results.csv")
