In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

""" Performing Aspect Based Sentiment Analysis """

df = pd.read_csv('../data/preprocessed_data.csv')
aspects = {
    'Work Life Balance': 'work_life_balance',
    'Skill Development': 'skill_development',
    'Salary and Benefits': 'salary_and_benefits',
    'Job Security': 'job_security',
    'Career Growth': 'career_growth',
    'Work Satisfaction': 'work_satisfaction'
}

for aspect_name, aspect_column in aspects.items():
    print(f"\nAspect: {aspect_name}")
    X_train, X_test, y_train, y_test = train_test_split(df['Text'], df['Sentiment'], test_size=0.2, random_state=42)

    vectorizer = TfidfVectorizer()
    X_train_vec = vectorizer.fit_transform(X_train)
    X_test_vec = vectorizer.transform(X_test)

    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(X_train_vec, y_train)

    model_filename = f'../models/rf_model_{aspect_column}.pkl'
    vectorizer_filename = f'../models/tfidf_vectorizer_{aspect_column}.pkl'
    joblib.dump(rf_model, model_filename)
    joblib.dump(vectorizer, vectorizer_filename)

    y_pred = rf_model.predict(X_test_vec)
    print(classification_report(y_test, y_pred))



Aspect: Work Life Balance
              precision    recall  f1-score   support

          -1       0.36      0.47      0.41        38
           0       1.00      0.03      0.06        31
           1       0.78      0.85      0.81       195

    accuracy                           0.70       264
   macro avg       0.71      0.45      0.43       264
weighted avg       0.74      0.70      0.67       264


Aspect: Skill Development
              precision    recall  f1-score   support

          -1       0.36      0.47      0.41        38
           0       1.00      0.03      0.06        31
           1       0.78      0.85      0.81       195

    accuracy                           0.70       264
   macro avg       0.71      0.45      0.43       264
weighted avg       0.74      0.70      0.67       264


Aspect: Salary and Benefits
              precision    recall  f1-score   support

          -1       0.36      0.47      0.41        38
           0       1.00      0.03      0.06   