<a href="https://colab.research.google.com/github/LaxminarayananV/intensity_analysis/blob/main/intensity_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE

# Load the three per-emotion datasets from CSV files
sadness_data = pd.read_csv("sadness.csv")
angriness_data = pd.read_csv("angriness.csv")
happiness_data = pd.read_csv("happiness.csv")

# Merge datasets and preprocess.
# Order matters: rows with missing values are dropped first, the text is then
# lower-cased, and only afterwards are duplicates removed. De-duplicating
# after lower-casing means rows that differ only in letter case collapse into
# one, so the same text cannot appear in both the train and the test split.
# The chain builds a fresh frame (no inplace mutation), so re-running this
# cell always yields the same result.
all_data = (
    pd.concat([sadness_data, angriness_data, happiness_data], ignore_index=True)
    .dropna()
    .assign(content=lambda frame: frame['content'].str.lower())
    .drop_duplicates()
)


In [4]:
# Train-test split (80% train / 20% test, fixed seed for reproducibility).
# The split is stratified on the label so that both partitions keep the same
# class proportions as the full dataset; without it a random split can leave
# the test set with a different class mix than the training data.
X_train, X_test, y_train, y_test = train_test_split(
    all_data['content'],
    all_data['intensity'],
    test_size=0.2,
    random_state=42,
    stratify=all_data['intensity'],
)


In [5]:
def _make_oversampled_pipeline(step_name, classifier):
    """Return a TF-IDF -> SMOTE -> classifier pipeline.

    The classifier is registered under ``step_name`` so grid-search
    parameters can address it as ``<step_name>__<param>``. Every call
    creates its own vectorizer and oversampler instances.
    """
    return ImbPipeline(steps=[
        ('tfidf', TfidfVectorizer(max_features=5000)),
        ('smote', SMOTE(random_state=42)),
        (step_name, classifier),
    ])


# SVM and Random Forest text classifiers, each preceded by TF-IDF
# vectorization and SMOTE oversampling. All stochastic steps are seeded
# with 42.
svm_pipeline = _make_oversampled_pipeline('svm', SVC(probability=True, random_state=42))
rf_pipeline = _make_oversampled_pipeline('rf', RandomForestClassifier(random_state=42))


In [6]:
# Define parameter grids for hyperparameter tuning.
# The SVM grid is a list of sub-grids (GridSearchCV accepts either a dict or a
# list of dicts) so that `gamma` is only varied for the RBF kernel. The linear
# kernel ignores `gamma`, so crossing it with every gamma value would only
# refit identical models: 16 candidates before, 12 now, same distinct models.
param_grid_svm = [
    {
        'svm__kernel': ['linear'],
        'svm__C': [0.1, 1, 10, 100],
    },
    {
        'svm__kernel': ['rbf'],
        'svm__C': [0.1, 1, 10, 100],
        'svm__gamma': ['scale', 'auto'],
    },
]

# Random Forest search space. Each key is prefixed with `rf__` so the value
# is routed to the pipeline step named 'rf'.
param_grid_rf = dict(
    rf__n_estimators=[100, 200, 300],
    rf__max_depth=[10, 20, 30],
    rf__min_samples_split=[2, 5, 10],
    rf__min_samples_leaf=[1, 2, 4],
)


In [None]:
# Exhaustive grid search over each pipeline's parameter grid.
# Both searches share the same settings: 5-fold cross-validation, progress
# messages at verbosity 1, and n_jobs=-1 to use every available CPU core.
_search_settings = {'cv': 5, 'verbose': 1, 'n_jobs': -1}

grid_search_svm = GridSearchCV(
    estimator=svm_pipeline,
    param_grid=param_grid_svm,
    **_search_settings,
)
grid_search_svm.fit(X_train, y_train)

grid_search_rf = GridSearchCV(
    estimator=rf_pipeline,
    param_grid=param_grid_rf,
    **_search_settings,
)
grid_search_rf.fit(X_train, y_train)


In [None]:
# Pull the best-scoring pipeline out of each completed grid search
best_model_svm, best_model_rf = (
    search.best_estimator_ for search in (grid_search_svm, grid_search_rf)
)


In [None]:
def _evaluate_on_test_set(model, heading):
    """Score ``model`` on the held-out test split and print the results.

    Prints ``heading`` followed by the accuracy, then the per-class
    classification report. Returns ``(predictions, accuracy)`` so the caller
    can keep both.
    """
    predictions = model.predict(X_test)
    score = accuracy_score(y_test, predictions)
    print(heading, score)
    print(classification_report(y_test, predictions))
    return predictions, score


# Evaluate best models: SVM first, then Random Forest
y_pred_svm, accuracy_svm = _evaluate_on_test_set(
    best_model_svm, "Accuracy of SVM Classifier:"
)
y_pred_rf, accuracy_rf = _evaluate_on_test_set(
    best_model_rf, "Accuracy of Random Forest Classifier:"
)


In [None]:
# Read one line of text from the user, lower-case it, and classify it with
# both tuned models. The pipelines take an iterable of documents, so the
# single string is wrapped in a list.
user_input = input("Enter a text: ").lower()
samples = [user_input]
predicted_emotion_svm = best_model_svm.predict(samples)
predicted_emotion_rf = best_model_rf.predict(samples)

# Each prediction is an array with one entry (one input document)
for header, prediction in (
    ("Predicted Emotion (SVM):", predicted_emotion_svm),
    ("Predicted Emotion (Random Forest):", predicted_emotion_rf),
):
    print(header, prediction[0])


afgagdgdagagea
