In [4]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the original dataset
data = pd.read_csv('shuffled_redditinput.csv')
X = data['text']
y = data['emotion']

# Preprocess the data using TfidfVectorizer
vectorizer = TfidfVectorizer(stop_words='english')
X_vectorized = vectorizer.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)

# Train the neural network model
nn_clf = MLPClassifier(hidden_layer_sizes=(100,), random_state=42)
nn_clf.fit(X_train, y_train)

# Evaluate the model on the test set
y_test_pred = nn_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_test_pred)
print(f"Accuracy: {accuracy:.2f}")
print("Classification report:\n", classification_report(y_test, y_test_pred))

# Load the new dataset
new_data = pd.read_csv('shuffled_tweetspredit.csv')
X_new = new_data['text']

# Preprocess the new text data using the same TfidfVectorizer
X_new_vectorized = vectorizer.transform(X_new)

# Make predictions on the new dataset
y_new_pred = nn_clf.predict(X_new_vectorized)

# Load the true labels (emotions) for the new dataset
y_new_true = new_data['emotion']

# Calculate the accuracy
accuracy = accuracy_score(y_new_true, y_new_pred)
print(f"Accuracy: {accuracy:.2f}")

# Generate a classification report
report = classification_report(y_new_true, y_new_pred)
print("Classification report:\n", report)



Accuracy: 0.58
Classification report:
               precision    recall  f1-score   support

       angry       0.56      0.59      0.58      1649
     disgust       0.41      0.34      0.37      1044
        fear       0.51      0.49      0.50       636
       happy       0.74      0.70      0.72      1579
         sad       0.59      0.61      0.60      1376
    surprise       0.56      0.63      0.59      1084

    accuracy                           0.58      7368
   macro avg       0.56      0.56      0.56      7368
weighted avg       0.58      0.58      0.58      7368

Accuracy: 0.51
Classification report:
               precision    recall  f1-score   support

       angry       0.39      0.55      0.46        20
     disgust       0.68      0.65      0.67        20
        fear       0.67      0.19      0.30        21
       happy       0.52      0.65      0.58        20
         sad       0.55      0.60      0.57        20
    surprise       0.43      0.45      0.44        20
