In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import pickle

# Train a RandomForest classifier that predicts a mood label from five emoji
# columns, then persist the model and its label encoder with pickle.

# Load the dataset from the CSV file
df = pd.read_csv('datasets/emoji_dataset.csv')

# Feature columns, named once so the mapping step and the feature matrix
# below cannot drift apart.
feature_columns = ['emoji_1', 'emoji_2', 'emoji_3', 'emoji_4', 'emoji_5']

# Map emojis to numbers for model training
emoji_to_num = {
    '😊': 1, '😃': 2, '😄': 3, '😎': 4, '😇': 5,
    '😢': 6, '😞': 7, '😔': 8, '😱': 9, '😂': 10
}

# Series.map turns every value that is absent from the dict into NaN, so an
# emoji missing from emoji_to_num would silently become a missing value.
# Detect that up front and fail with a message naming the offending emojis.
# Cells that are already empty in the CSV are left alone here.
known_emojis = list(emoji_to_num)
unknown_emojis = {}
for col in feature_columns:
    unmapped = df[col].notna() & ~df[col].isin(known_emojis)
    if unmapped.any():
        unknown_emojis[col] = df.loc[unmapped, col].unique().tolist()
if unknown_emojis:
    raise ValueError(
        f'Dataset contains emojis missing from emoji_to_num: {unknown_emojis}'
    )

# Map emojis in dataset to numbers
for col in feature_columns:
    df[col] = df[col].map(emoji_to_num)

# Encode labels (happy, depressed)
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

# Split the dataset into features (X) and labels (y)
X = df[feature_columns]
y = df['label']

# Split into training and testing data (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the RandomForestClassifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save the trained model to a file
with open('emoji_mood_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# Save the label encoder to map the predictions back to labels
with open('label_encoder.pkl', 'wb') as le_file:
    pickle.dump(label_encoder, le_file)

# Optionally, check the model accuracy on the held-out test split
accuracy = model.score(X_test, y_test)
print(f'Model Accuracy: {accuracy}')


Model Accuracy: 1.0
