In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import SelectKBest, f_classif
import time
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
data = pd.read_csv('emotions.csv')  
X = data.drop('label', axis=1)
y = data['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

In [4]:
# L1 Regularization
l1_logistic_model = LogisticRegression(penalty='l1', solver='liblinear', C=1.0)
l1_logistic_model.fit(X_train, y_train_encoded)
l1_train_accuracy = l1_logistic_model.score(X_train, y_train_encoded)
l1_test_accuracy = l1_logistic_model.score(X_test, y_test_encoded)

In [5]:
# Feature selection with L1 regularization
feature_mask = l1_logistic_model.coef_[0] != 0
X_train_selected_l1 = X_train.loc[:, feature_mask]
X_test_selected_l1 = X_test.loc[:, feature_mask]

In [6]:
# SelectKBest with f_classif
selector = SelectKBest(f_classif, k='all')
X_train_selected_kbest = selector.fit_transform(X_train_selected_l1, y_train_encoded)
X_test_selected_kbest = selector.transform(X_test_selected_l1)

In [7]:
# Train Random Forest on SelectKBest features
rf_model = RandomForestClassifier()
rf_model.fit(X_train_selected_kbest, y_train_encoded)
rf_train_accuracy = rf_model.score(X_train_selected_kbest, y_train_encoded)
rf_test_accuracy = rf_model.score(X_test_selected_kbest, y_test_encoded)

In [8]:
# Train XGBoost on SelectKBest features
xgb_model = XGBClassifier()
xgb_model.fit(X_train_selected_kbest, y_train_encoded)
xgb_train_accuracy = xgb_model.score(X_train_selected_kbest, y_train_encoded)
xgb_test_accuracy = xgb_model.score(X_test_selected_kbest, y_test_encoded)

In [9]:
# Train SVM on SelectKBest features
svm_model = SVC()
svm_model.fit(X_train_selected_kbest, y_train_encoded)
svm_train_accuracy = svm_model.score(X_train_selected_kbest, y_train_encoded)
svm_test_accuracy = svm_model.score(X_test_selected_kbest, y_test_encoded)

In [12]:
X_train_cnn = X_train_selected_kbest.reshape(X_train_selected_kbest.shape[0], X_train_selected_kbest.shape[1], 1)
X_test_cnn = X_test_selected_kbest.reshape(X_test_selected_kbest.shape[0], X_test_selected_kbest.shape[1], 1)

In [13]:
# Train CNN
X_train_cnn = X_train_selected_kbest.reshape(X_train_selected_kbest.shape[0], X_train_selected_kbest.shape[1], 1)
X_test_cnn = X_test_selected_kbest.reshape(X_test_selected_kbest.shape[0], X_test_selected_kbest.shape[1], 1)

cnn_model = keras.Sequential([
    layers.Input(shape=(X_train_cnn.shape[1], X_train_cnn.shape[2])),
    layers.Conv1D(64, 3, activation='relu'),
    layers.MaxPooling1D(2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(y_train_encoded.max() + 1, activation='softmax')
])
cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
cnn_model.fit(X_train_cnn, y_train_encoded, epochs=10, batch_size=32, validation_split=0.2)
cnn_loss, cnn_accuracy = cnn_model.evaluate(X_test_cnn, y_test_encoded)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
# Train RNN
X_train_rnn = X_train_selected_kbest.reshape(X_train_selected_kbest.shape[0], X_train_selected_kbest.shape[1], 1)
X_test_rnn = X_test_selected_kbest.reshape(X_test_selected_kbest.shape[0], X_test_selected_kbest.shape[1], 1)

rnn_model = keras.Sequential([
    layers.Input(shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])),
    layers.LSTM(64, return_sequences=True),
    layers.LSTM(64),
    layers.Dense(y_train_encoded.max() + 1, activation='softmax')
])
rnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
rnn_model.fit(X_train_rnn, y_train_encoded, epochs=10, batch_size=32, validation_split=0.2)
rnn_loss, rnn_accuracy = rnn_model.evaluate(X_test_rnn, y_test_encoded)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [16]:
# Print Results
print("L1 Logistic Regression Train Accuracy:", l1_train_accuracy)
print("L1 Logistic Regression Test Accuracy:", l1_test_accuracy)

print("Random Forest Train Accuracy:", rf_train_accuracy)
print("Random Forest Test Accuracy:", rf_test_accuracy)

print("XGBoost Train Accuracy:", xgb_train_accuracy)
print("XGBoost Test Accuracy:", xgb_test_accuracy)

print("SVM Train Accuracy:", svm_train_accuracy)
print("SVM Test Accuracy:", svm_test_accuracy)

print("CNN Accuracy:", cnn_accuracy)

print("RNN Accuracy:", rnn_accuracy)

L1 Logistic Regression Train Accuracy: 1.0
L1 Logistic Regression Test Accuracy: 0.9508196721311475
Random Forest Train Accuracy: 1.0
Random Forest Test Accuracy: 0.9789227166276346
XGBoost Train Accuracy: 1.0
XGBoost Test Accuracy: 0.9882903981264637
SVM Train Accuracy: 0.39413489736070384
SVM Test Accuracy: 0.38875878220140514
CNN Accuracy: 0.6370023488998413
RNN Accuracy: 0.8969554901123047


In [22]:
pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.9.0-py2.py3-none-any.whl (100 kB)
     ---------------------------------------- 0.0/100.3 kB ? eta -:--:--
     -------------------------------------- 100.3/100.3 kB 2.9 MB/s eta 0:00:00
Collecting pyaml>=16.9
  Downloading pyaml-23.9.7-py3-none-any.whl (23 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-23.9.7 scikit-optimize-0.9.0
Note: you may need to restart the kernel to use updated packages.


In [29]:
pip install --upgrade tensorflow

Collecting tensorflow
  Downloading tensorflow-2.15.0-cp311-cp311-win_amd64.whl (2.1 kB)
Collecting tensorflow-intel==2.15.0
  Downloading tensorflow_intel-2.15.0-cp311-cp311-win_amd64.whl (300.9 MB)
     ---------------------------------------- 0.0/300.9 MB ? eta -:--:--
     ---------------------------------------- 0.2/300.9 MB 6.3 MB/s eta 0:00:48
     ---------------------------------------- 0.8/300.9 MB 9.8 MB/s eta 0:00:31
     --------------------------------------- 1.6/300.9 MB 13.1 MB/s eta 0:00:23
     --------------------------------------- 2.7/300.9 MB 15.5 MB/s eta 0:00:20
      -------------------------------------- 3.9/300.9 MB 17.7 MB/s eta 0:00:17
      -------------------------------------- 5.2/300.9 MB 19.7 MB/s eta 0:00:16
      -------------------------------------- 6.7/300.9 MB 21.5 MB/s eta 0:00:14
     - ------------------------------------- 8.2/300.9 MB 22.7 MB/s eta 0:00:13
     - ------------------------------------- 9.4/300.9 MB 23.1 MB/s eta 0:00:13
     - 

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\jun\\anaconda3\\envs\\env2\\Lib\\site-packages\\google\\~upb\\_message.pyd'
Consider using the `--user` option or check the permissions.



In [31]:
pip install --upgrade pip

Collecting pip
  Downloading pip-23.3.1-py3-none-any.whl (2.1 MB)
     ---------------------------------------- 0.0/2.1 MB ? eta -:--:--
     ----------------- ---------------------- 0.9/2.1 MB 19.4 MB/s eta 0:00:01
     ------------------------------------- -- 2.0/2.1 MB 25.5 MB/s eta 0:00:01
     ---------------------------------------  2.1/2.1 MB 22.2 MB/s eta 0:00:01
     ---------------------------------------  2.1/2.1 MB 22.2 MB/s eta 0:00:01
     ---------------------------------------  2.1/2.1 MB 22.2 MB/s eta 0:00:01
     ---------------------------------------  2.1/2.1 MB 22.2 MB/s eta 0:00:01
     ---------------------------------------  2.1/2.1 MB 22.2 MB/s eta 0:00:01
     ---------------------------------------  2.1/2.1 MB 22.2 MB/s eta 0:00:01
     ---------------------------------------  2.1/2.1 MB 22.2 MB/s eta 0:00:01
     ---------------------------------------  2.1/2.1 MB 22.2 MB/s eta 0:00:01
     ---------------------------------------  2.1/2.1 MB 22.2 MB/s eta 0: