# This notebook documents the training process for the CNN-LSTM model

## Data Import

In [1]:
# Import necessary libraries
import os
import sys
import pandas as pd
import numpy as np

In [18]:
# Suppress future warnings
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

In [6]:
# Make project modules importable: the project root is taken to be the parent
# of the current working directory (the notebooks folder).
import os
project_root = os.path.abspath(os.pardir)
if project_root not in sys.path:
    sys.path.append(project_root)

# Read the processed dataset from <project_root>/data/processed/emotion.csv
data_path = os.path.join(project_root, 'data', 'processed', 'emotion.csv')
data = pd.read_csv(data_path)
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2367,2368,2369,2370,2371,2372,2373,2374,2375,Emotions
0,0.173828,0.264648,0.372559,0.486328,0.625488,0.681152,0.720703,0.651855,0.562988,0.559082,...,0.766315,0.755441,0.743671,0.731043,0.717561,0.703260,0.688124,0.672208,0.655518,neutral
1,0.231934,0.357422,0.482910,0.472656,0.480469,0.484375,0.491699,0.490723,0.500977,0.503418,...,1.594981,-1.356783,3.955091,1.570934,1.334768,0.901523,-0.066584,-6.374361,-7.761413,neutral
2,0.256348,0.354492,0.471191,0.414062,0.378418,0.373535,0.354492,0.408691,0.466797,0.522461,...,0.006713,0.006601,0.006483,0.006356,0.006218,0.006066,0.005919,0.005759,0.005588,neutral
3,0.384766,0.576172,0.764160,0.778809,0.742188,0.752930,0.787109,0.799316,0.700195,0.590332,...,6.918442,6.534257,6.109815,5.613272,5.033775,4.383093,3.692577,3.005888,2.370324,neutral
4,0.247070,0.389648,0.564941,0.642578,0.665527,0.684570,0.676270,0.666992,0.648926,0.595703,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,neutral
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48643,0.025391,0.035645,0.043457,0.076660,0.116211,0.119141,0.121094,0.081543,0.038574,0.032715,...,2.149720,-1.269249,8.457794,-0.209345,3.439721,10.229767,14.676046,4.467748,4.202962,surprise
48644,0.000000,0.000000,0.006348,0.016113,0.025879,0.034180,0.036621,0.033691,0.032715,0.032227,...,-4.478354,-5.067346,-4.775791,0.778204,5.260514,-0.345362,-1.215722,8.969276,7.940011,surprise
48645,0.078125,0.125977,0.163574,0.132324,0.104980,0.066406,0.065918,0.074707,0.076660,0.081055,...,-5.975361,-4.235834,-2.939499,-1.558753,0.182943,2.841793,4.177210,4.016248,0.110962,surprise
48646,0.004883,0.015625,0.026367,0.036621,0.042969,0.041504,0.043457,0.043457,0.041504,0.042480,...,-17.944071,-13.947256,-3.418745,6.305834,9.191437,-7.628603,1.824499,13.876075,-1.117508,surprise


In [12]:
# Flag, column by column, whether the loaded frame contains any NaN entries
print(data.isna().any(axis=0))

0           False
1           False
2           False
3           False
4           False
            ...  
2372         True
2373         True
2374         True
2375         True
Emotions    False
Length: 2377, dtype: bool


In [13]:
# Preprocessing can introduce NaN values for a number of reasons; to keep
# things simple here, every NaN is replaced with 0.
Emotions = data.fillna(value=0)
# Confirm that no column still reports a NaN, then show the frame's dimensions.
print(Emotions.isna().any(axis=0))
Emotions.shape

0           False
1           False
2           False
3           False
4           False
            ...  
2372        False
2373        False
2374        False
2375        False
Emotions    False
Length: 2377, dtype: bool


(48648, 2377)

In [19]:
# Per-column count of NaN values remaining in the zero-filled frame.
# DataFrame.sum() reduces down the rows by default. np.sum(df) instead calls
# DataFrame.sum(axis=None), which pandas has deprecated (FutureWarning) and
# will eventually collapse to a single scalar rather than one count per column.
Emotions.isna().sum()

0           0
1           0
2           0
3           0
4           0
           ..
2372        0
2373        0
2374        0
2375        0
Emotions    0
Length: 2377, dtype: int64

## Data Preprocessing

In [20]:
# Features (X): every row, every column except the last one.
X = Emotions.iloc[:, :-1].values

# Labels (Y): the 'Emotions' column.
Y = Emotions['Emotions'].values

In [21]:
# This is a multiclass classification problem, so the labels are one-hot encoded.
from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder()
# Encode from the label column of the frame, not from Y itself, so the cell is
# idempotent: encoding Y in place would, on a second run, re-encode the 0/1
# indicator matrix instead of the emotion labels.
# reshape(-1, 1) turns the 1-D label vector into the single-column 2-D input
# the encoder expects; toarray() converts the sparse result to a dense array.
Y = encoder.fit_transform(Emotions['Emotions'].values.reshape(-1, 1)).toarray()

In [23]:
# Report the dimensions of the feature matrix and the encoded label matrix
print(
    *(f'{caption} {arr.shape}' for caption, arr in (('X shape: ', X), ('Y shape: ', Y))),
    sep='\n',
)

X shape:  (48648, 2376)
Y shape:  (48648, 7)


In [24]:
from sklearn.model_selection import train_test_split

# Shuffle and hold out 20% of the samples for testing; the fixed seed keeps
# the split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)
# Show the shapes of the resulting arrays (train features/labels, then test).
tuple(part.shape for part in (x_train, y_train, x_test, y_test))

((38918, 2376), (38918, 7), (9730, 2376), (9730, 7))

## Train model