In [2]:
import numpy as np
import pandas as pd
import math
from scipy.signal import detrend
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
import pickle

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
import cupy as cp
from scipy.signal import detrend

def detrend_signal(signal):
    return cp.array(detrend(cp.asnumpy(signal), axis=-1))

def baseline_correction(baseline, stimuli):
    baseline_mean = cp.mean(baseline, axis=-1, keepdims=True)
    return stimuli - baseline_mean

ModuleNotFoundError: No module named 'cupy'

In [13]:
# Feature extraction function
def feature_extraction_EEG(raw, secs):
    # 128 Hz is the sampling rate for the EEG data
    fs_EEG = 128
    N_EEG = math.ceil(fs_EEG * secs)
    features = []
    for electrode in eeg_electrodes:
        features += [f'{electrode}_raw']

    # DataFrame to store features
    columns = ['participant', 'video'] + features
    df = pd.DataFrame(columns=columns)
    
    for participant in range(23):
        for video in range(18):
            feature_values = []
            for i in range(14):
                basl = raw['DREAMER'][0, 0]['Data'][0, participant]['EEG'][0, 0]['baseline'][0, 0][video, 0][-1 - N_EEG: -1, i]
                stimuli = raw['DREAMER'][0, 0]['Data'][0, participant]['EEG'][0, 0]['stimuli'][0, 0][video, 0][-1 - N_EEG: -1, i]
                
                # Detrending
                basl_detrended = detrend_signal(basl)
                stimuli_detrended = detrend_signal(stimuli)
                
                # Baseline correction
                stimuli_corrected = baseline_correction(basl_detrended, stimuli_detrended)
                
                # Append raw signal
                feature_values.append(cp.asnumpy(stimuli_corrected))
            
            # Append to DataFrame
            row = [participant + 1, video + 1] + feature_values
            if len(row) == len(columns):
                df.loc[len(df)] = row
            else:
                print(f'Skipping participant {participant + 1}, video {video + 1}')
                print(len(row))
                print(len(columns))
    return df

In [16]:
eeg_electrodes = [str(e[0]) for e in raw['DREAMER']['EEG_Electrodes'][0][0][0]]
df = feature_extraction_EEG(raw, secs=60)  # Adjust `secs` as needed

In [152]:
import scipy.io as sio
raw = sio.loadmat('../DREAMER_Dataset/DREAMER.mat')

In [22]:
import pickle
with open('../Extracted_Features/Raw_Data.pickle', 'wb') as f:
    # Pickle the 'data' dictionary using the highest protocol available.
    pickle.dump(df, f, pickle.HIGHEST_PROTOCOL)
# The following example reads the resulting pickled data.

In [2]:
with open('../Extracted_Features/Raw_Data.pickle', 'rb') as f:
    # The protocol version used is detected automatically, so we do not
    # have to specify it.
    df = pickle.load(f)

In [7]:
df["AF3_raw"].iloc[1]

array([-0.38710523,  7.8181817 ,  5.25423785, ...,  3.39546835,
        9.0366527 , -0.70677832])

In [11]:
# create new dataframe with emotion, participant, and video data
a=np.zeros((23,18,9),dtype=object)
for participant in range(0,23):
    for video in range(0,18):
        a[participant,video,0]=raw['DREAMER'][0,0]['Data'][0,participant]['Age'][0][0][0]
        a[participant,video,1]=raw['DREAMER'][0,0]['Data'][0,participant]['Gender'][0][0][0]
        a[participant,video,2]=participant+1
        a[participant,video,3]=video+1
        a[participant,video,4]=['Searching for Bobby Fischer','D.O.A.', 'The Hangover', 'The Ring', '300',
                  'National Lampoon\'s VanWilder', 'Wall-E', 'Crash', 'My Girl', 'The Fly',
                  'Pride and Prejudice', 'Modern Times', 'Remember the Titans', 'Gentlemans Agreement',
                  'Psycho', 'The Bourne Identitiy', 'The Shawshank Redemption', 'The Departed'][video]
        a[participant,video,5]=['calmness', 'surprise', 'amusement', 'fear', 'excitement', 'disgust',
                  'happiness', 'anger', 'sadness', 'disgust', 'calmness', 'amusement',
                  'happiness', 'anger', 'fear', 'excitement', 'sadness', 'surprise'][video]
        a[participant,video,6]=raw['DREAMER'][0,0]['Data'][0,participant]['ScoreValence'][0,0][video,0]
        a[participant,video,7]=raw['DREAMER'][0,0]['Data'][0,participant]['ScoreArousal'][0,0][video,0]
        a[participant,video,8]=raw['DREAMER'][0,0]['Data'][0,participant]['ScoreDominance'][0,0][video,0]
b=pd.DataFrame(a.reshape((23*18,a.shape[2])),columns=['Age','Gender','Participant','Video','Video_Name','Target_Emotion','Valence','Arousal','Dominance'])

In [12]:
b

Unnamed: 0,Age,Gender,Participant,Video,Video_Name,Target_Emotion,Valence,Arousal,Dominance
0,22,male,1,1,Searching for Bobby Fischer,calmness,4,3,2
1,22,male,1,2,D.O.A.,surprise,3,3,1
2,22,male,1,3,The Hangover,amusement,5,4,4
3,22,male,1,4,The Ring,fear,4,3,2
4,22,male,1,5,300,excitement,4,4,4
...,...,...,...,...,...,...,...,...,...
409,25,male,23,14,Gentlemans Agreement,anger,2,2,2
410,25,male,23,15,Psycho,fear,2,2,2
411,25,male,23,16,The Bourne Identitiy,excitement,3,3,2
412,25,male,23,17,The Shawshank Redemption,sadness,2,2,4


In [185]:
b["Valence"].unique()

array([np.uint8(4), np.uint8(3), np.uint8(5), np.uint8(1), np.uint8(2)],
      dtype=object)

In [13]:
# Merge the feature DataFrame with metadata DataFrame
final_df = pd.merge(df, b, left_on=['participant', 'video'], right_on=['Participant', 'Video'])
final_df

Unnamed: 0,participant,video,AF3_raw,F7_raw,F3_raw,FC5_raw,T7_raw,P7_raw,O1_raw,O2_raw,...,AF4_raw,Age,Gender,Participant,Video,Video_Name,Target_Emotion,Valence,Arousal,Dominance
0,1,1,"[-10.349212474280488, 0.9320963567671932, 4.52...","[-7.336422979280564, -2.7212738534012937, 0.35...","[-6.357392050340919, -46.86718793292361, -64.3...","[27.834342803367694, -39.340612753736416, -63....","[22.051642042275102, -40.510159638368734, -71....","[-25.750224861795058, -41.64657512470267, -48....","[0.3891795592307721, -50.89300581509636, -65.7...","[43.423268402468864, -42.7273519601261, -81.69...",...,"[-0.5554431681218347, -74.90768364976493, -108...",22,male,1,1,Searching for Bobby Fischer,calmness,4,3,2
1,1,2,"[-0.38710522523616814, 7.818181696969973, 5.25...","[-0.9492062185599682, 8.281459209581069, 2.640...","[-29.84689530067401, -43.18212359451074, -35.4...","[-2.218007780462737, -29.91257528703265, -13.5...","[-29.2927823170884, -56.472712994591255, -43.6...","[-1.1194001362567376, -3.1707960975111944, 0.4...","[-31.946807814430898, -43.741895514175624, -29...","[-55.545831695510955, -85.80257081294347, -68....",...,"[-85.11469615543234, -90.75595696563374, -65.6...",22,male,1,2,D.O.A.,surprise,3,3,1
2,1,3,"[6.740238271785735, -1.464670313963118, -7.105...","[3.413843732230308, -4.790622767795466, -10.94...","[-47.36015967778116, -42.74643128266957, -39.6...","[-51.68328934853619, -26.555439767001214, -9.1...","[-62.37528256815315, -45.452432926611465, -30....","[-24.54733880679436, -18.391668699835865, -11....","[-77.01211916934554, -62.137931068780205, -46....","[-70.50876618489838, -48.97123494887409, -36.6...",...,"[-117.74840854597883, -110.56750845050888, -11...",22,male,1,3,The Hangover,amusement,5,4,4
3,1,4,"[-7.798221562777997, 4.509400087366941, 9.1247...","[-5.116608810783481, 4.626821600452269, 11.293...","[24.93125629049786, 11.084152372000009, 10.057...","[37.87436013579214, -10.845343361281941, -23.1...","[50.74481684256556, 2.538494081670473, -3.6165...","[9.49379155066421, 1.288462268119328, 2.313902...","[27.807237142070754, -3.4762036998813866, -3.4...","[77.15881665492797, -7.457717821908049, -38.22...",...,"[34.00357410810434, -22.402803761126375, -33.6...",22,male,1,4,The Ring,fear,4,3,2
4,1,5,"[-1.0540439758948354, 10.2278997652951, 7.6636...","[-2.913624451554164, 7.855432900298843, 3.7526...","[-77.78475320526675, -85.50147614873666, -73.7...","[-7.365409600031402, -40.69691126850421, -29.4...","[8.953908321623967, -76.68765838967207, -62.84...","[-7.614922043039722, -9.66652009112654, -4.025...","[-6.910755146870472, -41.27047808440868, -24.8...","[-30.26654058201691, -98.98452880038919, -47.7...",...,"[13.143391845043018, -77.11309307936695, -52.4...",22,male,1,5,300,excitement,4,4,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409,23,14,"[-31.704198905027933, -32.72951525295624, -28....","[-15.555043379043072, -17.605404769358245, -20...","[-18.61622651269403, -13.999609198624913, -15....","[-14.698681349777496, -13.672134060956148, -14...","[-23.30060275410491, -22.78753654145279, -17.6...","[-8.816268873573902, -6.252239101327289, -8.81...","[279.79109403670617, 285.42943133898524, 287.4...","[-7.173135811725212, -1.0185719947610834, 2.05...",...,"[-37.972611579298515, -38.48291150179311, -40....",25,male,23,14,Gentlemans Agreement,anger,2,2,2
410,23,15,"[5.466920035649326, 2.9008550036569596, -1.203...","[19.824976691430564, 16.74719391801507, 16.233...","[22.984495698257362, 19.393947142874296, 18.36...","[10.522773668263424, 11.035265103820846, 11.03...","[21.95946583143874, 24.009312500894033, 28.110...","[21.238268640320484, 23.80170581178004, 21.236...","[20.594802762840782, 20.593162081299607, 20.59...","[38.0866367831533, 34.49652294398144, 32.44487...",...,"[18.966287091329455, 18.45239653087682, 15.374...",25,male,23,15,Psycho,fear,2,2,2
411,23,16,"[-23.62545386161945, -23.62685140441953, -20.0...","[-13.207442040493266, -10.132756016797817, -12...","[5.919693285935381, 5.404129601617996, 6.93984...","[2.4862210644104663, 4.021971582134875, 9.6602...","[-1.9057467428498671, -3.960635285737833, -15....","[4.3931845090082255, 4.390480711103866, 2.3364...","[-2.3892454894785056, -4.442585452677101, -7.0...","[19.635451596486917, 20.658453846575583, 22.70...",...,"[-18.03784008673325, -18.03905038031304, -18.5...",25,male,23,16,The Bourne Identitiy,excitement,3,3,2
412,23,17,"[-6.264400004813267, -5.239062339847094, -4.72...","[5.878397163971902, -6.430141588554244, -11.04...","[6.265703153080619, 5.7513450467094, 0.1087818...","[10.77405565695624, 11.28418632089431, 10.2558...","[-8.565089727038279, 7.844667256426551, 16.049...","[1.4333426212944123, 6.0485652422077125, 9.125...","[-36.47451244649021, -37.505569346830654, -39....","[-16.793433465247425, -12.178954818932333, -11...",...,"[4.8526302066835605, 7.928167153135863, 4.3370...",25,male,23,17,The Shawshank Redemption,sadness,2,2,4


In [14]:
final_df = final_df.drop(["Participant", "Video"], axis=1)
final_df

Unnamed: 0,participant,video,AF3_raw,F7_raw,F3_raw,FC5_raw,T7_raw,P7_raw,O1_raw,O2_raw,...,F4_raw,F8_raw,AF4_raw,Age,Gender,Video_Name,Target_Emotion,Valence,Arousal,Dominance
0,1,1,"[-10.349212474280488, 0.9320963567671932, 4.52...","[-7.336422979280564, -2.7212738534012937, 0.35...","[-6.357392050340919, -46.86718793292361, -64.3...","[27.834342803367694, -39.340612753736416, -63....","[22.051642042275102, -40.510159638368734, -71....","[-25.750224861795058, -41.64657512470267, -48....","[0.3891795592307721, -50.89300581509636, -65.7...","[43.423268402468864, -42.7273519601261, -81.69...",...,"[-10.364532668578148, -26.259576176464076, -32...","[-2.7922149552880873, -51.50648426600064, -69....","[-0.5554431681218347, -74.90768364976493, -108...",22,male,Searching for Bobby Fischer,calmness,4,3,2
1,1,2,"[-0.38710522523616814, 7.818181696969973, 5.25...","[-0.9492062185599682, 8.281459209581069, 2.640...","[-29.84689530067401, -43.18212359451074, -35.4...","[-2.218007780462737, -29.91257528703265, -13.5...","[-29.2927823170884, -56.472712994591255, -43.6...","[-1.1194001362567376, -3.1707960975111944, 0.4...","[-31.946807814430898, -43.741895514175624, -29...","[-55.545831695510955, -85.80257081294347, -68....",...,"[-17.208695619364846, -16.19140531780937, -11....","[-122.12733116987303, -138.02650317995787, -12...","[-85.11469615543234, -90.75595696563374, -65.6...",22,male,D.O.A.,surprise,3,3,1
2,1,3,"[6.740238271785735, -1.464670313963118, -7.105...","[3.413843732230308, -4.790622767795466, -10.94...","[-47.36015967778116, -42.74643128266957, -39.6...","[-51.68328934853619, -26.555439767001214, -9.1...","[-62.37528256815315, -45.452432926611465, -30....","[-24.54733880679436, -18.391668699835865, -11....","[-77.01211916934554, -62.137931068780205, -46....","[-70.50876618489838, -48.97123494887409, -36.6...",...,"[-21.93647870702945, -27.57728772842303, -25.5...","[-92.89575464126565, -81.61575420193722, -68.7...","[-117.74840854597883, -110.56750845050888, -11...",22,male,The Hangover,amusement,5,4,4
3,1,4,"[-7.798221562777997, 4.509400087366941, 9.1247...","[-5.116608810783481, 4.626821600452269, 11.293...","[24.93125629049786, 11.084152372000009, 10.057...","[37.87436013579214, -10.845343361281941, -23.1...","[50.74481684256556, 2.538494081670473, -3.6165...","[9.49379155066421, 1.288462268119328, 2.313902...","[27.807237142070754, -3.4762036998813866, -3.4...","[77.15881665492797, -7.457717821908049, -38.22...",...,"[5.750397059373909, 2.6719938533138072, 4.2089...","[44.528006638508316, 13.75661411855691, 10.164...","[34.00357410810434, -22.402803761126375, -33.6...",22,male,The Ring,fear,4,3,2
4,1,5,"[-1.0540439758948354, 10.2278997652951, 7.6636...","[-2.913624451554164, 7.855432900298843, 3.7526...","[-77.78475320526675, -85.50147614873666, -73.7...","[-7.365409600031402, -40.69691126850421, -29.4...","[8.953908321623967, -76.68765838967207, -62.84...","[-7.614922043039722, -9.66652009112654, -4.025...","[-6.910755146870472, -41.27047808440868, -24.8...","[-30.26654058201691, -98.98452880038919, -47.7...",...,"[-22.956039179678584, -22.95605209016051, -15....","[5.610392693926274, -68.23502592613708, -65.67...","[13.143391845043018, -77.11309307936695, -52.4...",22,male,300,excitement,4,4,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409,23,14,"[-31.704198905027933, -32.72951525295624, -28....","[-15.555043379043072, -17.605404769358245, -20...","[-18.61622651269403, -13.999609198624913, -15....","[-14.698681349777496, -13.672134060956148, -14...","[-23.30060275410491, -22.78753654145279, -17.6...","[-8.816268873573902, -6.252239101327289, -8.81...","[279.79109403670617, 285.42943133898524, 287.4...","[-7.173135811725212, -1.0185719947610834, 2.05...",...,"[-1240.0690002671954, -1196.9337449674017, -93...","[-15.402448839378042, -15.401729520758739, -17...","[-37.972611579298515, -38.48291150179311, -40....",25,male,Gentlemans Agreement,anger,2,2,2
410,23,15,"[5.466920035649326, 2.9008550036569596, -1.203...","[19.824976691430564, 16.74719391801507, 16.233...","[22.984495698257362, 19.393947142874296, 18.36...","[10.522773668263424, 11.035265103820846, 11.03...","[21.95946583143874, 24.009312500894033, 28.110...","[21.238268640320484, 23.80170581178004, 21.236...","[20.594802762840782, 20.593162081299607, 20.59...","[38.0866367831533, 34.49652294398144, 32.44487...",...,"[-256.4677886910428, -450.34520134403937, -462...","[21.75392584043096, 20.215181211643205, 16.112...","[18.966287091329455, 18.45239653087682, 15.374...",25,male,Psycho,fear,2,2,2
411,23,16,"[-23.62545386161945, -23.62685140441953, -20.0...","[-13.207442040493266, -10.132756016797817, -12...","[5.919693285935381, 5.404129601617996, 6.93984...","[2.4862210644104663, 4.021971582134875, 9.6602...","[-1.9057467428498671, -3.960635285737833, -15....","[4.3931845090082255, 4.390480711103866, 2.3364...","[-2.3892454894785056, -4.442585452677101, -7.0...","[19.635451596486917, 20.658453846575583, 22.70...",...,"[-82.28162249200449, -125.86801543132987, -127...","[-9.257969718958025, -6.69583084101619, -3.620...","[-18.03784008673325, -18.03905038031304, -18.5...",25,male,The Bourne Identitiy,excitement,3,3,2
412,23,17,"[-6.264400004813267, -5.239062339847094, -4.72...","[5.878397163971902, -6.430141588554244, -11.04...","[6.265703153080619, 5.7513450467094, 0.1087818...","[10.77405565695624, 11.28418632089431, 10.2558...","[-8.565089727038279, 7.844667256426551, 16.049...","[1.4333426212944123, 6.0485652422077125, 9.125...","[-36.47451244649021, -37.505569346830654, -39....","[-16.793433465247425, -12.178954818932333, -11...",...,"[11.40135085381238, -8.08989651508042, -39.888...","[-9.197435699224982, 1.0585177042776617, 4.134...","[4.8526302066835605, 7.928167153135863, 4.3370...",25,male,The Shawshank Redemption,sadness,2,2,4


In [35]:
import pickle
with open('../Extracted_Features/Final_Raw_Data.pickle', 'wb') as f:
    # Pickle the 'data' dictionary using the highest protocol available.
    pickle.dump(final_df, f, pickle.HIGHEST_PROTOCOL)
# The following example reads the resulting pickled data.

In [6]:
with open('../Extracted_Features/Final_Raw_Data.pickle', 'rb') as f:
    # The protocol version used is detected automatically, so we do not
    # have to specify it.
    final_df = pickle.load(f)

ModuleNotFoundError: No module named 'numpy._core.numeric'

In [4]:
final_df

Unnamed: 0,participant,video,AF3_raw,F7_raw,F3_raw,FC5_raw,T7_raw,P7_raw,O1_raw,O2_raw,...,F4_raw,F8_raw,AF4_raw,Age,Gender,Video_Name,Target_Emotion,Valence,Arousal,Dominance
0,1,1,"[-10.349212474280488, 0.9320963567671932, 4.52...","[-7.336422979280564, -2.7212738534012937, 0.35...","[-6.357392050340919, -46.86718793292361, -64.3...","[27.834342803367694, -39.340612753736416, -63....","[22.051642042275102, -40.510159638368734, -71....","[-25.750224861795058, -41.64657512470267, -48....","[0.3891795592307721, -50.89300581509636, -65.7...","[43.423268402468864, -42.7273519601261, -81.69...",...,"[-10.364532668578148, -26.259576176464076, -32...","[-2.7922149552880873, -51.50648426600064, -69....","[-0.5554431681218347, -74.90768364976493, -108...",22,male,Searching for Bobby Fischer,calmness,4,3,2
1,1,2,"[-0.38710522523616814, 7.818181696969973, 5.25...","[-0.9492062185599682, 8.281459209581069, 2.640...","[-29.84689530067401, -43.18212359451074, -35.4...","[-2.218007780462737, -29.91257528703265, -13.5...","[-29.2927823170884, -56.472712994591255, -43.6...","[-1.1194001362567376, -3.1707960975111944, 0.4...","[-31.946807814430898, -43.741895514175624, -29...","[-55.545831695510955, -85.80257081294347, -68....",...,"[-17.208695619364846, -16.19140531780937, -11....","[-122.12733116987303, -138.02650317995787, -12...","[-85.11469615543234, -90.75595696563374, -65.6...",22,male,D.O.A.,surprise,3,3,1
2,1,3,"[6.740238271785735, -1.464670313963118, -7.105...","[3.413843732230308, -4.790622767795466, -10.94...","[-47.36015967778116, -42.74643128266957, -39.6...","[-51.68328934853619, -26.555439767001214, -9.1...","[-62.37528256815315, -45.452432926611465, -30....","[-24.54733880679436, -18.391668699835865, -11....","[-77.01211916934554, -62.137931068780205, -46....","[-70.50876618489838, -48.97123494887409, -36.6...",...,"[-21.93647870702945, -27.57728772842303, -25.5...","[-92.89575464126565, -81.61575420193722, -68.7...","[-117.74840854597883, -110.56750845050888, -11...",22,male,The Hangover,amusement,5,4,4
3,1,4,"[-7.798221562777997, 4.509400087366941, 9.1247...","[-5.116608810783481, 4.626821600452269, 11.293...","[24.93125629049786, 11.084152372000009, 10.057...","[37.87436013579214, -10.845343361281941, -23.1...","[50.74481684256556, 2.538494081670473, -3.6165...","[9.49379155066421, 1.288462268119328, 2.313902...","[27.807237142070754, -3.4762036998813866, -3.4...","[77.15881665492797, -7.457717821908049, -38.22...",...,"[5.750397059373909, 2.6719938533138072, 4.2089...","[44.528006638508316, 13.75661411855691, 10.164...","[34.00357410810434, -22.402803761126375, -33.6...",22,male,The Ring,fear,4,3,2
4,1,5,"[-1.0540439758948354, 10.2278997652951, 7.6636...","[-2.913624451554164, 7.855432900298843, 3.7526...","[-77.78475320526675, -85.50147614873666, -73.7...","[-7.365409600031402, -40.69691126850421, -29.4...","[8.953908321623967, -76.68765838967207, -62.84...","[-7.614922043039722, -9.66652009112654, -4.025...","[-6.910755146870472, -41.27047808440868, -24.8...","[-30.26654058201691, -98.98452880038919, -47.7...",...,"[-22.956039179678584, -22.95605209016051, -15....","[5.610392693926274, -68.23502592613708, -65.67...","[13.143391845043018, -77.11309307936695, -52.4...",22,male,300,excitement,4,4,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409,23,14,"[-31.704198905027933, -32.72951525295624, -28....","[-15.555043379043072, -17.605404769358245, -20...","[-18.61622651269403, -13.999609198624913, -15....","[-14.698681349777496, -13.672134060956148, -14...","[-23.30060275410491, -22.78753654145279, -17.6...","[-8.816268873573902, -6.252239101327289, -8.81...","[279.79109403670617, 285.42943133898524, 287.4...","[-7.173135811725212, -1.0185719947610834, 2.05...",...,"[-1240.0690002671954, -1196.9337449674017, -93...","[-15.402448839378042, -15.401729520758739, -17...","[-37.972611579298515, -38.48291150179311, -40....",25,male,Gentlemans Agreement,anger,2,2,2
410,23,15,"[5.466920035649326, 2.9008550036569596, -1.203...","[19.824976691430564, 16.74719391801507, 16.233...","[22.984495698257362, 19.393947142874296, 18.36...","[10.522773668263424, 11.035265103820846, 11.03...","[21.95946583143874, 24.009312500894033, 28.110...","[21.238268640320484, 23.80170581178004, 21.236...","[20.594802762840782, 20.593162081299607, 20.59...","[38.0866367831533, 34.49652294398144, 32.44487...",...,"[-256.4677886910428, -450.34520134403937, -462...","[21.75392584043096, 20.215181211643205, 16.112...","[18.966287091329455, 18.45239653087682, 15.374...",25,male,Psycho,fear,2,2,2
411,23,16,"[-23.62545386161945, -23.62685140441953, -20.0...","[-13.207442040493266, -10.132756016797817, -12...","[5.919693285935381, 5.404129601617996, 6.93984...","[2.4862210644104663, 4.021971582134875, 9.6602...","[-1.9057467428498671, -3.960635285737833, -15....","[4.3931845090082255, 4.390480711103866, 2.3364...","[-2.3892454894785056, -4.442585452677101, -7.0...","[19.635451596486917, 20.658453846575583, 22.70...",...,"[-82.28162249200449, -125.86801543132987, -127...","[-9.257969718958025, -6.69583084101619, -3.620...","[-18.03784008673325, -18.03905038031304, -18.5...",25,male,The Bourne Identitiy,excitement,3,3,2
412,23,17,"[-6.264400004813267, -5.239062339847094, -4.72...","[5.878397163971902, -6.430141588554244, -11.04...","[6.265703153080619, 5.7513450467094, 0.1087818...","[10.77405565695624, 11.28418632089431, 10.2558...","[-8.565089727038279, 7.844667256426551, 16.049...","[1.4333426212944123, 6.0485652422077125, 9.125...","[-36.47451244649021, -37.505569346830654, -39....","[-16.793433465247425, -12.178954818932333, -11...",...,"[11.40135085381238, -8.08989651508042, -39.888...","[-9.197435699224982, 1.0585177042776617, 4.134...","[4.8526302066835605, 7.928167153135863, 4.3370...",25,male,The Shawshank Redemption,sadness,2,2,4


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import xgboost as xgb

# Assuming 'final_df' is already loaded

# Define the target variable
target = 'Target_Emotion'

# Extract DWT features
eeg_features = [col for col in final_df.columns if col.endswith(('raw'))]


In [6]:
# Encode the target variable
label_encoder = LabelEncoder()
final_df[target] = label_encoder.fit_transform(final_df[target])

In [7]:
# Prepare the data for EEG model
X = final_df[eeg_features]
y = final_df[target]

In [8]:
import torch
# setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

#Additional Info when using cuda
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

Using device: cuda

NVIDIA RTX A2000 12GB
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


In [11]:
X_array = X.to_numpy()

# Initialize an empty array to hold the reshaped data
reshaped_data = np.zeros((414, 14, 7680))

# Populate the reshaped_data array
for i in range(414):
    for j in range(14):
        reshaped_data[i, j, :] = X_array[i, j]

# Convert the reshaped NumPy array to a PyTorch tensor
standardized_data = torch.tensor(reshaped_data, dtype=torch.float32)

# # Normalize the data
# mean = standardized_data.mean(dim=(0, 2), keepdim=True)
# std = standardized_data.std(dim=(0, 2), keepdim=True)
# standardized_data = (standardized_data - mean) / std

In [12]:
y_indices = torch.tensor(y.to_numpy(), dtype=torch.long)

#### CNN Model on Raw EEG data

#### For classifying emotions

In [168]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, Subset
import numpy as np
from sklearn.model_selection import KFold
import torch.optim as optim

# Assuming y_one_hot and standardized_data are already defined
# Convert y_one_hot to PyTorch tensor
# y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)

# Reshape standardized_data to fit the CNN input requirements
# Reshape each sample to (batch_size, in_channels, sequence_length)
standardized_data = torch.tensor(standardized_data, dtype=torch.float32).view(414, 14, 7680)

# Convert y_one_hot to class indices
# y_indices = torch.argmax(y_one_hot, dim=1)

# Create a dataset
dataset = TensorDataset(standardized_data, y_indices)

# Define a more complex CNN model
class ComplexCNNModel(nn.Module):
    def __init__(self):
        super(ComplexCNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=14, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm1d(32)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm1d(64)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm1d(128)
        self.conv4 = nn.Conv1d(128, 256, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm1d(256)
        self.fc1 = nn.Linear(256 * 480, 512)  # Adjusted input size
        self.bn5 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 128)
        self.bn6 = nn.BatchNorm1d(128)
        self.fc3 = nn.Linear(128, 9)
        self.dropout = nn.Dropout(0.5)  # Increased dropout rate

    def forward(self, x):
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))
        x = self.pool(torch.relu(self.bn3(self.conv3(x))))
        x = self.pool(torch.relu(self.bn4(self.conv4(x))))
        x = x.view(x.size(0), -1)  # Ensure the batch size is preserved
        x = torch.relu(self.bn5(self.fc1(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn6(self.fc2(x)))
        x = self.fc3(x)
        return x

# Define the loss function and optimizer with L2 regularization
criterion = nn.CrossEntropyLoss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ComplexCNNModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # Increased weight decay
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# Implement early stopping
class EarlyStopping:
    def __init__(self, patience=10, delta=0.01):
        self.patience = patience
        self.delta = delta
        self.best_score = None
        self.early_stop = False
        self.counter = 0

    def __call__(self, val_loss, model, path):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        torch.save(model.state_dict(), path)

# Training loop with early stopping
early_stopping = EarlyStopping(patience=10, delta=0.01)
num_epochs = 30  # Increased number of epochs

# Function to save the model
def save_model(model, path):
    torch.save(model.state_dict(), path)

# Function to load the model
def load_model(model, path):
    model.load_state_dict(torch.load(path))

# Path to save the best model
model_path = '../Models/raw_data_cnn2.pth'

# K-Fold Cross Validation
k_folds = 5
kf = KFold(n_splits=k_folds, shuffle=True)

# Initialize lists to store loss and accuracy
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
    print(f'Fold {fold+1}/{k_folds}')
    
    train_subset = Subset(dataset, train_idx)
    val_subset = Subset(dataset, val_idx)
    train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)
    
    model = ComplexCNNModel().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
    
    best_accuracy = 0.0
    
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            epoch_loss += loss.item()
        
        train_losses.append(epoch_loss / len(train_loader))
        scheduler.step()
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {train_losses[-1]:.4f}')
        
        model.eval()
        train_loss = 0
        correct_train_predictions = 0
        total_train_predictions = 0
        with torch.no_grad():
            for batch_x, batch_y in train_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                train_loss += loss.item()
                predicted_classes = torch.argmax(outputs, dim=1)
                correct_train_predictions += (predicted_classes == batch_y).sum().item()
                total_train_predictions += batch_x.size(0)
        
        train_accuracy = correct_train_predictions / total_train_predictions * 100
        train_accuracies.append(train_accuracy)
        
        print(f"Accuracy on training set: {train_accuracy:.2f}%")
        
        val_loss = 0
        correct_predictions = 0
        total_predictions = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()
                predicted_classes = torch.argmax(outputs, dim=1)
                correct_predictions += (predicted_classes == batch_y).sum().item()
                total_predictions += batch_x.size(0)
        
        val_losses.append(val_loss / len(val_loader))
        accuracy = correct_predictions / total_predictions * 100
        val_accuracies.append(accuracy)

        print(f'Validation Loss: {val_losses[-1]:.4f}')
        
        print(f"Accuracy on validation set: {accuracy:.2f}%\n")
        
        early_stopping(val_loss, model, model_path)
        
        if early_stopping.early_stop:
            print("Early stopping")
            break

# Load the best model parameters for final evaluation or further training
load_model(model, model_path)
print("Loaded the best model parameters for final evaluation or further training.")

Fold 1/5
Epoch [1/30], Training Loss: 2.2869
Accuracy on training set: 17.52%
Validation Loss: 2.2508
Accuracy on validation set: 8.43%

Epoch [2/30], Training Loss: 2.1278
Accuracy on training set: 20.85%
Validation Loss: 2.1989
Accuracy on validation set: 9.64%

Epoch [3/30], Training Loss: 1.8732
Accuracy on training set: 39.88%
Validation Loss: 2.1500
Accuracy on validation set: 13.25%

Epoch [4/30], Training Loss: 1.4333
Accuracy on training set: 60.73%
Validation Loss: 1.9942
Accuracy on validation set: 30.12%

Epoch [5/30], Training Loss: 0.9086
Accuracy on training set: 93.66%
Validation Loss: 1.6627
Accuracy on validation set: 50.60%

Epoch [6/30], Training Loss: 0.5948
Accuracy on training set: 97.58%
Validation Loss: 1.5425
Accuracy on validation set: 57.83%

Epoch [7/30], Training Loss: 0.4246
Accuracy on training set: 99.40%
Validation Loss: 1.5773
Accuracy on validation set: 49.40%

Epoch [8/30], Training Loss: 0.3181
Accuracy on training set: 99.70%
Validation Loss: 1.52

### Evaluating the model (Cross fold Validation)

In [171]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Function to evaluate the final model
def evaluate_final_model():
    k_folds = 5
    kf = KFold(n_splits=k_folds, shuffle=True)
    
    val_accuracies = []
    
    for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
        print(f'Fold {fold+1}/{k_folds}')
        
        val_subset = Subset(dataset, val_idx)
        val_loader = DataLoader(val_subset, batch_size=64, shuffle=False)
        
        model.eval()
        
        correct_predictions = 0
        total_predictions = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                predicted_classes = torch.argmax(outputs, dim=1)
                correct_predictions += (predicted_classes == batch_y).sum().item()
                total_predictions += batch_x.size(0)
        
        accuracy = correct_predictions / total_predictions * 100
        val_accuracies.append(accuracy)
    
    mean_accuracy = np.mean(val_accuracies)
    print(f'Final Model Mean Validation Accuracy: {mean_accuracy:.2f}%')

evaluate_final_model()

Fold 1/5
Fold 2/5
Fold 3/5
Fold 4/5
Fold 5/5
Final Model Mean Validation Accuracy: 90.58%


In [198]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Function to evaluate the final model
def evaluate_model_per_class():
    k_folds = 5
    kf = KFold(n_splits=k_folds, shuffle=True)
    
    num_classes = 9
    val_accuracies_per_class = np.zeros((k_folds, num_classes))
    
    for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
        print(f'Fold {fold+1}/{k_folds}')
        
        val_subset = Subset(dataset, val_idx)
        val_loader = DataLoader(val_subset, batch_size=64, shuffle=False)
        
        model.eval()
        
        correct_predictions_per_class = np.zeros(num_classes)
        total_predictions_per_class = np.zeros(num_classes)
        
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                predicted_classes = torch.argmax(outputs, dim=1)
                
                for i in range(num_classes):
                    correct_predictions_per_class[i] += ((predicted_classes == i) & (batch_y == i)).sum().item()
                    total_predictions_per_class[i] += (batch_y == i).sum().item()
        
        for i in range(num_classes):
            if total_predictions_per_class[i] > 0:
                val_accuracies_per_class[fold, i] = correct_predictions_per_class[i] / total_predictions_per_class[i] * 100
            else:
                val_accuracies_per_class[fold, i] = float('nan')  # Handle case where there are no samples for a class
    
    mean_accuracies_per_class = np.nanmean(val_accuracies_per_class, axis=0)
    
    for i in range(num_classes):
        print(f'Final Model Mean Validation Accuracy for Class {label_encoder.inverse_transform([i])[0]}: {mean_accuracies_per_class[i]:.2f}%')

evaluate_model_per_class()

Fold 1/5
Fold 2/5
Fold 3/5
Fold 4/5
Fold 5/5
Final Model Mean Validation Accuracy for Class amusement: 96.79%
Final Model Mean Validation Accuracy for Class anger: 92.14%
Final Model Mean Validation Accuracy for Class calmness: 88.33%
Final Model Mean Validation Accuracy for Class disgust: 90.91%
Final Model Mean Validation Accuracy for Class excitement: 95.00%
Final Model Mean Validation Accuracy for Class fear: 89.17%
Final Model Mean Validation Accuracy for Class happiness: 90.73%
Final Model Mean Validation Accuracy for Class sadness: 90.95%
Final Model Mean Validation Accuracy for Class surprise: 91.46%


In [199]:
# Evaluating on random part of dataset just to check

In [13]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, Subset
import numpy as np
from sklearn.model_selection import KFold
import torch.optim as optim

# Assuming y_one_hot and standardized_data are already defined
# Convert y_one_hot to PyTorch tensor
# y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)

# Reshape standardized_data to fit the CNN input requirements
# Reshape each sample to (batch_size, in_channels, sequence_length)
standardized_data = torch.tensor(standardized_data, dtype=torch.float32).view(414, 14, 7680)

# Convert y_one_hot to class indices
# y_indices = torch.argmax(y_one_hot, dim=1)

# Create a dataset
dataset = TensorDataset(standardized_data, y_indices)
# Define a more complex CNN model
class ComplexCNNModel(nn.Module):
    def __init__(self):
        super(ComplexCNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=14, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm1d(32)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm1d(64)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm1d(128)
        self.conv4 = nn.Conv1d(128, 256, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm1d(256)
        self.fc1 = nn.Linear(256 * 480, 512)  # Adjusted input size
        self.bn5 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 128)
        self.bn6 = nn.BatchNorm1d(128)
        self.fc3 = nn.Linear(128, 9)
        self.dropout = nn.Dropout(0.5)  # Increased dropout rate

    def forward(self, x):
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))
        x = self.pool(torch.relu(self.bn3(self.conv3(x))))
        x = self.pool(torch.relu(self.bn4(self.conv4(x))))
        x = x.view(x.size(0), -1)  # Ensure the batch size is preserved
        x = torch.relu(self.bn5(self.fc1(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn6(self.fc2(x)))
        x = self.fc3(x)
        return x

# Define the loss function and optimizer with L2 regularization
criterion = nn.CrossEntropyLoss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ComplexCNNModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # Increased weight decay
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
# Function to save the model
def save_model(model, path):
    torch.save(model.state_dict(), path)

# Function to load the model
def load_model(model, path):
    model.load_state_dict(torch.load(path))

# Path to save the best model
model_path = '../Models/raw_data_cnn2.pth'
# Load the best model parameters for final evaluation or further training
load_model(model, model_path)

In [1]:
from torchviz import make_dot
import torch
from torch.utils.data import DataLoader, random_split
import numpy as np

model.eval()  # Set the model to evaluation mode

# Assuming standardized_data and y_one_hot are already defined and converted to tensors
standardized_data = torch.tensor(standardized_data, dtype=torch.float32).view(414, 14, 7680)
# y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)
# y_indices = torch.argmax(y_one_hot, dim=1)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create a dataset
dataset = TensorDataset(standardized_data, y_indices)

# Split the dataset into training and test sets (e.g., 80% train, 20% test)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])
y = model(test_dataset)
make_dot(y.mean(), params=dict(model.named_parameters()))

NameError: name 'model' is not defined

In [73]:
import torch
from torch.utils.data import DataLoader, random_split
import numpy as np

model.eval()  # Set the model to evaluation mode

# Assuming standardized_data and y_one_hot are already defined and converted to tensors
standardized_data = torch.tensor(standardized_data, dtype=torch.float32).view(414, 14, 7680)
# y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)
# y_indices = torch.argmax(y_one_hot, dim=1)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create a dataset
dataset = TensorDataset(standardized_data, y_indices)

# Split the dataset into training and test sets (e.g., 80% train, 20% test)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create a DataLoader for the test dataset
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)

# Evaluate the model on the test dataset
correct_predictions = 0
total_predictions = 0
with torch.no_grad():
    for batch_x, batch_y in test_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        outputs = model(batch_x)
        predicted_classes = torch.argmax(outputs, dim=1)
        correct_predictions += (predicted_classes == batch_y).sum().item()
        total_predictions += batch_x.size(0)

test_accuracy = correct_predictions / total_predictions * 100
print(f"Accuracy on test dataset: {test_accuracy:.2f}%")

Accuracy on test dataset: 93.98%


### The below model gave less accuracy than the one we have gotten

In [125]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, Subset
import numpy as np
from sklearn.model_selection import KFold
import torch.optim as optim

# Assuming y_one_hot and standardized_data are already defined
# Convert y_one_hot to PyTorch tensor
# y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)

# Reshape standardized_data to fit the CNN input requirements
# Reshape each sample to (batch_size, in_channels, sequence_length)
standardized_data = torch.tensor(standardized_data, dtype=torch.float32).view(414, 14, 7680)

# Convert y_one_hot to class indices
# y_indices = torch.argmax(y_one_hot, dim=1)

# Create a dataset
dataset = TensorDataset(standardized_data, y_indices)

# Define a more complex CNN model
class ComplexCNNModel(nn.Module):
    def __init__(self):
        super(ComplexCNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=14, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm1d(32)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm1d(64)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm1d(128)
        self.conv4 = nn.Conv1d(128, 256, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm1d(256)
        self.fc1 = nn.Linear(256 * 480, 512)  # Adjusted input size
        self.bn5 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 128)
        self.bn6 = nn.BatchNorm1d(128)
        self.fc3 = nn.Linear(128, 9)
        self.dropout = nn.Dropout(0.5)  # Increased dropout rate

    def forward(self, x):
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))
        x = self.pool(torch.relu(self.bn3(self.conv3(x))))
        x = self.pool(torch.relu(self.bn4(self.conv4(x))))
        x = x.view(x.size(0), -1)  # Ensure the batch size is preserved
        x = torch.relu(self.bn5(self.fc1(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn6(self.fc2(x)))
        x = self.fc3(x)
        return x

# Define the loss function and optimizer with L2 regularization
criterion = nn.CrossEntropyLoss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ComplexCNNModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # Increased weight decay
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# Implement early stopping
class EarlyStopping:
    def __init__(self, patience=5, delta=0.001):
        self.patience = patience
        self.delta = delta
        self.best_score = None
        self.early_stop = False
        self.counter = 0

    def __call__(self, val_loss, model, path):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        torch.save(model.state_dict(), path)

# Training loop with early stopping
early_stopping = EarlyStopping(patience=5, delta=0.001)
num_epochs = 30  # Increased number of epochs

# Function to save the model
def save_model(model, path):
    torch.save(model.state_dict(), path)

# Function to load the model
def load_model(model, path):
    model.load_state_dict(torch.load(path))

# Path to save the best model
model_path = '../Models/raw_data_cnn.pth'

# K-Fold Cross Validation
k_folds = 5
kf = KFold(n_splits=k_folds, shuffle=True)

# Initialize lists to store loss and accuracy
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
    print(f'Fold {fold+1}/{k_folds}')
    
    train_subset = Subset(dataset, train_idx)
    val_subset = Subset(dataset, val_idx)
    train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)
    
    model = ComplexCNNModel().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
    
    best_accuracy = 0.0
    
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            epoch_loss += loss.item()
        
        train_losses.append(epoch_loss / len(train_loader))
        scheduler.step()
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {train_losses[-1]:.4f}')
        
        model.eval()
        train_loss = 0
        correct_train_predictions = 0
        total_train_predictions = 0
        with torch.no_grad():
            for batch_x, batch_y in train_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                train_loss += loss.item()
                predicted_classes = torch.argmax(outputs, dim=1)
                correct_train_predictions += (predicted_classes == batch_y).sum().item()
                total_train_predictions += batch_x.size(0)
        
        train_accuracy = correct_train_predictions / total_train_predictions * 100
        train_accuracies.append(train_accuracy)
        
        print(f"Accuracy on training set: {train_accuracy:.2f}%")
        
        val_loss = 0
        correct_predictions = 0
        total_predictions = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()
                predicted_classes = torch.argmax(outputs, dim=1)
                correct_predictions += (predicted_classes == batch_y).sum().item()
                total_predictions += batch_x.size(0)
        
        val_losses.append(val_loss / len(val_loader))
        accuracy = correct_predictions / total_predictions * 100
        val_accuracies.append(accuracy)

        print(f'Validation Loss: {val_losses[-1]:.4f}')
        
        print(f"Accuracy on validation set: {accuracy:.2f}%\n")
        
        early_stopping(val_loss, model, model_path)
        
        if early_stopping.early_stop:
            print("Early stopping")
            break

# Calculate mean and standard deviation of validation accuracies
mean_accuracy = np.mean(val_accuracies)
std_accuracy = np.std(val_accuracies)

print(f'Mean Validation Accuracy: {mean_accuracy:.2f}%')
print(f'Standard Deviation of Validation Accuracy: {std_accuracy:.2f}%')

# Load the best model parameters for final evaluation or further training
load_model(model, model_path)
print("Loaded the best model parameters for final evaluation or further training.")

Fold 1/5
Epoch [1/30], Training Loss: 2.2645
Accuracy on training set: 22.66%
Validation Loss: 2.2252
Accuracy on validation set: 8.43%

Epoch [2/30], Training Loss: 2.0877
Accuracy on training set: 23.87%
Validation Loss: 2.2175
Accuracy on validation set: 7.23%

Epoch [3/30], Training Loss: 1.7780
Accuracy on training set: 40.48%
Validation Loss: 2.1957
Accuracy on validation set: 13.25%

Epoch [4/30], Training Loss: 1.3975
Accuracy on training set: 71.90%
Validation Loss: 2.1185
Accuracy on validation set: 19.28%

Epoch [5/30], Training Loss: 1.0109
Accuracy on training set: 94.26%
Validation Loss: 1.9609
Accuracy on validation set: 28.92%

Epoch [6/30], Training Loss: 0.7349
Accuracy on training set: 98.49%
Validation Loss: 1.8874
Accuracy on validation set: 33.73%

Epoch [7/30], Training Loss: 0.5506
Accuracy on training set: 100.00%
Validation Loss: 1.9100
Accuracy on validation set: 31.33%

Epoch [8/30], Training Loss: 0.4617
Accuracy on training set: 100.00%
Validation Loss: 1.

### Hyperparameter tuning

In [148]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, Subset
import numpy as np
from sklearn.model_selection import KFold
import torch.optim as optim

# Assuming y_one_hot and standardized_data are already defined
# Convert y_one_hot to PyTorch tensor
# y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)

# Reshape standardized_data to fit the CNN input requirements
# Reshape each sample to (batch_size, in_channels, sequence_length)
standardized_data = torch.tensor(standardized_data, dtype=torch.float32).view(414, 14, 7680)

# Convert y_one_hot to class indices
# y_indices = torch.argmax(y_one_hot, dim=1)

# Create a dataset
dataset = TensorDataset(standardized_data, y_indices)

# Define a more complex CNN model
class ComplexCNNModel(nn.Module):
    def __init__(self, dropout_rate=0.5):
        super(ComplexCNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=14, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm1d(32)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm1d(64)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm1d(128)
        self.conv4 = nn.Conv1d(128, 256, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm1d(256)
        self.fc1 = nn.Linear(256 * 480, 512)  # Adjusted input size
        self.bn5 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 128)
        self.bn6 = nn.BatchNorm1d(128)
        self.fc3 = nn.Linear(128, 9)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))
        x = self.pool(torch.relu(self.bn3(self.conv3(x))))
        x = self.pool(torch.relu(self.bn4(self.conv4(x))))
        x = x.view(x.size(0), -1)  # Ensure the batch size is preserved
        x = torch.relu(self.bn5(self.fc1(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn6(self.fc2(x)))
        x = self.fc3(x)
        return x

# Define the loss function
criterion = nn.CrossEntropyLoss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Implement early stopping
class EarlyStopping:
    def __init__(self, patience=5, delta=0.001):
        self.patience = patience
        self.delta = delta
        self.best_score = None
        self.early_stop = False
        self.counter = 0

    def __call__(self, val_loss, model, path):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        torch.save(model.state_dict(), path)

# Function to save the model
def save_model(model, path):
    torch.save(model.state_dict(), path)

# Function to load the model
def load_model(model, path):
    model.load_state_dict(torch.load(path))

# Path to save the best model
model_path = '../Models/raw_data_cnn.pth'

# K-Fold Cross Validation
k_folds = 5
kf = KFold(n_splits=k_folds, shuffle=True)

# Hyperparameter tuning
learning_rates = [0.001, 0.0005]
weight_decays = [1e-4, 1e-5]
dropout_rates = [0.5, 0.3]

best_mean_accuracy = 0.0
best_hyperparams = {}

for lr in learning_rates:
    for wd in weight_decays:
        for dr in dropout_rates:
            print(f'Tuning with lr={lr}, wd={wd}, dr={dr}')
            
            # Initialize lists to store loss and accuracy
            val_accuracies = []

            for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
                print(f'Fold {fold+1}/{k_folds}')
                
                train_subset = Subset(dataset, train_idx)
                val_subset = Subset(dataset, val_idx)
                train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
                val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)
                
                model = ComplexCNNModel(dropout_rate=dr).to(device)
                optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
                scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
                
                early_stopping = EarlyStopping(patience=10, delta=0.01)
                num_epochs = 30
                
                for epoch in range(num_epochs):
                    model.train()
                    for batch_x, batch_y in train_loader:
                        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                        optimizer.zero_grad()
                        outputs = model(batch_x)
                        loss = criterion(outputs, batch_y)
                        loss.backward()
                        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                        optimizer.step()
                    
                    scheduler.step()
                    
                    model.eval()
                    val_loss = 0
                    correct_predictions = 0
                    total_predictions = 0
                    with torch.no_grad():
                        for batch_x, batch_y in val_loader:
                            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                            outputs = model(batch_x)
                            loss = criterion(outputs, batch_y)
                            val_loss += loss.item()
                            predicted_classes = torch.argmax(outputs, dim=1)
                            correct_predictions += (predicted_classes == batch_y).sum().item()
                            total_predictions += batch_x.size(0)
                    
                    accuracy = correct_predictions / total_predictions * 100
                    val_accuracies.append(accuracy)
                    
                    early_stopping(val_loss, model, model_path)
                    
                    if early_stopping.early_stop:
                        print("Early stopping")
                        break

            mean_accuracy = np.mean(val_accuracies)
            print(f'Mean Validation Accuracy: {mean_accuracy:.2f}%')

            if mean_accuracy > best_mean_accuracy:
                best_mean_accuracy = mean_accuracy
                best_hyperparams = {'lr': lr, 'wd': wd, 'dr': dr}

print(f'Best Hyperparameters: {best_hyperparams}')
print(f'Best Mean Validation Accuracy: {best_mean_accuracy:.2f}%')

# Load the best model parameters for final evaluation or further training
load_model(model, model_path)
print("Loaded the best model parameters for final evaluation or further training.")

Tuning with lr=0.001, wd=0.0001, dr=0.5
Fold 1/5


KeyboardInterrupt: 

In [155]:
import itertools

# Define hyperparameter ranges
learning_rates = [0.001, 0.0005, 0.0001]
weight_decays = [1e-4, 1e-5, 1e-6]
dropout_rates = [0.5, 0.3, 0.2]
batch_sizes = [32, 64]
num_epochs_list = [30, 50]

# Function to perform hyperparameter tuning
def hyperparameter_tuning():
    best_accuracy = 0
    best_hyperparams = {}
    
    for lr, wd, dr, batch_size, num_epochs in itertools.product(learning_rates, weight_decays, dropout_rates, batch_sizes, num_epochs_list):
        print(f"Tuning with lr={lr}, wd={wd}, dr={dr}, batch_size={batch_size}, num_epochs={num_epochs}")
        
        k_folds = 5
        kf = KFold(n_splits=k_folds, shuffle=True)
        
        val_accuracies = []
        
        for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
            print(f'Fold {fold+1}/{k_folds}')
            
            train_subset = Subset(dataset, train_idx)
            val_subset = Subset(dataset, val_idx)
            train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
            val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)
            
            model = ComplexCNNModel(dropout_rate=dr).to(device)
            optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
            scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
            early_stopping = EarlyStopping(patience=10, delta=0.01)
            
            for epoch in range(num_epochs):
                model.train()
                for batch_x, batch_y in train_loader:
                    batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                    optimizer.zero_grad()
                    outputs = model(batch_x)
                    loss = criterion(outputs, batch_y)
                    loss.backward()
                    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                    optimizer.step()
                
                scheduler.step()
                
                model.eval()
                val_loss = 0
                correct_predictions = 0
                total_predictions = 0
                with torch.no_grad():
                    for batch_x, batch_y in val_loader:
                        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                        outputs = model(batch_x)
                        loss = criterion(outputs, batch_y)
                        val_loss += loss.item()
                        predicted_classes = torch.argmax(outputs, dim=1)
                        correct_predictions += (predicted_classes == batch_y).sum().item()
                        total_predictions += batch_x.size(0)
                
                accuracy = correct_predictions / total_predictions * 100
                val_accuracies.append(accuracy)
                
                early_stopping(val_loss, model, model_path)
                
                if early_stopping.early_stop:
                    print("Early stopping")
                    break
        
        mean_accuracy = np.mean(val_accuracies)
        print(f'Mean Validation Accuracy: {mean_accuracy:.2f}%')
        
        if mean_accuracy > best_accuracy:
            best_accuracy = mean_accuracy
            best_hyperparams = {'lr': lr, 'wd': wd, 'dr': dr, 'batch_size': batch_size, 'num_epochs': num_epochs}
    
    print(f'Best Hyperparameters: {best_hyperparams}')
    print(f'Best Mean Validation Accuracy: {best_accuracy:.2f}%')

# Run hyperparameter tuning
hyperparameter_tuning()

Tuning with lr=0.001, wd=0.0001, dr=0.5, batch_size=32, num_epochs=30
Fold 1/5
Early stopping
Fold 2/5
Fold 3/5
Early stopping
Fold 4/5
Early stopping
Fold 5/5
Early stopping
Mean Validation Accuracy: 41.58%
Tuning with lr=0.001, wd=0.0001, dr=0.5, batch_size=32, num_epochs=50
Fold 1/5
Early stopping
Fold 2/5
Early stopping
Fold 3/5
Early stopping
Fold 4/5
Early stopping
Fold 5/5
Early stopping
Mean Validation Accuracy: 40.18%
Tuning with lr=0.001, wd=0.0001, dr=0.5, batch_size=64, num_epochs=30
Fold 1/5
Early stopping
Fold 2/5
Fold 3/5
Early stopping
Fold 4/5
Fold 5/5
Early stopping
Mean Validation Accuracy: 34.06%
Tuning with lr=0.001, wd=0.0001, dr=0.5, batch_size=64, num_epochs=50
Fold 1/5
Early stopping
Fold 2/5
Early stopping
Fold 3/5
Early stopping
Fold 4/5
Early stopping
Fold 5/5
Early stopping
Mean Validation Accuracy: 34.86%
Tuning with lr=0.001, wd=0.0001, dr=0.3, batch_size=32, num_epochs=30
Fold 1/5
Early stopping
Fold 2/5
Fold 3/5
Early stopping
Fold 4/5
Early stopping
Fo

### Evaluating the best model's mean validation accuracy

In [156]:
best_hyperparams = {'lr': 0.001, 'wd': 1e-05, 'dr': 0.5, 'batch_size': 32, 'num_epochs': 50}

In [157]:
# Function to evaluate the final model
def evaluate_final_model(best_hyperparams):
    lr = best_hyperparams['lr']
    wd = best_hyperparams['wd']
    dr = best_hyperparams['dr']
    batch_size = best_hyperparams['batch_size']
    
    k_folds = 5
    kf = KFold(n_splits=k_folds, shuffle=True)
    
    val_accuracies = []
    
    for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
        print(f'Fold {fold+1}/{k_folds}')
        
        val_subset = Subset(dataset, val_idx)
        val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)
        
        model = ComplexCNNModel(dropout_rate=dr).to(device)
        model.load_state_dict(torch.load(model_path))
        model.eval()
        
        correct_predictions = 0
        total_predictions = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                predicted_classes = torch.argmax(outputs, dim=1)
                correct_predictions += (predicted_classes == batch_y).sum().item()
                total_predictions += batch_x.size(0)
        
        accuracy = correct_predictions / total_predictions * 100
        val_accuracies.append(accuracy)
    
    mean_accuracy = np.mean(val_accuracies)
    print(f'Final Model Mean Validation Accuracy: {mean_accuracy:.2f}%')

evaluate_final_model(best_hyperparams)

Fold 1/5
Fold 2/5
Fold 3/5
Fold 4/5
Fold 5/5
Final Model Mean Validation Accuracy: 85.27%


### The below model is simplified version of CNN model

In [144]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, Subset
import numpy as np
from sklearn.model_selection import KFold
import torch.optim as optim

# Assuming y_one_hot and standardized_data are already defined
# Convert y_one_hot to PyTorch tensor
# y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)

# Reshape standardized_data to fit the CNN input requirements
# Reshape each sample to (batch_size, in_channels, sequence_length)
standardized_data = torch.tensor(standardized_data, dtype=torch.float32).view(414, 14, 7680)

# Convert y_one_hot to class indices
# y_indices = torch.argmax(y_one_hot, dim=1)

# Create a dataset
dataset = TensorDataset(standardized_data, y_indices)

# Define a more complex CNN model
class ComplexCNNModel(nn.Module):
    def __init__(self):
        super(ComplexCNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=14, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm1d(16)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm1d(32)
        self.fc1 = nn.Linear(32 * 1920, 256)  # Adjusted input size
        self.bn3 = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 9)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))
        x = x.view(x.size(0), -1)
        x = torch.relu(self.bn3(self.fc1(x)))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Define the loss function and optimizer with L2 regularization
criterion = nn.CrossEntropyLoss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ComplexCNNModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # Increased weight decay
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# Implement early stopping
class EarlyStopping:
    def __init__(self, patience=10, delta=0.01):
        self.patience = patience
        self.delta = delta
        self.best_score = None
        self.early_stop = False
        self.counter = 0

    def __call__(self, val_loss, model, path):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        torch.save(model.state_dict(), path)

# Training loop with early stopping
early_stopping = EarlyStopping(patience=10, delta=0.01)
num_epochs = 30  # Increased number of epochs

# Function to save the model
def save_model(model, path):
    torch.save(model.state_dict(), path)

# Function to load the model
def load_model(model, path):
    model.load_state_dict(torch.load(path))

# Path to save the best model
model_path = '../Models/raw_data_cnn_simplified.pth'

# K-Fold Cross Validation
k_folds = 5
kf = KFold(n_splits=k_folds, shuffle=True)

# Initialize lists to store loss and accuracy
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
    print(f'Fold {fold+1}/{k_folds}')
    
    train_subset = Subset(dataset, train_idx)
    val_subset = Subset(dataset, val_idx)
    train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)
    
    model = ComplexCNNModel().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
    
    best_accuracy = 0.0
    
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            epoch_loss += loss.item()
        
        train_losses.append(epoch_loss / len(train_loader))
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {train_losses[-1]:.4f}')
        
        model.eval()
        train_loss = 0
        correct_train_predictions = 0
        total_train_predictions = 0
        with torch.no_grad():
            for batch_x, batch_y in train_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                train_loss += loss.item()
                predicted_classes = torch.argmax(outputs, dim=1)
                correct_train_predictions += (predicted_classes == batch_y).sum().item()
                total_train_predictions += batch_x.size(0)
        
        train_accuracy = correct_train_predictions / total_train_predictions * 100
        train_accuracies.append(train_accuracy)
        
        print(f"Accuracy on training set: {train_accuracy:.2f}%")
        
        val_loss = 0
        correct_predictions = 0
        total_predictions = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()
                predicted_classes = torch.argmax(outputs, dim=1)
                correct_predictions += (predicted_classes == batch_y).sum().item()
                total_predictions += batch_x.size(0)
        
        val_losses.append(val_loss / len(val_loader))
        accuracy = correct_predictions / total_predictions * 100
        val_accuracies.append(accuracy)

        print(f'Validation Loss: {val_losses[-1]:.4f}')
        
        print(f"Accuracy on validation set: {accuracy:.2f}%\n")
        
        scheduler.step(val_loss)
        early_stopping(val_loss, model, model_path)
        
        if early_stopping.early_stop:
            print("Early stopping")
            break

# Load the best model parameters for final evaluation or further training
load_model(model, model_path)
print("Loaded the best model parameters for final evaluation or further training.")

Fold 1/5
Epoch [1/30], Training Loss: 2.3254
Accuracy on training set: 61.33%
Validation Loss: 2.2622
Accuracy on validation set: 10.84%

Epoch [2/30], Training Loss: 1.9431
Accuracy on training set: 48.34%
Validation Loss: 2.2311
Accuracy on validation set: 8.43%

Epoch [3/30], Training Loss: 1.6677
Accuracy on training set: 79.76%
Validation Loss: 2.2393
Accuracy on validation set: 15.66%

Epoch [4/30], Training Loss: 1.3339
Accuracy on training set: 92.15%
Validation Loss: 2.2123
Accuracy on validation set: 16.87%

Epoch [5/30], Training Loss: 0.9659
Accuracy on training set: 97.28%
Validation Loss: 2.1491
Accuracy on validation set: 20.48%

Epoch [6/30], Training Loss: 0.6004
Accuracy on training set: 99.40%
Validation Loss: 2.0496
Accuracy on validation set: 28.92%

Epoch [7/30], Training Loss: 0.3396
Accuracy on training set: 100.00%
Validation Loss: 1.9714
Accuracy on validation set: 30.12%

Epoch [8/30], Training Loss: 0.1768
Accuracy on training set: 100.00%
Validation Loss: 1

### Old Models

In [53]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, Subset
import numpy as np
from sklearn.model_selection import KFold
import torch.optim as optim

# Assuming y_one_hot is already defined
# Convert y_one_hot to PyTorch tensor
# y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)

# Reshape standardized_data to fit the CNN input requirements
# Reshape each sample to (batch_size, in_channels, sequence_length)
standardized_data = standardized_data.view(414, 14, 7680)

# Convert y_one_hot to class indices
# y_indices = torch.argmax(y_one_hot, dim=1)

# Create a dataset
dataset = TensorDataset(standardized_data, y_indices)

# Define a simpler CNN model
class SimpleCNNModel(nn.Module):
    def __init__(self):
        super(SimpleCNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=14, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm1d(16)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm1d(32)
        self.fc1 = nn.Linear(32 * 1920, 128)
        self.bn3 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 9)
        self.dropout = nn.Dropout(0.5)  # Moderate dropout rate

    def forward(self, x):
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))
        x = x.view(-1, 32 * 1920)
        x = torch.relu(self.bn3(self.fc1(x)))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Define the loss function and optimizer with L2 regularization
criterion = nn.CrossEntropyLoss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNNModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)  # Increased weight decay
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# Implement early stopping
class EarlyStopping:
    def __init__(self, patience=10, delta=0.01):
        self.patience = patience
        self.delta = delta
        self.best_score = None
        self.early_stop = False
        self.counter = 0

    def __call__(self, val_loss, model, path):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        torch.save(model.state_dict(), path)

# Training loop with early stopping
early_stopping = EarlyStopping(patience=10, delta=0.01)
num_epochs = 20

# Function to save the model
def save_model(model, path):
    torch.save(model.state_dict(), path)

# Function to load the model
def load_model(model, path):
    model.load_state_dict(torch.load(path))

# Path to save the best model
model_path = '../Models/raw_data_cnn.pth'

# K-Fold Cross Validation
k_folds = 5
kf = KFold(n_splits=k_folds, shuffle=True)

# Initialize lists to store loss and accuracy
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
    print(f'Fold {fold+1}/{k_folds}')
    
    train_subset = Subset(dataset, train_idx)
    val_subset = Subset(dataset, val_idx)
    train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)
    
    model = SimpleCNNModel().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
    
    best_accuracy = 0.0
    
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            epoch_loss += loss.item()
        
        train_losses.append(epoch_loss / len(train_loader))
        scheduler.step()
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {train_losses[-1]:.4f}')
        
        model.eval()
        train_loss = 0
        correct_train_predictions = 0
        total_train_predictions = 0
        with torch.no_grad():
            for batch_x, batch_y in train_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                train_loss += loss.item()
                predicted_classes = torch.argmax(outputs, dim=1)
                correct_train_predictions += (predicted_classes == batch_y).sum().item()
                total_train_predictions += batch_x.size(0)
        
        train_accuracy = correct_train_predictions / total_train_predictions * 100
        train_accuracies.append(train_accuracy)
        
        print(f"Accuracy on training set: {train_accuracy:.2f}%")
        
        val_loss = 0
        correct_predictions = 0
        total_predictions = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()
                predicted_classes = torch.argmax(outputs, dim=1)
                correct_predictions += (predicted_classes == batch_y).sum().item()
                total_predictions += batch_x.size(0)
        
        val_losses.append(val_loss / len(val_loader))
        accuracy = correct_predictions / total_predictions * 100
        val_accuracies.append(accuracy)

        print(f'Validation Loss: {val_losses[-1]:.4f}')
        
        print(f"Accuracy on validation set: {accuracy:.2f}%\n")
        
        early_stopping(val_loss, model, model_path)
        
        if early_stopping.early_stop:
            print("Early stopping")
            break

# Load the best model parameters for final evaluation or further training
load_model(model, model_path)
print("Loaded the best model parameters for final evaluation or further training.")


Fold 1/5
Epoch [1/20], Training Loss: 2.3695
Accuracy on training set: 35.65%
Validation Loss: 2.4258
Accuracy on validation set: 18.07%

Epoch [2/20], Training Loss: 2.1110
Accuracy on training set: 42.60%
Validation Loss: 2.2312
Accuracy on validation set: 15.66%

Epoch [3/20], Training Loss: 1.9401
Accuracy on training set: 60.12%
Validation Loss: 2.1821
Accuracy on validation set: 14.46%

Epoch [4/20], Training Loss: 1.8425
Accuracy on training set: 58.31%
Validation Loss: 2.1835
Accuracy on validation set: 14.46%

Epoch [5/20], Training Loss: 1.7187
Accuracy on training set: 71.30%
Validation Loss: 2.2092
Accuracy on validation set: 16.87%

Epoch [6/20], Training Loss: 1.6205
Accuracy on training set: 83.69%
Validation Loss: 2.1933
Accuracy on validation set: 18.07%

Epoch [7/20], Training Loss: 1.4883
Accuracy on training set: 87.31%
Validation Loss: 2.1846
Accuracy on validation set: 15.66%

Epoch [8/20], Training Loss: 1.4174
Accuracy on training set: 88.82%
Validation Loss: 2.

In [46]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, Subset
import numpy as np
import pandas as pd
import torch.optim as optim
from sklearn.model_selection import KFold

# Assuming y_one_hot is already defined
# Convert y_one_hot to PyTorch tensor
# y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)

# Reshape standardized_data to fit the CNN input requirements
# Reshape each sample to (batch_size, in_channels, sequence_length)
standardized_data = standardized_data.view(414, 14, 7680)

# Convert y_one_hot to class indices
# y_indices = torch.argmax(y_one_hot, dim=1)

# Create a dataset
dataset = TensorDataset(standardized_data, y_indices)

# Define the CNN model with increased dropout
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=14, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm1d(32)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm1d(64)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm1d(128)
        self.fc1 = nn.Linear(128 * 960, 512)
        self.bn4 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 128)
        self.bn5 = nn.BatchNorm1d(128)
        self.fc3 = nn.Linear(128, 9)
        self.dropout = nn.Dropout(0.5)  # Increased dropout rate

    def forward(self, x):
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))
        x = self.pool(torch.relu(self.bn3(self.conv3(x))))
        x = x.view(-1, 128 * 960)
        x = torch.relu(self.bn4(self.fc1(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)
        return x

# Define the loss function and optimizer with L2 regularization
criterion = nn.CrossEntropyLoss()
model = CNNModel()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # Increased weight decay
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# Implement early stopping
class EarlyStopping:
    def __init__(self, patience=10, delta=0.01):
        self.patience = patience
        self.delta = delta
        self.best_score = None
        self.early_stop = False
        self.counter = 0

    def __call__(self, val_loss, model, path):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        torch.save(model.state_dict(), path)

# Training loop with early stopping
early_stopping = EarlyStopping(patience=10, delta=0.01)
num_epochs = 20

# Function to save the model
def save_model(model, path):
    torch.save(model.state_dict(), path)

# Function to load the model
def load_model(model, path):
    model.load_state_dict(torch.load(path))

# Path to save the best model
model_path = '../Models/raw_data_cnn.pth'

# K-Fold Cross Validation
k_folds = 5
kf = KFold(n_splits=k_folds, shuffle=True)

# Initialize lists to store loss and accuracy
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
    print(f'Fold {fold+1}/{k_folds}')
    
    train_subset = Subset(dataset, train_idx)
    val_subset = Subset(dataset, val_idx)
    train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)
    
    model = CNNModel()
    optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
    
    best_accuracy = 0.0
    
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0
        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            epoch_loss += loss.item()
        
        train_losses.append(epoch_loss / len(train_loader))
        scheduler.step()
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {train_losses[-1]:.4f}')
        
        model.eval()
        train_loss = 0
        correct_train_predictions = 0
        total_train_predictions = 0
        with torch.no_grad():
            for batch_x, batch_y in train_loader:
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                train_loss += loss.item()
                predicted_classes = torch.argmax(outputs, dim=1)
                correct_train_predictions += (predicted_classes == batch_y).sum().item()
                total_train_predictions += batch_x.size(0)
        
        train_accuracy = correct_train_predictions / total_train_predictions * 100
        train_accuracies.append(train_accuracy)
        
        print(f"Accuracy on training set: {train_accuracy:.2f}%")
        
        val_loss = 0
        correct_predictions = 0
        total_predictions = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()
                predicted_classes = torch.argmax(outputs, dim=1)
                correct_predictions += (predicted_classes == batch_y).sum().item()
                total_predictions += batch_x.size(0)
        
        val_losses.append(val_loss / len(val_loader))
        accuracy = correct_predictions / total_predictions * 100
        val_accuracies.append(accuracy)

        print(f'Validation Loss: {val_losses[-1]:.4f}')
        
        print(f"Accuracy on validation set: {accuracy:.2f}%\n")
        
        early_stopping(val_loss, model, model_path)
        
        if early_stopping.early_stop:
            print("Early stopping")
            break

# Load the best model parameters for final evaluation or further training
load_model(model, model_path)
print("Loaded the best model parameters for final evaluation or further training.")

Fold 1/5
Epoch [1/20], Training Loss: 2.2689
Accuracy on training set: 25.68%
Validation Loss: 2.2302
Accuracy on validation set: 9.64%

Epoch [2/20], Training Loss: 2.0621
Accuracy on training set: 33.84%
Validation Loss: 2.2448
Accuracy on validation set: 9.64%

Epoch [3/20], Training Loss: 1.9216
Accuracy on training set: 48.04%
Validation Loss: 2.2373
Accuracy on validation set: 6.02%

Epoch [4/20], Training Loss: 1.7779


KeyboardInterrupt: 

In [43]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Sample data creation for illustration
# X_imf should be replaced with your actual dataframe
# Assuming X_imf is a dataframe with lists of 7680 elements

# Function to flatten columns containing lists into a 2D array
def flatten_columns(df):
    return np.array(df.iloc[:, :len(df.columns)].values.tolist()).reshape(df.shape[0], -1)

flattened_data = flatten_columns(X)

# Verify the shape of the flattened data
print(f'Flattened data shape: {flattened_data.shape}')


Flattened data shape: (414, 107520)


In [101]:
# Standardize the flattened data
scaler = StandardScaler()
standardized_data = scaler.fit_transform(flattened_data)

# Verify the shape and mean/std of standardized data
print(f'Standardized data shape: {standardized_data.shape}')
print(f'Mean of standardized data: {np.mean(standardized_data, axis=0)}')
print(f'Standard deviation of standardized data: {np.std(standardized_data, axis=0)}')


Standardized data shape: (414, 107520)
Mean of standardized data: [ 8.91664627e-18 -3.82141983e-18  2.18558397e-17 ...  6.10086324e-18
  3.23144624e-17 -4.62592927e-18]
Standard deviation of standardized data: [1. 1. 1. ... 1. 1. 1.]


In [102]:
standardized_data

array([[-0.06130696,  0.04392667,  0.14352059, ...,  0.08859186,
         0.25883853,  0.14559242],
       [ 0.01684903,  0.10069246,  0.15104969, ...,  0.01624005,
        -0.09056394, -0.05986159],
       [ 0.07276537,  0.02416881,  0.02411968, ..., -0.90405031,
        -0.70633722, -0.67074667],
       ...,
       [-0.16546342, -0.15852624, -0.10869809, ..., -0.10660791,
        -0.08593933, -0.0496164 ],
       [-0.02926027, -0.00694558,  0.04855048, ...,  0.00433739,
        -0.02707823, -0.05355139],
       [ 0.1326224 ,  0.12510463,  0.18145423, ..., -0.2165923 ,
        -0.24471244, -0.21426181]])

## Creating model using raw EEG data

In [46]:
# Split data into training and test sets
from sklearn.model_selection import train_test_split, GridSearchCV
X_train, X_test, y_train, y_test = train_test_split(standardized_data, y, test_size=0.2, random_state=42)

# Define the parameter grid for hyperparameter tuning
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Initialize the Random Forest classifier
classifier = RandomForestClassifier(random_state=42)

# Use GridSearchCV to find the best hyperparameters
grid_search = GridSearchCV(estimator=classifier, param_grid=param_grid, cv=3, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

# Print the best parameters found by GridSearchCV
print(f'Best parameters found: {grid_search.best_params_}')

# Train the model with the best parameters
best_classifier = grid_search.best_estimator_
best_classifier.fit(X_train, y_train)

# Predict on test data
y_pred = best_classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Detailed classification report
print('Classification Report:')
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

Fitting 3 folds for each of 216 candidates, totalling 648 fits
Best parameters found: {'bootstrap': False, 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 200}
Accuracy: 0.22
Classification Report:
              precision    recall  f1-score   support

   amusement       0.27      0.38      0.32         8
       anger       0.33      0.22      0.27         9
    calmness       0.20      0.09      0.12        11
     disgust       0.44      0.33      0.38        12
  excitement       0.33      0.27      0.30        11
        fear       0.29      0.18      0.22        11
   happiness       0.12      0.12      0.12         8
     sadness       0.00      0.00      0.00         8
    surprise       0.12      0.40      0.19         5

    accuracy                           0.22        83
   macro avg       0.24      0.22      0.21        83
weighted avg       0.25      0.22      0.22        83



In [82]:
import torch
# setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

#Additional Info when using cuda
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

Using device: cuda

NVIDIA RTX A2000 12GB
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


In [83]:
import torch.nn as nn
import torch.optim as optim

# Assuming you have standardized_data and y
# Convert standardized_data and y to PyTorch tensors
standardized_data = torch.tensor(standardized_data, dtype=torch.float32)
y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)

# Define the model
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.fc1 = nn.Linear(107520, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 9)  # Assuming y is one-hot encoded with 9 classes

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = SimpleModel()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 10
batch_size = 32

for epoch in range(num_epochs):
    permutation = torch.randperm(standardized_data.size()[0])
    
    for i in range(0, standardized_data.size()[0], batch_size):
        indices = permutation[i:i+batch_size]
        batch_x, batch_y = standardized_data[indices], y_one_hot[indices]
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(batch_x)
        
        # Compute the loss
        loss = criterion(outputs, torch.max(batch_y, 1)[1])
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Make predictions
with torch.no_grad():
    predictions = model(standardized_data)

# Print the predictions
print(predictions)

Epoch [1/10], Loss: 9.9654
Epoch [2/10], Loss: 1.7230
Epoch [3/10], Loss: 1.0408
Epoch [4/10], Loss: 0.2875
Epoch [5/10], Loss: 0.1479
Epoch [6/10], Loss: 0.1144
Epoch [7/10], Loss: 0.0419
Epoch [8/10], Loss: 0.0483
Epoch [9/10], Loss: 0.0321
Epoch [10/10], Loss: 0.0272
tensor([[ -2.4766,   0.3382,   7.9686,  ...,  -4.3074,  -2.1992,  -6.1776],
        [  1.6820,  -4.5621,   2.1664,  ...,   1.6001,   0.1249,   8.0359],
        [ 12.0344,  -4.2796, -11.4532,  ...,  -4.5454,  -1.0308,  -2.1525],
        ...,
        [ -0.4063,  -1.0266,  -0.9628,  ...,   1.9722,  -2.7729,   1.6038],
        [  1.2529,  -1.2286,  -2.4018,  ...,  -3.3884,   7.7119,   1.0661],
        [  0.1546,  -3.0245,   0.3837,  ...,  -0.8957,   1.1648,   6.4990]])


In [84]:
predictions[0]

tensor([-2.4766,  0.3382,  7.9686,  2.0732, -1.0617,  0.3879, -4.3074, -2.1992,
        -6.1776])

In [88]:
# Convert model outputs to class predictions
predicted_classes = torch.argmax(predictions, dim=1)

# Convert one-hot encoded labels to class indices
true_classes = torch.argmax(y_one_hot, dim=1)

# Calculate the accuracy
correct_predictions = (predicted_classes == true_classes).sum().item()
accuracy = correct_predictions / len(true_classes) * 100

# Print the predicted classes, true classes, and accuracy
print("Predicted Classes:", predicted_classes)
print("True Classes:", true_classes)
print(f"Accuracy: {accuracy:.2f}%")

Predicted Classes: tensor([2, 8, 0, 5, 4, 3, 6, 1, 7, 3, 2, 0, 6, 1, 5, 4, 7, 8, 2, 8, 0, 5, 4, 3,
        6, 1, 7, 3, 2, 0, 6, 1, 5, 4, 7, 8, 2, 8, 0, 5, 4, 3, 6, 1, 7, 3, 2, 0,
        6, 1, 5, 4, 7, 8, 2, 8, 0, 5, 4, 3, 6, 1, 7, 3, 2, 0, 6, 1, 5, 4, 7, 8,
        2, 8, 0, 5, 4, 3, 6, 1, 7, 3, 2, 0, 6, 1, 5, 4, 7, 8, 2, 8, 0, 5, 4, 3,
        6, 1, 7, 3, 2, 0, 6, 1, 5, 4, 7, 8, 2, 8, 0, 5, 4, 3, 6, 1, 7, 3, 2, 0,
        6, 1, 5, 4, 7, 8, 2, 8, 0, 5, 4, 3, 6, 1, 7, 3, 2, 0, 6, 1, 5, 4, 7, 8,
        2, 8, 0, 5, 4, 3, 6, 1, 7, 3, 2, 0, 6, 1, 5, 4, 7, 8, 2, 8, 0, 5, 4, 3,
        6, 1, 7, 3, 2, 0, 6, 1, 5, 4, 7, 8, 2, 8, 0, 5, 4, 3, 6, 1, 7, 3, 2, 0,
        6, 1, 5, 4, 7, 8, 2, 8, 0, 5, 4, 3, 6, 1, 7, 3, 2, 0, 6, 1, 5, 4, 7, 8,
        2, 8, 0, 5, 4, 3, 6, 1, 7, 3, 2, 0, 6, 1, 5, 4, 7, 8, 2, 8, 0, 5, 4, 3,
        6, 1, 7, 3, 2, 0, 6, 1, 5, 4, 7, 8, 2, 8, 0, 5, 4, 3, 6, 1, 7, 3, 2, 0,
        6, 1, 5, 4, 7, 8, 2, 8, 0, 5, 4, 3, 6, 1, 7, 3, 2, 0, 6, 1, 5, 4, 7, 8,
        2, 8, 0, 5, 4

- The model is trained on whole dataset and the accuracy for the whole training dataset is found to be 100%
- We would want to divide the dataset into training and test data and find the accuracy on test data

In [92]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split

# Assuming you have standardized_data and y_one_hot
# Convert standardized_data and y_one_hot to PyTorch tensors
standardized_data = torch.tensor(standardized_data, dtype=torch.float32)
y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)

# Create a dataset and split it into training and testing sets
dataset = TensorDataset(standardized_data, y_one_hot)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create data loaders for training and testing sets
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the model
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.fc1 = nn.Linear(107520, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 9)  # Assuming y is one-hot encoded with 9 classes
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout(x)
        x = torch.relu(self.fc3(x))
        x = self.fc4(x)
        return x

model = SimpleModel()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Train the model
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    for batch_x, batch_y in train_loader:
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(batch_x)
        
        # Compute the loss
        loss = criterion(outputs, torch.max(batch_y, 1)[1])
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    correct_predictions = 0
    total_predictions = 0
    for batch_x, batch_y in test_loader:
        outputs = model(batch_x)
        predicted_classes = torch.argmax(outputs, dim=1)
        true_classes = torch.argmax(batch_y, dim=1)
        correct_predictions += (predicted_classes == true_classes).sum().item()
        total_predictions += batch_x.size(0)

accuracy = correct_predictions / total_predictions * 100

# Print the accuracy
print(f"Accuracy on test set: {accuracy:.2f}%")

Epoch [1/20], Loss: 2.6666
Epoch [2/20], Loss: 1.9852
Epoch [3/20], Loss: 2.1209
Epoch [4/20], Loss: 2.1303
Epoch [5/20], Loss: 4.2271
Epoch [6/20], Loss: 1.8788
Epoch [7/20], Loss: 2.5143
Epoch [8/20], Loss: 2.6892
Epoch [9/20], Loss: 1.8031
Epoch [10/20], Loss: 1.4404
Epoch [11/20], Loss: 2.5674
Epoch [12/20], Loss: 1.6803
Epoch [13/20], Loss: 1.2023
Epoch [14/20], Loss: 1.4562
Epoch [15/20], Loss: 1.4216
Epoch [16/20], Loss: 1.7338
Epoch [17/20], Loss: 1.1633
Epoch [18/20], Loss: 1.3994
Epoch [19/20], Loss: 1.3968
Epoch [20/20], Loss: 1.1915
Accuracy on test set: 13.25%


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split

# Assuming you have standardized_data and y_one_hot
# Convert standardized_data and y_one_hot to PyTorch tensors
standardized_data = torch.tensor(standardized_data, dtype=torch.float32)
y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)

# Create a dataset and split it into training and testing sets
dataset = TensorDataset(standardized_data, y_one_hot)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create data loaders for training and testing sets
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the model
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.fc1 = nn.Linear(107520, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 9)  # Assuming y is one-hot encoded with 9 classes

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = SimpleModel()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Train the model
num_epochs = 100

for epoch in range(num_epochs):
    model.train()
    for batch_x, batch_y in train_loader:
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(batch_x)
        
        # Compute the loss
        loss = criterion(outputs, torch.max(batch_y, 1)[1])
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    correct_predictions = 0
    total_predictions = 0
    for batch_x, batch_y in test_loader:
        outputs = model(batch_x)
        predicted_classes = torch.argmax(outputs, dim=1)
        true_classes = torch.argmax(batch_y, dim=1)
        correct_predictions += (predicted_classes == true_classes).sum().item()
        total_predictions += batch_x.size(0)

accuracy = correct_predictions / total_predictions * 100

# Print the accuracy
print(f"Accuracy on test set: {accuracy:.2f}%")

In [93]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split

# Assuming you have standardized_data and y_one_hot
# Convert standardized_data and y_one_hot to PyTorch tensors
standardized_data = torch.tensor(standardized_data, dtype=torch.float32)
y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)

# Create a dataset and split it into training and testing sets
dataset = TensorDataset(standardized_data, y_one_hot)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create data loaders for training and testing sets
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the model with batch normalization
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.fc1 = nn.Linear(107520, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 32)
        self.bn3 = nn.BatchNorm1d(32)
        self.fc4 = nn.Linear(32, 9)  # Assuming y is one-hot encoded with 9 classes
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn3(self.fc3(x)))
        x = self.fc4(x)
        return x

model = SimpleModel()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # Reduce LR by half every 5 epochs

# Train the model
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    for batch_x, batch_y in train_loader:
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(batch_x)
        
        # Compute the loss
        loss = criterion(outputs, torch.max(batch_y, 1)[1])
        
        # Backward pass and optimize
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()
    
    # Step the scheduler
    scheduler.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    correct_predictions = 0
    total_predictions = 0
    for batch_x, batch_y in test_loader:
        outputs = model(batch_x)
        predicted_classes = torch.argmax(outputs, dim=1)
        true_classes = torch.argmax(batch_y, dim=1)
        correct_predictions += (predicted_classes == true_classes).sum().item()
        total_predictions += batch_x.size(0)

accuracy = correct_predictions / total_predictions * 100

# Print the accuracy
print(f"Accuracy on test set: {accuracy:.2f}%")

Epoch [1/20], Loss: 2.1557
Epoch [2/20], Loss: 2.5185
Epoch [3/20], Loss: 2.1721
Epoch [4/20], Loss: 2.0175
Epoch [5/20], Loss: 2.0842
Epoch [6/20], Loss: 2.1184
Epoch [7/20], Loss: 1.9912
Epoch [8/20], Loss: 2.1052
Epoch [9/20], Loss: 2.0485
Epoch [10/20], Loss: 2.0790
Epoch [11/20], Loss: 1.9696
Epoch [12/20], Loss: 2.1117
Epoch [13/20], Loss: 2.1083
Epoch [14/20], Loss: 2.0458
Epoch [15/20], Loss: 2.1421
Epoch [16/20], Loss: 2.2228
Epoch [17/20], Loss: 2.0158
Epoch [18/20], Loss: 1.8224
Epoch [19/20], Loss: 2.1615
Epoch [20/20], Loss: 2.1206
Accuracy on test set: 13.25%


In [97]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split

# Assuming you have standardized_data and y_one_hot
# Convert standardized_data and y_one_hot to PyTorch tensors
standardized_data = torch.tensor(standardized_data, dtype=torch.float32)
y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)

# Create a dataset and split it into training and testing sets
dataset = TensorDataset(standardized_data, y_one_hot)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create data loaders for training and testing sets
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the model with batch normalization
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.fc1 = nn.Linear(107520, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 32)
        self.bn3 = nn.BatchNorm1d(32)
        self.fc4 = nn.Linear(32, 9)  # Assuming y is one-hot encoded with 9 classes
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn3(self.fc3(x)))
        x = self.fc4(x)
        return x

model = SimpleModel()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # Reduce LR by half every 5 epochs

# Train the model
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    for batch_x, batch_y in train_loader:
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(batch_x)
        
        # Compute the loss
        loss = criterion(outputs, torch.max(batch_y, 1)[1])
        
        # Backward pass and optimize
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()
    
    # Step the scheduler
    scheduler.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    correct_predictions = 0
    total_predictions = 0
    for batch_x, batch_y in test_loader:
        outputs = model(batch_x)
        predicted_classes = torch.argmax(outputs, dim=1)
        true_classes = torch.argmax(batch_y, dim=1)
        correct_predictions += (predicted_classes == true_classes).sum().item()
        total_predictions += batch_x.size(0)

accuracy = correct_predictions / total_predictions * 100

# Print the accuracy
print(f"Accuracy on test set: {accuracy:.2f}%")

Epoch [1/20], Loss: 2.2467
Epoch [2/20], Loss: 2.0136
Epoch [3/20], Loss: 2.0043
Epoch [4/20], Loss: 2.0542
Epoch [5/20], Loss: 2.2275
Epoch [6/20], Loss: 2.1046
Epoch [7/20], Loss: 2.0746
Epoch [8/20], Loss: 2.2253
Epoch [9/20], Loss: 2.1439
Epoch [10/20], Loss: 2.0780
Epoch [11/20], Loss: 2.0387
Epoch [12/20], Loss: 2.1048
Epoch [13/20], Loss: 1.9629
Epoch [14/20], Loss: 1.9249
Epoch [15/20], Loss: 2.2909
Epoch [16/20], Loss: 2.0124
Epoch [17/20], Loss: 1.8794
Epoch [18/20], Loss: 2.1400
Epoch [19/20], Loss: 2.3873
Epoch [20/20], Loss: 2.0613
Accuracy on test set: 10.84%


In [108]:
# Assuming you have standardized_data and y_one_hot
# Convert standardized_data and y_one_hot to PyTorch tensors
standardized_data = torch.tensor(standardized_data, dtype=torch.float32)
y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)

In [115]:
torch.tensor(standardized_data, dtype=torch.float32).shape

torch.Size([414, 1, 224, 480])

In [116]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split

# Assuming standardized_data and y_one_hot are already defined
# Convert standardized_data and y_one_hot to PyTorch tensors
standardized_data = torch.tensor(standardized_data, dtype=torch.float32)
y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)

# Reshape standardized_data to fit the CNN input requirements
# Assuming the data is 1D and needs to be reshaped to 2D (e.g., 224x480)
standardized_data = standardized_data.view(-1, 1, 224, 480)

# Convert y_one_hot to class indices
y_indices = torch.argmax(y_one_hot, dim=1)

# Create a dataset and split it into training and testing sets
dataset = TensorDataset(standardized_data, y_indices)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create data loaders for training and testing sets
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the CNN model
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.fc1 = nn.Linear(64 * 112 * 240, 128)
        self.bn3 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn4 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 9)  # Assuming y is one-hot encoded with 9 classes
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))
        x = x.view(-1, 64 * 112 * 240)
        x = torch.relu(self.bn3(self.fc1(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn4(self.fc2(x)))
        x = self.fc3(x)
        return x

model = CNNModel()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # Reduce LR by half every 5 epochs

# Train the model
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    for batch_x, batch_y in train_loader:
        # Debug prints to check the shapes
        print(f'batch_x shape: {batch_x.shape}')
        print(f'batch_y shape: {batch_y.shape}')
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(batch_x)
        
        # Compute the loss
        loss = criterion(outputs, batch_y)
        
        # Backward pass and optimize
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()
    
    # Step the scheduler
    scheduler.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    correct_predictions = 0
    total_predictions = 0
    for batch_x, batch_y in test_loader:
        outputs = model(batch_x)
        predicted_classes = torch.argmax(outputs, dim=1)
        correct_predictions += (predicted_classes == batch_y).sum().item()
        total_predictions += batch_x.size(0)

accuracy = correct_predictions / total_predictions * 100

# Print the accuracy
print(f"Accuracy on test set: {accuracy:.2f}%")

batch_x shape: torch.Size([32, 1, 224, 480])
batch_y shape: torch.Size([32])


ValueError: Expected input batch_size (8) to match target batch_size (32).

In [117]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split

# Reshape standardized_data to fit the CNN input requirements
# Assuming the data is 1D and needs to be reshaped to 2D (e.g., 224x480)
standardized_data = standardized_data.view(-1, 1, 224, 480)

# Convert y_one_hot to class indices
y_indices = torch.argmax(y_one_hot, dim=1)

# Create a dataset and split it into training and testing sets
dataset = TensorDataset(standardized_data, y_indices)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create data loaders for training and testing sets
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the CNN model
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.fc1 = nn.Linear(64 * 112 * 240, 128)  # Adjust this based on the output shape
        self.bn3 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn4 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 9)  # Assuming y is one-hot encoded with 9 classes
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))
        print(f'After conv1 and pool: {x.shape}')
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))
        print(f'After conv2 and pool: {x.shape}')
        x = x.view(-1, 64 * 56 * 120)  # Adjust this based on the output shape
        print(f'After view: {x.shape}')
        x = torch.relu(self.bn3(self.fc1(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn4(self.fc2(x)))
        x = self.fc3(x)
        return x

model = CNNModel()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # Reduce LR by half every 5 epochs

# Train the model
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    for batch_x, batch_y in train_loader:
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(batch_x)
        
        # Compute the loss
        loss = criterion(outputs, batch_y)
        
        # Backward pass and optimize
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()
    
    # Step the scheduler
    scheduler.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    correct_predictions = 0
    total_predictions = 0
    for batch_x, batch_y in test_loader:
        outputs = model(batch_x)
        predicted_classes = torch.argmax(outputs, dim=1)
        correct_predictions += (predicted_classes == batch_y).sum().item()
        total_predictions += batch_x.size(0)

accuracy = correct_predictions / total_predictions * 100

# Print the accuracy
print(f"Accuracy on test set: {accuracy:.2f}%")

After conv1 and pool: torch.Size([32, 32, 112, 240])
After conv2 and pool: torch.Size([32, 64, 56, 120])
After view: torch.Size([32, 430080])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x430080 and 1720320x128)

In [118]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split

# Assuming standardized_data and y_one_hot are already defined
# Convert standardized_data and y_one_hot to PyTorch tensors
standardized_data = torch.tensor(standardized_data, dtype=torch.float32)
y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)

# Reshape standardized_data to fit the CNN input requirements
# Assuming the data is 1D and needs to be reshaped to 2D (e.g., 224x480)
standardized_data = standardized_data.view(-1, 1, 224, 480)

# Convert y_one_hot to class indices
y_indices = torch.argmax(y_one_hot, dim=1)

# Create a dataset and split it into training and testing sets
dataset = TensorDataset(standardized_data, y_indices)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create data loaders for training and testing sets
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the CNN model
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.fc1 = nn.Linear(64 * 56 * 120, 128)  # Updated input size
        self.bn3 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn4 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 9)  # Assuming y is one-hot encoded with 9 classes
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))
        x = x.view(-1, 64 * 56 * 120)  # Updated view size
        x = torch.relu(self.bn3(self.fc1(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn4(self.fc2(x)))
        x = self.fc3(x)
        return x

model = CNNModel()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # Reduce LR by half every 5 epochs

# Train the model
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    for batch_x, batch_y in train_loader:
        # Debug prints to check the shapes
        print(f'batch_x shape: {batch_x.shape}')
        print(f'batch_y shape: {batch_y.shape}')
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(batch_x)
        
        # Compute the loss
        loss = criterion(outputs, batch_y)
        
        # Backward pass and optimize
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()
    
    # Step the scheduler
    scheduler.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    correct_predictions = 0
    total_predictions = 0
    for batch_x, batch_y in test_loader:
        outputs = model(batch_x)
        predicted_classes = torch.argmax(outputs, dim=1)
        correct_predictions += (predicted_classes == batch_y).sum().item()
        total_predictions += batch_x.size(0)

accuracy = correct_predictions / total_predictions * 100

# Print the accuracy
print(f"Accuracy on test set: {accuracy:.2f}%")

batch_x shape: torch.Size([32, 1, 224, 480])
batch_y shape: torch.Size([32])
batch_x shape: torch.Size([32, 1, 224, 480])
batch_y shape: torch.Size([32])
batch_x shape: torch.Size([32, 1, 224, 480])
batch_y shape: torch.Size([32])
batch_x shape: torch.Size([32, 1, 224, 480])
batch_y shape: torch.Size([32])
batch_x shape: torch.Size([32, 1, 224, 480])
batch_y shape: torch.Size([32])
batch_x shape: torch.Size([32, 1, 224, 480])
batch_y shape: torch.Size([32])
batch_x shape: torch.Size([32, 1, 224, 480])
batch_y shape: torch.Size([32])
batch_x shape: torch.Size([32, 1, 224, 480])
batch_y shape: torch.Size([32])
batch_x shape: torch.Size([32, 1, 224, 480])
batch_y shape: torch.Size([32])
batch_x shape: torch.Size([32, 1, 224, 480])
batch_y shape: torch.Size([32])
batch_x shape: torch.Size([11, 1, 224, 480])
batch_y shape: torch.Size([11])
Epoch [1/20], Loss: 2.1431
batch_x shape: torch.Size([32, 1, 224, 480])
batch_y shape: torch.Size([32])
batch_x shape: torch.Size([32, 1, 224, 480])
batc

In [123]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split

# Assuming standardized_data and y_one_hot are already defined
# Convert standardized_data and y_one_hot to PyTorch tensors
standardized_data = torch.tensor(standardized_data, dtype=torch.float32)
y_one_hot = torch.tensor(y_one_hot, dtype=torch.float32)

# Reshape standardized_data to fit the CNN input requirements
# Assuming the data is 1D and needs to be reshaped to 2D (e.g., 224x480)
standardized_data = standardized_data.view(-1, 1, 224, 480)

# Convert y_one_hot to class indices
y_indices = torch.argmax(y_one_hot, dim=1)

# Create a dataset and split it into training and testing sets
dataset = TensorDataset(standardized_data, y_indices)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create data loaders for training and testing sets
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)  # Increased batch size
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define the CNN model
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.fc1 = nn.Linear(64 * 56 * 120, 128)  # Updated input size
        self.bn3 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn4 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 9)  # Assuming y is one-hot encoded with 9 classes
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))
        x = x.view(-1, 64 * 56 * 120)  # Updated view size
        x = torch.relu(self.bn3(self.fc1(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn4(self.fc2(x)))
        x = self.fc3(x)
        return x

model = CNNModel()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.01)  # Reduced learning rate
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.2)  # Reduce LR by half every 5 epochs

# Train the model
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for batch_x, batch_y in train_loader:
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(batch_x)
        
        # Compute the loss
        loss = criterion(outputs, batch_y)
        
        # Backward pass and optimize
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()
        
        epoch_loss += loss.item()
    
    # Step the scheduler
    scheduler.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss/len(train_loader):.4f}')

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    correct_predictions = 0
    total_predictions = 0
    for batch_x, batch_y in test_loader:
        outputs = model(batch_x)
        predicted_classes = torch.argmax(outputs, dim=1)
        correct_predictions += (predicted_classes == batch_y).sum().item()
        total_predictions += batch_x.size(0)

accuracy = correct_predictions / total_predictions * 100

# Print the accuracy
print(f"Accuracy on test set: {accuracy:.2f}%")

Epoch [1/10], Loss: 2.2999
Epoch [2/10], Loss: 2.1989
Epoch [3/10], Loss: 2.1102
Epoch [4/10], Loss: 2.0589
Epoch [5/10], Loss: 2.0783
Epoch [6/10], Loss: 1.9289
Epoch [7/10], Loss: 1.9378
Epoch [8/10], Loss: 1.9206
Epoch [9/10], Loss: 1.8483
Epoch [10/10], Loss: 1.7778
Accuracy on test set: 9.64%
