# CNN-LSTM

In [129]:
import pandas as pd
import numpy as np
import more_itertools
import random

In [131]:
df = pd.read_csv('/Users/N1/Data-2020/PAMP2_Cleaned.csv') 
df = df.drop(['time_stamp'], axis =1) # They are already in order of time for each participant

In [145]:
df.activity_id.value_counts()

4     238761
17    238690
1     192523
3     189931
7     188107
2     185188
16    175353
6     164600
12    117216
13    104944
5      98199
24     49360
Name: activity_id, dtype: int64

### Sliding Window

In [138]:
from window_slider import Slider

def make_windows(df, bucket_size, overlap_count):
    window_list = []
    final = pd.DataFrame()
    activity_list = list(df['activity_id'].unique()) #list of the four activities
    sub_id_list = list(df['id'].unique()) #list of the subject ids
    X_cols = df.columns[2:-1]
    df_list = []
    
    for i in sub_id_list:
        df_subject = df[df['id'] == i] #isolate a single subject id
        for j in activity_list:
            df_subject_activity = df_subject[df_subject['activity_id'] == j] #isolate by activity
            final_df = pd.DataFrame()
            if df_subject_activity.empty:
                  pass
            else:
                df_flat = df_subject_activity.iloc[:,2:-1].T.values #array of arrays, each row is every single reading in an array for a sensor in that isolation 

                slider = Slider(bucket_size,overlap_count)
                slider.fit(df_flat)
                while True:
                    window_data = slider.slide()

                    if slider.reached_end_of_list(): break
                    window_list.append(list(window_data))
                final_df = final.append(window_list)
                final_df.columns = X_cols
                final_df.insert(9, "Subject_ID", [i]*len(final_df), True)
                final_df.insert(10, "Activity", [j]*len(final_df), True)
                df_list.append(final_df)
                window_list = []

    final = pd.DataFrame(columns = X_cols)

    for l in df_list:
        final = final.append(l)


    final
    final.columns = final.columns.map(''.join)
    return final


#### Please edit the cell below to change windows

In [141]:
window_size = 40
window_overlap = 0

In [142]:
windowed = make_windows(df, window_size, window_overlap)

ValueError: Bucket size should be smaller than list size

In [71]:
windowed.head(1)

Unnamed: 0,ACC1,ACC2,ACC3,TEMP,EDA,BVP,HR,Magnitude,SID,Subject_ID,Activity,Round
0,"[41.0, 41.0, 41.0, 41.0, 41.0, 41.0, 41.0, 41....","[27.2, 27.3, 27.4, 27.5, 27.6, 27.7, 27.8, 27....","[40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40....","[32.39, 32.39, 32.39, 32.39, 32.34, 32.34, 32....","[0.275354, 0.276634, 0.270231, 0.270231, 0.268...","[15.25, -12.75, -42.99, 18.39, 13.61, -9.66, -...","[78.98, 78.83500000000002, 78.69, 78.545, 78.4...","[63.410093833710725, 63.453053512025726, 63.49...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0,1,1


In [72]:
windowed.shape

(6572, 12)

### Split Train Test

In [73]:
ID_list = list(windowed['Subject_ID'].unique())
random.shuffle(ID_list)
train = pd.DataFrame()
test = pd.DataFrame()

#change size of train/test split
train = windowed[windowed['Subject_ID'].isin(ID_list[:45])]
test = windowed[windowed['Subject_ID'].isin(ID_list[45:])]

print(train.shape, test.shape)

(5332, 12) (1240, 12)


In [74]:
X_train = train[['TEMP', 'EDA', 'HR', 'BVP', 'Magnitude', 'ACC1', 'ACC2', 'ACC3', 'SID']]
X_train = X_train.apply(pd.Series.explode).reset_index()

X_test = test[['TEMP', 'EDA', 'HR', 'BVP', 'Magnitude', 'ACC1', 'ACC2', 'ACC3', 'SID']]
X_test = X_test.apply(pd.Series.explode).reset_index()

X_test = X_test.drop(['index'], axis = 1)
X_train = X_train.drop(['index'], axis = 1)
#X_val = X_val.drop(['index'], axis = 1)
print(X_train.shape, X_test.shape, X_train.shape[0] + X_test.shape[0])

(426560, 9) (99200, 9) 525760


In [75]:
y_train = train['Activity'].values
y_test = test['Activity'].values
print(y_train.shape, y_test.shape, y_train.shape[0] + y_test.shape[0])

(5332,) (1240,) 6572


### One-Hot Encoding Subject_ID

In [76]:
X_train['train'] =1
X_test['train'] = 0

In [77]:
combined = pd.concat([X_train, X_test])

In [78]:
combined_dum = pd.get_dummies(combined['SID'])

In [79]:
combined = pd.concat([combined, combined_dum], axis =1)

In [80]:
X_train = combined[combined['train'] == 1]
X_test = combined[combined['train'] == 0]

X_train.drop(["train"], axis = 1, inplace = True)
X_test.drop(["train"], axis = 1, inplace = True)
X_train.drop(["SID"], axis = 1, inplace = True)
X_test.drop(["SID"], axis = 1, inplace = True)
print(X_train.shape, X_test.shape, X_train.shape[0] + X_test.shape[0])

(426560, 63) (99200, 63) 525760


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [81]:
X_train.head(1)

Unnamed: 0,TEMP,EDA,HR,BVP,Magnitude,ACC1,ACC2,ACC3,0.0,1.0,...,45.0,46.0,47.0,48.0,49.0,50.0,51.0,52.0,53.0,54.0
0,32.39,0.275354,78.98,15.25,63.4101,41,27.2,40,1,0,...,0,0,0,0,0,0,0,0,0,0


### Normalize data

In [82]:
from sklearn.preprocessing import StandardScaler

In [83]:
ss = StandardScaler()

X_train_SID = X_train.iloc[:, 8:]
X_test_SID = X_test.iloc[:, 8:]

X_train_cont = pd.DataFrame(ss.fit_transform(X_train.iloc[:,0:8]))
X_test_cont = pd.DataFrame(ss.transform(X_test.iloc[:,0:8]))

In [84]:
X_train = pd.concat([X_train_cont, X_train_SID], axis = 1)
X_test = pd.concat([X_test_cont, X_test_SID], axis = 1)

In [85]:
X_train

Unnamed: 0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,0.0.1,1.0.1,...,45.0,46.0,47.0,48.0,49.0,50.0,51.0,52.0,53.0,54.0
0,-0.728891,-0.453260,-0.475053,0.210988,-0.319956,0.764460,0.406914,0.561826,1,0,...,0,0,0,0,0,0,0,0,0,0
1,-0.728891,-0.453112,-0.484625,-0.176622,-0.316437,0.764460,0.409437,0.561826,1,0,...,0,0,0,0,0,0,0,0,0,0
2,-0.728891,-0.453853,-0.494197,-0.595241,-0.312906,0.764460,0.411959,0.561826,1,0,...,0,0,0,0,0,0,0,0,0,0
3,-0.728891,-0.453853,-0.503769,0.254456,-0.309365,0.764460,0.414482,0.561826,1,0,...,0,0,0,0,0,0,0,0,0,0
4,-0.769189,-0.454001,-0.513341,0.188285,-0.305814,0.764460,0.417005,0.561826,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
426555,-0.970684,-0.403098,0.439075,0.011646,0.146625,0.256240,-0.561307,1.488342,0,0,...,0,0,0,0,0,0,0,0,0,1
426556,-0.970684,-0.404730,0.442541,-0.013964,0.087635,0.334660,-0.588021,1.411499,0,0,...,0,0,0,0,0,0,0,0,0,1
426557,-0.970684,-0.407251,0.446501,0.072141,0.045811,0.413080,-0.614734,1.334655,0,0,...,0,0,0,0,0,0,0,0,0,1
426558,-0.970684,-0.408141,0.450462,-0.020470,0.021542,0.491499,-0.641448,1.257811,0,0,...,0,0,0,0,0,0,0,0,0,1


### Reshaping windows as arrays

In [86]:
# Convert to transposed arrays
X_test = X_test.T.values
X_train = X_train.T.values

In [87]:
X_test = X_test.astype('float64')
X_train = X_train.astype('float64')

# Reshape to -1, window_size, # features
X_train = X_train.reshape((-1, window_size, X_train.shape[0]))
X_test = X_test.reshape((-1, window_size, X_test.shape[0])) 

print(X_train.shape,  y_train.shape, X_test.shape, y_test.shape)

(5332, 80, 63) (5332,) (1240, 80, 63) (1240,)


#### If using TF convert y_train and y_test to One-Hot

In [88]:
from keras.utils import np_utils
y_train_dummy = np_utils.to_categorical(y_train)
y_test_dummy = np_utils.to_categorical(y_test)

### Model

In [97]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout, LSTM, TimeDistributed, Conv1D, MaxPooling1D

In [98]:
n_timesteps, n_features, n_outputs = X_train.shape[1], X_train.shape[2], y_train_dummy.shape[1]
print(X_train.shape[1],X_train.shape[2], y_train_dummy.shape[1])

80 63 4


In [114]:
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(LSTM(100, return_sequences=True))
model.add(LSTM(100, return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='softmax')) #4 outputs are possible 
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [115]:
#Using test data as val data for now, we will be using LOOCV so dont need to worry about creating validation set
mFIT = model.fit(X_train, y_train_dummy, epochs = 50, validation_data = (X_test, y_test_dummy), batch_size = 32, verbose = 1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
  9/167 [>.............................] - ETA: 9s - loss: 1.1920 - accuracy: 0.4062

KeyboardInterrupt: 

In [62]:
X_train.shape

(63, 426560)