In [1]:
import pandas as pd
import numpy as np
import math
import os
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.decomposition import PCA                 # for dimensionality reduction using PCA
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report,confusion_matrix
import matplotlib.pyplot as plt 
import seaborn as sns            # visualization tool
import matplotlib.cm as cm       # for colour mapping to use for the pca plots
import scipy.signal
from spectrum import *

In [4]:
def process_one_csv(foldername,k,filename):
    path = '/home/amaya/emp/Realtime-Sign-Language-Translation-to-Speech-DNN/NEW_PROCESS/FULL_CLEANED2/'+foldername+'/split/'+k+'/'+filename
    f = open(path,'rb')
    if b',' in f.readline():
        data = pd.read_csv(path)
    else:
        data = pd.read_csv(path,encoding = 'utf-16',delimiter = '\t')
    data = data.drop('Timestamp',axis = 1)
    if 'gyro' in filename:
        columns = list(data.columns)
    elif 'emg' in filename:
        columns = ['Emg1', 'Emg2', 'Emg4', 'Emg6', 'Emg7']

    #features = ['E','MAX','SSC','Sk','Ku','AR']

    vector = []
    
    for item in columns:
        temp = list(data[item])
        l = len(temp)
        
        # calculating mean
        mu = np.mean(np.array(temp))
        
        # calculating std
        dif = temp - mu
        s = np.sqrt(np.mean(np.array(dif)**2))
        
        # calculating E ***
        #out_columns.append(item+features[0])
        vector.append(np.mean(np.array(temp)**2))
        
        # calculating SSC ***
        #out_columns.append(item+features[2])
        tot = 0
        for i in range(1,l-1): 
            if (temp[i]-temp[i-1])*(temp[i+1]-temp[i]) < 0:
                tot += 1
        vector.append(tot/l-2)
        
        if 'gyro' in filename:
            # calculating max ***
            #out_columns.append(item+features[1])
            vector.append(max(temp))


        if 'emg' in filename:
            # calculating Sk ***
            # out_columns.append(item+features[3])
            vector.append((np.sum(np.array(dif)**3) * l)/((l-1)*(l-2)*(s**3)))

            # calculate Ku ***
            #out_columns.append(item+features[4])
            Ku = (np.sum(np.array(dif)**4) * l * (l+1))/((l-1)*(l-2)*(l-3)*(s**4)) - (3*((l-1)**2))/((l-2)*(l-3))
            vector.append(Ku)

    return vector

In [5]:
output = []
classes=[]
folders = os.listdir('/home/amaya/emp/Realtime-Sign-Language-Translation-to-Speech-DNN/NEW_PROCESS/FULL_CLEANED2/')

# process_one_csv(files[0])
for folder in folders:
    if folder[0] != '.':
        class_ = folder[:-11]
        
        for k in ['1', '2']:
            files = os.listdir('/home/amaya/emp/Realtime-Sign-Language-Translation-to-Speech-DNN/NEW_PROCESS/FULL_CLEANED2/'+folder+'/split/'+k+'/')
            temp = []
            for file in files:
                if file[-1] == 'v' and (('gyro' in file) or ('emg' in file)):
                    temp += process_one_csv(folder,k,file)
            if class_ in classes:
                temp.append(classes.index(class_))
            else:
                classes.append(class_)
                temp.append(classes.index(class_))
            
            output.append(temp)
            
processed = pd.DataFrame(output)
ocolumns = list(processed.columns)
processed.head()
print(classes)

['YELLOW', 'WATER', 'YES', 'THANKYOU', 'NO']


In [6]:
processed.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,416.366338,-1.97449,0.694468,-0.639694,108.755353,-1.968367,0.683196,-0.416161,43.514615,-1.972449,...,1102.491401,-1.931937,42.87189,2313.491837,-1.984293,74.722949,1317.906575,-1.984293,54.52404,0
1,885.682984,-1.924623,36.748547,1983.106925,-1.984925,70.611385,1148.560591,-1.984925,50.985131,202.984862,...,0.958715,32.81718,-1.947752,0.809194,0.514393,29.750168,-1.947752,0.832968,0.587504,0
2,135.666078,-1.962042,0.644536,0.037629,80.420641,-1.975131,0.913353,0.182295,30.849639,-1.972513,...,968.808077,-1.951327,42.873981,2317.918867,-1.986726,80.597075,952.529188,-1.986726,50.249259,0
3,224.304604,-1.964109,1.623689,2.874839,84.771866,-1.964109,0.272399,-0.67241,40.526828,-1.983911,...,816.347656,-1.947867,37.948618,2471.691043,-1.985782,80.317119,926.636653,-1.966825,49.259067,0
4,403.636721,-1.936585,25.305731,5655.153686,-1.985366,115.683595,590.178219,-1.956098,39.452069,441.559727,...,0.266654,23.094031,-1.954433,1.389351,1.15994,34.830868,-1.949507,0.829009,-0.037226,1


In [7]:
len(ocolumns)

30

In [8]:
processed.shape

(136, 30)

In [9]:
X = processed[ocolumns[:-1]]
y = processed[ocolumns[-1]]

In [11]:
scaler = preprocessing.MinMaxScaler()   # since the data set is not gaussian
scaled_df = scaler.fit_transform(X)
X = pd.DataFrame(scaled_df,columns = ocolumns[:-1])
y = processed[ocolumns[-1]]

In [12]:
svclassifier = SVC(kernel = 'linear', C = 100.0, gamma = 1.0)
kfold = KFold(n_splits=5,shuffle=True)
results_kfold = cross_val_score(svclassifier, X, y, cv=kfold)
print(results_kfold)
print(results_kfold.mean()*100)

[0.96428571 0.96296296 0.96296296 0.96296296 0.92592593]
95.58201058201057


In [12]:
# parameter tuning

param_grid = {'C': [0.01,0.1, 1, 10, 100,1000], 'gamma': [1, 0.1, 0.01, 0.001, 0.00001, 10,100]}
clf_grid = GridSearchCV(SVC(kernel = 'linear'), param_grid)
clf_grid.fit(X, y)
print("Best Parameters:\n", clf_grid.best_params_)



Best Parameters:
 {'C': 100, 'gamma': 1}


