#### PROBLEM STATEMENT :
*Develop and Evaluate a machine learning model for 3D skeleton-based action recognition using UTD MHAD dataset.
*Odd subjects (S1, S3, S5, S7) for Training and the rest (S2, S4, S6, S8) for testing. 
*Euclidean distance Distance feature from Hip joint to remaining all joints


In [65]:
#Step 1 : Importing Library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os


In [66]:
#Step 2 : Importing Dataset
import scipy.io as sio
action_dict = sio.loadmat("D:/AI and ML/Skeleton/a1_s1_t1_skeleton.mat")

In [67]:
#Min Max of Frames and Shapes
import os
from scipy.io import loadmat
dataDir = "D:/AI and ML/Skeleton/"
mats = []
for file in os.listdir( dataDir ) :
    mats.append(loadmat(dataDir+file))
ar=[]
for arr in mats:
    ar.append(arr["d_skel"].shape)

sh_max=max(ar,key=lambda i:i[2])
fr_max=max(ar,key=lambda i:i[2])[2]
sh_min=min(ar,key=lambda i:i[2])
fr_min=min(ar,key=lambda i:i[2])[2]
print("The Maximum shape: ",sh_max)
print("The Maximum Frame: ",fr_max)
print("The Minimum shape: ",sh_min)
print("The Minimum Frame: ",fr_min)

The Maximum shape:  (20, 3, 125)
The Maximum Frame:  125
The Minimum shape:  (20, 3, 41)
The Minimum Frame:  41


In [69]:
#Create Train Data and Test Data Folders

os.makedirs('D:/AI and ML/TrainData')

os.makedirs('D:/AI and ML/TestData')

In [70]:
#Preparing dataset
#Getting train and test set
import shutil

for i in range(1,29):
    for j in range(1,9):
        for k in range(1,5):
            try:
                src_path = f"D:/AI and ML/Skeleton/a{i}_s{j}_t{k}_skeleton.mat"
                if j%2==1:
                    dst_path = f"D:/AI and ML/TrainData/a{i}_s{j}_t{k}_skeleton.mat"
                else:
                    dst_path = f"D:/AI and ML/TestData/a{i}_s{j}_t{k}_skeleton.mat"                    
                shutil.copy2(src_path, dst_path)
                
            except:
               pass
                

              

In [71]:
#Changing the shape of the whole dataset
import numpy as np
import scipy.io as sio
def changeshapeMatrix(dataset):
  total_matrix=[]
  for i in range(1,28):  #actions
      for j in range(1,9):  #subject
          for k in range(1,5):  #trial
            try:
                  action_matrix=[]
                  action_dict = sio.loadmat(f"{dataset}/a{i}_s{j}_t{k}_skeleton.mat")
                  action_sequence = action_dict['d_skel']
                  frames = 41               #fixing no of frames to 41 for all data.
                  for l in range(frames):  #frame No
                    frame_matrix=[]
                    for m in range(20):      #joint No.
                      x =  action_sequence[m][0][l]
                      y =  action_sequence[m][1][l]
                      z =  action_sequence[m][2][l]
                      frame_matrix.append([x,y,z])
                    action_matrix.append(frame_matrix)
                  total_matrix.append(action_matrix)  
            except:   
                  pass
  return total_matrix                
x_train =changeshapeMatrix('D:/AI and ML/TrainData')           
x_train[1][0]

[[-0.04767218977212906, 0.47897595167160034, 2.8248789310455322],
 [-0.041020020842552185, 0.2947116196155548, 2.8754937648773193],
 [-0.0340079739689827, -0.05171520635485649, 2.889636278152466],
 [-0.02850726805627346, -0.11463183164596558, 2.8382222652435303],
 [-0.2013532966375351, 0.1849222183227539, 2.8693034648895264],
 [-0.2405506670475006, -0.08823112398386002, 2.840900421142578],
 [-0.24052491784095764, -0.32166218757629395, 2.804069757461548],
 [-0.234945148229599, -0.3520013391971588, 2.7793638706207275],
 [0.13094641268253326, 0.1919800490140915, 2.8959977626800537],
 [0.18621505796909332, -0.08439784497022629, 2.886427164077759],
 [0.2157287746667862, -0.3134651184082031, 2.8209006786346436],
 [0.2224208265542984, -0.39268627762794495, 2.79575252532959],
 [-0.09961481392383575, -0.19117848575115204, 2.8168394565582275],
 [-0.11339667439460754, -0.6602845788002014, 2.921250581741333],
 [-0.07470904290676117, -1.023971438407898, 2.9456706047058105],
 [-0.10982659459114075, 

In [72]:
x_train=np.array(x_train)

In [73]:
x_train.shape

(431, 41, 20, 3)

In [74]:
type(x_train[0][0][0])

numpy.ndarray

**Finding the Euclidean distance between hip point and all other points.**

In [75]:

from scipy.spatial import distance

def distancePerFrame_matrix(arr):
  dist=[]
  for i in range(arr.shape[0]): #frame
    for j in range(20):
      if j==3:
        pass
      else:
        dist.append(distance.euclidean(arr[i][3], arr[i][j]))
  return dist

In [76]:
# distance matrix
def total_distance(x):
  distance=[]
  for i in range(x.shape[0]): #total actions
    d=distancePerFrame_matrix(x[i])
    distance.append(d)
  return distance

Train_TotalDistance = total_distance(x_train)
Train_TotalDistance[0][6]

0.3098147621307305

In [77]:
#Total euclidean distance values for Train data.
Train_TotalDistance = np.array(Train_TotalDistance)
Train_TotalDistance.shape

(431, 779)

In [78]:
Train_TotalDistance[0].shape

(779,)

**Test Data Processing**

In [79]:
x_test =changeshapeMatrix('D:/AI and ML/TestData')           
x_test[0][0][0][0]

-0.09021344780921936

In [80]:
x_test=np.array(x_test)
x_test.shape  

(430, 41, 20, 3)

In [81]:
x_test[0].shape

(41, 20, 3)

In [82]:
#Total euclidean distance values for Test data.
Test_TotalDistance = total_distance(x_test)
Test_TotalDistance = np.array(Test_TotalDistance)
Test_TotalDistance.shape

(430, 779)

In [83]:
#Extracting action names

def action_names(dataset):
    actions=[]
    for i in range(len(os.listdir(dataset))):
        actions.append(os.listdir(dataset)[i].split('_')[0])
    return actions


trainset_action_names = action_names('D:/AI and ML/TestData')
trainset_action_names

['a10',
 'a10',
 'a10',
 'a10',
 'a10',
 'a10',
 'a10',
 'a10',
 'a10',
 'a10',
 'a10',
 'a10',
 'a10',
 'a10',
 'a10',
 'a10',
 'a11',
 'a11',
 'a11',
 'a11',
 'a11',
 'a11',
 'a11',
 'a11',
 'a11',
 'a11',
 'a11',
 'a11',
 'a11',
 'a11',
 'a11',
 'a11',
 'a12',
 'a12',
 'a12',
 'a12',
 'a12',
 'a12',
 'a12',
 'a12',
 'a12',
 'a12',
 'a12',
 'a12',
 'a12',
 'a12',
 'a12',
 'a12',
 'a13',
 'a13',
 'a13',
 'a13',
 'a13',
 'a13',
 'a13',
 'a13',
 'a13',
 'a13',
 'a13',
 'a13',
 'a13',
 'a13',
 'a13',
 'a13',
 'a14',
 'a14',
 'a14',
 'a14',
 'a14',
 'a14',
 'a14',
 'a14',
 'a14',
 'a14',
 'a14',
 'a14',
 'a14',
 'a14',
 'a14',
 'a14',
 'a15',
 'a15',
 'a15',
 'a15',
 'a15',
 'a15',
 'a15',
 'a15',
 'a15',
 'a15',
 'a15',
 'a15',
 'a15',
 'a15',
 'a15',
 'a15',
 'a16',
 'a16',
 'a16',
 'a16',
 'a16',
 'a16',
 'a16',
 'a16',
 'a16',
 'a16',
 'a16',
 'a16',
 'a16',
 'a16',
 'a16',
 'a16',
 'a17',
 'a17',
 'a17',
 'a17',
 'a17',
 'a17',
 'a17',
 'a17',
 'a17',
 'a17',
 'a17',
 'a17',
 'a17',


In [84]:
#Column names
def column_names():
    column_names=[]
    for i in range(1,42):  #frames
      for j in range(1,21):  #joints
        if j==4:
          pass
        else:
          column_names.append(f"f{i}_j4_j{j}")
    return column_names

column_names = column_names()
column_names

['f1_j4_j1',
 'f1_j4_j2',
 'f1_j4_j3',
 'f1_j4_j5',
 'f1_j4_j6',
 'f1_j4_j7',
 'f1_j4_j8',
 'f1_j4_j9',
 'f1_j4_j10',
 'f1_j4_j11',
 'f1_j4_j12',
 'f1_j4_j13',
 'f1_j4_j14',
 'f1_j4_j15',
 'f1_j4_j16',
 'f1_j4_j17',
 'f1_j4_j18',
 'f1_j4_j19',
 'f1_j4_j20',
 'f2_j4_j1',
 'f2_j4_j2',
 'f2_j4_j3',
 'f2_j4_j5',
 'f2_j4_j6',
 'f2_j4_j7',
 'f2_j4_j8',
 'f2_j4_j9',
 'f2_j4_j10',
 'f2_j4_j11',
 'f2_j4_j12',
 'f2_j4_j13',
 'f2_j4_j14',
 'f2_j4_j15',
 'f2_j4_j16',
 'f2_j4_j17',
 'f2_j4_j18',
 'f2_j4_j19',
 'f2_j4_j20',
 'f3_j4_j1',
 'f3_j4_j2',
 'f3_j4_j3',
 'f3_j4_j5',
 'f3_j4_j6',
 'f3_j4_j7',
 'f3_j4_j8',
 'f3_j4_j9',
 'f3_j4_j10',
 'f3_j4_j11',
 'f3_j4_j12',
 'f3_j4_j13',
 'f3_j4_j14',
 'f3_j4_j15',
 'f3_j4_j16',
 'f3_j4_j17',
 'f3_j4_j18',
 'f3_j4_j19',
 'f3_j4_j20',
 'f4_j4_j1',
 'f4_j4_j2',
 'f4_j4_j3',
 'f4_j4_j5',
 'f4_j4_j6',
 'f4_j4_j7',
 'f4_j4_j8',
 'f4_j4_j9',
 'f4_j4_j10',
 'f4_j4_j11',
 'f4_j4_j12',
 'f4_j4_j13',
 'f4_j4_j14',
 'f4_j4_j15',
 'f4_j4_j16',
 'f4_j4_j17',
 'f4_j4_j1

**Data Frame Creation**

In [85]:
train_df = pd.DataFrame(Train_TotalDistance, columns = column_names)
train_df['action']=action_names('D:/AI and ML/TrainData')
train_df.head()

Unnamed: 0,f1_j4_j1,f1_j4_j2,f1_j4_j3,f1_j4_j5,f1_j4_j6,f1_j4_j7,f1_j4_j8,f1_j4_j9,f1_j4_j10,f1_j4_j11,...,f41_j4_j12,f41_j4_j13,f41_j4_j14,f41_j4_j15,f41_j4_j16,f41_j4_j17,f41_j4_j18,f41_j4_j19,f41_j4_j20,action
0,0.596486,0.413395,0.081272,0.34785,0.214565,0.299434,0.309815,0.352218,0.225703,0.313889,...,0.413896,0.10393,0.558047,0.917345,0.982832,0.109738,0.555694,0.911627,0.975258,a10
1,0.594067,0.411227,0.081438,0.347238,0.213697,0.298294,0.320039,0.350392,0.222134,0.315414,...,0.416734,0.107002,0.555068,0.921251,0.984278,0.108967,0.564071,0.916541,0.983351,a10
2,0.595532,0.413216,0.081486,0.348421,0.21181,0.298,0.321079,0.352362,0.225022,0.316188,...,0.376553,0.109298,0.556776,0.919226,0.981184,0.107844,0.559775,0.913966,0.973348,a10
3,0.596916,0.413163,0.081635,0.346888,0.211688,0.297715,0.323778,0.351386,0.22479,0.312891,...,0.352504,0.10548,0.555653,0.921737,0.983678,0.106139,0.558161,0.914565,0.979287,a10
4,0.577218,0.393069,0.068641,0.328149,0.193807,0.259964,0.292001,0.329771,0.197177,0.265825,...,0.388782,0.099644,0.435761,0.772174,0.807672,0.096684,0.513373,0.861428,0.908636,a10


In [86]:
test_df = pd.DataFrame(Test_TotalDistance, columns = column_names)
test_df["actions"]=action_names('D:/AI and ML/TestData')
test_df.head()


Unnamed: 0,f1_j4_j1,f1_j4_j2,f1_j4_j3,f1_j4_j5,f1_j4_j6,f1_j4_j7,f1_j4_j8,f1_j4_j9,f1_j4_j10,f1_j4_j11,...,f41_j4_j12,f41_j4_j13,f41_j4_j14,f41_j4_j15,f41_j4_j16,f41_j4_j17,f41_j4_j18,f41_j4_j19,f41_j4_j20,actions
0,0.589142,0.405969,0.078732,0.327715,0.202777,0.266993,0.302379,0.337826,0.205231,0.280498,...,0.38936,0.09578,0.468539,0.791354,0.850925,0.092787,0.474196,0.79382,0.843147,a10
1,0.594909,0.407702,0.077187,0.326511,0.196142,0.269649,0.320815,0.338866,0.212974,0.282967,...,0.36666,0.099954,0.482406,0.797747,0.860573,0.101843,0.482008,0.79986,0.852667,a10
2,0.593122,0.408253,0.075317,0.329442,0.195444,0.269815,0.302283,0.335721,0.214719,0.284685,...,0.359505,0.098968,0.48194,0.792499,0.85522,0.098338,0.479028,0.800937,0.848762,a10
3,0.589969,0.403928,0.075352,0.325942,0.201803,0.274966,0.302331,0.33776,0.206404,0.272746,...,0.374528,0.09845,0.481968,0.789283,0.855427,0.095169,0.478841,0.794092,0.845206,a10
4,0.569038,0.381323,0.071181,0.309187,0.20008,0.268207,0.280244,0.322795,0.199353,0.266214,...,0.346737,0.095463,0.503745,0.8373,0.881858,0.093372,0.440486,0.841663,0.887768,a10


**Data Preprocessing**

In [87]:
#Imputing missing data
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 431 entries, 0 to 430
Columns: 780 entries, f1_j4_j1 to action
dtypes: float64(779), object(1)
memory usage: 2.6+ MB


**Splitting the data into Train and Test data.**

In [88]:
x_train = train_df.iloc[:,:-1].values
y_train = train_df.iloc[:,-1].values
x_test = test_df.iloc[:,:-1].values
y_test = test_df.iloc[:,-1].values

**Training The Model**

In [89]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(x_train, y_train)
y_train_pred = lr.predict(x_train)
y_test_pred = lr.predict(x_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


**Evaluating the Model**

In [90]:
#Evaluating the Model - Train data
from sklearn.metrics import accuracy_score
accuracy_score(y_train,y_train_pred)

0.8631090487238979

In [91]:
#Evaluating the Model - Test data
accuracy_score(y_test, y_test_pred)

0.5255813953488372

In [92]:
y_test_pred

array(['a27', 'a27', 'a10', 'a10', 'a10', 'a10', 'a27', 'a11', 'a10',
       'a10', 'a10', 'a10', 'a18', 'a27', 'a18', 'a18', 'a27', 'a27',
       'a27', 'a27', 'a11', 'a11', 'a11', 'a14', 'a18', 'a18', 'a18',
       'a18', 'a14', 'a11', 'a11', 'a11', 'a27', 'a27', 'a27', 'a27',
       'a12', 'a27', 'a12', 'a12', 'a18', 'a18', 'a18', 'a18', 'a27',
       'a27', 'a27', 'a27', 'a13', 'a13', 'a24', 'a13', 'a13', 'a24',
       'a24', 'a13', 'a13', 'a13', 'a13', 'a13', 'a13', 'a13', 'a13',
       'a13', 'a27', 'a2', 'a27', 'a2', 'a10', 'a10', 'a10', 'a10', 'a12',
       'a14', 'a12', 'a12', 'a14', 'a14', 'a11', 'a10', 'a22', 'a22',
       'a24', 'a9', 'a15', 'a15', 'a15', 'a15', 'a4', 'a15', 'a13', 'a15',
       'a15', 'a15', 'a4', 'a5', 'a16', 'a16', 'a16', 'a16', 'a26', 'a26',
       'a16', 'a24', 'a25', 'a16', 'a16', 'a16', 'a16', 'a26', 'a26',
       'a26', 'a19', 'a17', 'a17', 'a19', 'a14', 'a17', 'a17', 'a17',
       'a18', 'a18', 'a19', 'a19', 'a19', 'a18', 'a19', 'a27', 'a18',
     