In [1]:
import numpy as np
import pandas as pd
from scipy import signal
import matplotlib.pyplot as plt
import numpy.fft as nf
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

In [2]:
def filter_data(df, column='aT (m/s^2)', order=3, cutoff=0.15):
    """Return a zero-phase low-pass filtered version of one column of ``df``.

    A digital Butterworth low-pass filter is designed with ``signal.butter``
    and applied forwards and backwards with ``signal.filtfilt``.  ``df`` is
    only read, never modified.

    Parameters
    ----------
    df : pandas.DataFrame (or any mapping of column name -> 1-D numeric data)
        Must contain ``column``.
    column : str, optional
        Name of the column to filter.  Defaults to ``'aT (m/s^2)'``.
    order : int, optional
        Filter order handed to ``signal.butter``.  Defaults to 3.
    cutoff : float, optional
        Critical frequency handed to ``signal.butter`` as ``Wn``.  No ``fs``
        is passed, so SciPy interprets it as a fraction of the Nyquist
        frequency.  Defaults to 0.15.

    Returns
    -------
    numpy.ndarray
        The filtered samples, same length as ``df[column]``.
    """
    b, a = signal.butter(order, cutoff, btype='lowpass', analog=False)
    return signal.filtfilt(b, a, df[column])


def calculate_vel(df):
    """Add ``'diff_time'`` and ``'velocity'`` columns to ``df`` and return it.

    ``diff_time`` is the absolute difference between each row's ``'time'`` and
    the previous row's; the first row, which has no predecessor, receives its
    own ``'time'`` value.  ``velocity`` is ``diff_time * 'aT (m/s^2)'`` for
    each row.  No running sum is applied here, so each value is the product
    for that single sample interval.

    Only the ``'time'`` and ``'aT (m/s^2)'`` columns are read, so unrelated
    (including non-numeric) columns are left alone, and an empty frame simply
    gains two empty columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'time'`` and ``'aT (m/s^2)'``.  Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    step = df['time'].diff().abs()
    if len(step) > 0:
        # No previous sample exists for the first row: use its own timestamp.
        step.iloc[0] = df['time'].iloc[0]
    df['diff_time'] = step
    df['velocity'] = df['diff_time'] * df['aT (m/s^2)']
    return df
    

def calculate_delta_position(df):
    """Add a ``'position'`` column equal to ``'diff_time' * 'velocity'``.

    The column is written onto ``df`` itself, and that same object is
    returned.
    """
    interval = df['diff_time']
    speed = df['velocity']
    df['position'] = interval * speed
    return df

def calculate_total_position(df):
    """Add a ``'total_position'`` column: the running sum of ``'position'``.

    Only the ``'position'`` column is accumulated, so other columns
    (numeric or not) are neither read nor summed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'position'``.  Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    df['total_position'] = df['position'].cumsum()
    return df


def transform_df(df, max_rows=16000):
    """Run the full preprocessing pipeline on a raw recording.

    Steps, in order:

    1. replace ``'aT (m/s^2)'`` with its low-pass filtered version
       (``filter_data``);
    2. add ``'diff_time'`` and ``'velocity'`` (``calculate_vel``);
    3. add ``'position'`` (``calculate_delta_position``);
    4. add ``'total_position'`` (``calculate_total_position``);
    5. keep only the first ``max_rows`` rows.

    The truncation happens last, so filtering and accumulation are done on
    the complete recording.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'time'`` and ``'aT (m/s^2)'``.  NOTE: the frame is
        modified in place (the acceleration column is overwritten and new
        columns are added), so calling this twice on the same object filters
        the signal twice.
    max_rows : int or None, optional
        Number of leading rows to keep.  Defaults to 16000; ``None`` keeps
        every row.

    Returns
    -------
    pandas.DataFrame
        The first ``max_rows`` rows of the transformed frame.
    """
    df['aT (m/s^2)'] = filter_data(df)
    df = calculate_vel(df)
    df = calculate_delta_position(df)
    df = calculate_total_position(df)
    # Positional slice: always "the first max_rows rows", whatever the index.
    return df.iloc[:max_rows]


def get_model(df):
    """Fit ``total_position`` as a straight line in ``time`` and return the model.

    The design matrix has two columns, a column of ones and the ``'time'``
    values, so the intercept is learned as an ordinary coefficient and
    ``LinearRegression`` is told not to add its own.
    """
    time_col = np.asarray(df['time']).reshape(-1, 1)
    design = np.hstack([np.ones(time_col.shape), time_col])
    regressor = LinearRegression(fit_intercept=False)
    regressor.fit(design, df['total_position'])
    return regressor
    

def get_model_train_set(df, random_state=0):
    """Build the ``[1, time]`` design matrix and split it into train/validation.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'time'`` and ``'total_position'``.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``train_test_split``.  The default of 0 makes the split
        repeatable across kernel restarts; pass ``None`` for a different
        random split on every call.

    Returns
    -------
    tuple
        ``(X_train, X_valid, y_train, y_valid)`` where the ``X`` parts are
        two-column arrays (ones, time) and the ``y`` parts are the matching
        ``'total_position'`` values.
    """
    X = np.stack([df['time']], axis=1)
    # Explicit ones column: the models are fitted with fit_intercept=False.
    X_with = np.concatenate([np.ones(X.shape), X], axis=1)
    X_train, X_valid, y_train, y_valid = train_test_split(
        X_with, df['total_position'], random_state=random_state
    )
    return X_train, X_valid, y_train, y_valid


In [3]:
# Reference model "long": linear fit of total_position against time for the
# 'foot' recording (original note: 10 mins).
df=pd.read_csv('foot') 
df=transform_df(df)
model_long=get_model(df)

# The fitted intercept and slope can be inspected via model_long.coef_.

# Reference model "short": the same fit for the 'injury' recording (original
# note: 3 mins). This is the model used for scoring in the cells below.
# NOTE(review): the original comment labelled this a "foot model" although it
# is fitted on the 'injury' file -- confirm which was intended.
injury=pd.read_csv('injury')
injury=transform_df(injury)
model_short=get_model(injury)


In [4]:
# Recordings to score against model_short, and the label attached to each one
# ('0' for the right_foot/female files, '1' for the injury files).
list_of_title = np.array(['right_foot_1', 'right_foot_2', 'right_foot_3', 'female_1', 'female_2',
                          'injury', 'injury_1', 'injury_2'])
list_of_situation = np.array(['0', '0', '0', '0', '0', '1', '1', '1'])
assert len(list_of_title) == len(list_of_situation), "every recording needs exactly one label"

# One [train_score, valid_score, situation] row per recording, in title order.
list_of_whole = []
for title, situation in zip(list_of_title, list_of_situation):
    input_file = transform_df(pd.read_csv(title))

    X_train, X_valid, y_train, y_valid = get_model_train_set(input_file)

    print("The injury situation of input", title, "is ", situation)
    # model_short is NOT refitted here: both subsets are scored against the
    # model fitted on the 'injury' recording.
    temp1 = model_short.score(X_train, y_train)
    temp2 = model_short.score(X_valid, y_valid)
    print("The train score of input", title, "is ", temp1)
    print("The valid score of input", title, "is ", temp2)
    list_of_whole.append([temp1, temp2, situation])




The injury situation of input right_foot_1 is  0
The train socre of input right_foot_1 is  -0.7223706755930259
The valid socre of input right_foot_1 is  -0.7360801210370651
The injury situation of input right_foot_2 is  0
The train socre of input right_foot_2 is  0.4777120902285523
The valid socre of input right_foot_2 is  0.4821691655643351
The injury situation of input right_foot_3 is  0
The train socre of input right_foot_3 is  -0.2425672693217027
The valid socre of input right_foot_3 is  -0.2743506883479294
The injury situation of input female_1 is  0
The train socre of input female_1 is  -0.20359903552981828
The valid socre of input female_1 is  -0.2158528184617805
The injury situation of input female_2 is  0
The train socre of input female_2 is  -0.40461328632861937
The valid socre of input female_2 is  -0.42947852594339553
The injury situation of input injury is  1
The train socre of input injury is  0.9983987932474274
The valid socre of input injury is  0.998388367797898
The in

In [5]:
# Display the collected [train_score, valid_score, situation] rows.
list_of_whole

[[-0.7223706755930259, -0.7360801210370651, '0'],
 [0.4777120902285523, 0.4821691655643351, '0'],
 [-0.2425672693217027, -0.2743506883479294, '0'],
 [-0.20359903552981828, -0.2158528184617805, '0'],
 [-0.40461328632861937, -0.42947852594339553, '0'],
 [0.9983987932474274, 0.998388367797898, '1'],
 [0.6314584795128141, 0.6402336353291085, '1'],
 [0.513153854802755, 0.5022287839642208, '1']]

In [6]:
# Tabulate the per-recording scores. Row labels are taken from list_of_title
# (the same array that drove the scoring loop) so they cannot drift out of
# sync with the rows of list_of_whole.
dataframe = pd.DataFrame(
    list_of_whole,
    columns=['train_score', 'valid_score', 'situation'],
    index=list_of_title,
)

dataframe

Unnamed: 0,train_score,valid_score,situation
right_foot_1,-0.722371,-0.73608,0
right_foot_2,0.477712,0.482169,0
right_foot_3,-0.242567,-0.274351,0
female_1,-0.203599,-0.215853,0
female_2,-0.404613,-0.429479,0
injury,0.998399,0.998388,1
injury_1,0.631458,0.640234,1
injury_2,0.513154,0.502229,1


In [7]:
# Fit a Gaussian naive Bayes classifier that predicts 'situation' from the
# two score columns of `dataframe`.
G_model = GaussianNB()
X = dataframe[['train_score', 'valid_score']]
y = dataframe['situation']
G_model.fit(X, y)

# Per-class feature means.
print(G_model.theta_)
# Per-class feature variances. `sigma_` was deprecated in scikit-learn 1.0 and
# removed in 1.2 in favour of `var_`; fall back to `sigma_` on older releases.
print(G_model.var_ if hasattr(G_model, 'var_') else G_model.sigma_)

[[-0.21908764 -0.2347186 ]
 [ 0.71433704  0.71361693]]
[[0.15480693 0.1610299 ]
 [0.0426782  0.04372161]]


In [8]:
# Sanity check on a recording that is not in list_of_title ('male_5'): score
# it against model_short exactly as in the scoring loop, then classify the
# resulting [train_score, valid_score] pair with G_model.
# Original author's note: the check worked.
# NOTE(review): the original comment also referred to an "injury_classifier"
# and a "normal_classifier"; neither name appears in the visible notebook --
# confirm what comparison was meant.
test=pd.read_csv('male_5')
test=transform_df(test)
X_train, X_valid, y_train, y_valid = get_model_train_set(test)
temp1 = model_short.score(X_train,y_train)
temp2 = model_short.score(X_valid,y_valid)
# predict() expects a 2-D input: one row holding the two features.
X_check=[[temp1,temp2]]
G_model.predict(X_check)

array(['0'], dtype='<U1')