# Libraries

Import your libraries

In [1]:
import numpy as np
import pandas as pd
import math
from scipy import signal
import statistics as cal
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, plot_confusion_matrix, confusion_matrix

# Preparing Data

Split the DataFrame into chunks of n/100 seconds and combine the per-chunk results into one DataFrame. With Sum, Div and Action, split into Train and Valid/Test.

In [2]:
def _normalise_operations(OperationList):
    """Return a ``{column name: lower-cased operation name}`` dict.

    Accepts the documented one-row DataFrame
    (``pd.DataFrame({'frAcc': ['cal.mean']})``), a plain dict, or an iterable
    of dicts (``[{'frAcc': ['cal.mean']}]``). Operation names are compared
    case-insensitively, so ``'Cal.mean'`` is treated like ``'cal.mean'``.
    When several names are given for one column only the first is used.
    """
    if OperationList is None:
        return {}
    if isinstance(OperationList, pd.DataFrame):
        mappings = [OperationList.to_dict(orient='list')]
    elif isinstance(OperationList, dict):
        mappings = [OperationList]
    else:
        mappings = list(OperationList)

    operations = {}
    for mapping in mappings:
        if not isinstance(mapping, dict):
            raise TypeError(
                'OperationList must be a DataFrame, a dict or an iterable of dicts'
            )
        for column, names in mapping.items():
            names = [names] if isinstance(names, str) else list(names)
            if names:
                operations[str(column)] = str(names[0]).strip().lower()
    return operations


def _aggregate_column(values, operation):
    """Reduce the values of one chunk column to a single number."""
    if operation == 'cal.mean':  # add more operations here if needed
        return cal.mean(values)
    # Default for every other column/operation: largest absolute value.
    return max(abs(value) for value in values)


def Dataloader(CsvFile, ChunkSize, ChunkOverlap, OperationList):
    """Load a CSV file and aggregate it into one row per fixed-size chunk.

    The file is cut into chunks of ``ChunkSize`` consecutive rows, a new chunk
    starting every ``ChunkOverlap`` rows. Chunks that are shorter than
    ``ChunkSize`` (the tail of the file) are discarded. Every remaining chunk
    is reduced to a single row: columns mapped to ``'cal.mean'`` in
    ``OperationList`` become the chunk mean, all other columns become the
    maximum absolute value within the chunk.

    Parameters
    ----------
    CsvFile : str
        Path of the CSV file, e.g. 'matrix_player_15_game_2_quartersplit.csv'.
        An 'Unnamed: 0' index column is dropped when present.
    ChunkSize : int
        Number of rows per chunk (rows, not seconds), e.g. 100.
    ChunkOverlap : int
        Number of rows between the starts of consecutive chunks, e.g. 50
        (half-overlapping chunks when ChunkSize is 100).
    OperationList : DataFrame, dict or iterable of dicts
        Column name -> operation name, e.g.
        ``pd.DataFrame({'frAcc': ['cal.mean'], 'wheelRotationalSpeedX': ['cal.mean']})``.

    Returns
    -------
    pandas.DataFrame
        One row per complete chunk, with the same columns as the CSV file
        (minus 'Unnamed: 0'). Empty when the file has no complete chunk.

    Raises
    ------
    ValueError
        If ChunkSize or ChunkOverlap is smaller than 1.
    """
    if ChunkSize < 1 or ChunkOverlap < 1:
        raise ValueError('ChunkSize and ChunkOverlap must both be at least 1')

    # Load the csv file; the saved index column carries no signal.
    df = pd.read_csv(CsvFile)
    df = df.drop(columns=['Unnamed: 0'], errors='ignore')

    operations = _normalise_operations(OperationList)
    columns = list(df.columns)

    rows = []
    for start in range(0, df.shape[0], ChunkOverlap):
        frame = df.iloc[start:start + ChunkSize]
        if len(frame) != ChunkSize:
            continue  # incomplete chunk at the end of the file
        rows.append([
            _aggregate_column(frame[column].tolist(), operations.get(str(column)))
            for column in columns
        ])

    return pd.DataFrame(rows, columns=columns)
    


# Importing Data

Import the CSV file with Actions, Sum and Div as a Dataframe called df. Fill the empty values of Action with 0. 
Replace NaN values with 0. Delete first 100 rows. 

In [3]:
# Dataloader looks columns up in a one-row DataFrame and only recognises the
# exact operation name 'cal.mean'; a list wrapping a dict, or 'Cal.mean',
# never matches and the column silently falls back to the max-absolute-value
# aggregation.
OperationList = pd.DataFrame({'wheelRotationalSpeedX': ['cal.mean']})

# Training data is read as-is from its CSV file.
df_train = pd.read_csv('Player_15_Game2_Sprints_Q1234.csv')

# Test data is aggregated into chunks of 100 rows, a new chunk starting every 50 rows.
df_test = Dataloader('matrix_Player_8_game_2_QuarterSplit.csv', 100, 50, OperationList)
#print(Dataloader('matrix_Player_15_game_2_QuarterSplit.csv', 100, 50, OperationList))

In [4]:
def _add_wheel_features(features_frame):
    """Add the two derived wheel-speed columns to ``features_frame`` in place.

    wheelRotationalSpeedXDiff: difference between consecutive
        wheelRotationalSpeedX values, with 0 inserted for the first row.
    Sum_WheelX_FrameZ: wheelRotationalSpeedX + frameRotationalSpeedZ.
    """
    wheel_speed = features_frame['wheelRotationalSpeedX']
    step_change = np.diff(wheel_speed, n=1)
    features_frame['wheelRotationalSpeedXDiff'] = np.insert(step_change, 0, 0)
    features_frame['Sum_WheelX_FrameZ'] = wheel_speed + features_frame['frameRotationalSpeedZ']


# The same derived columns are needed on both the training and the test frame.
_add_wheel_features(df_train)
_add_wheel_features(df_test)

In [5]:
# Butterworth filter settings for the two rotational-speed signals of the test frame.
# NOTE(review): df_test is built from chunks that start every 50 rows, so
# sampling_freq = 100 may not describe the spacing of these rows - confirm.
# NOTE(review): only df_test receives the Filt_* columns here, while the feature
# selection further down also reads 'Filt_FrameZ' from df_train - confirm the
# training CSV already contains that column.
Order = 5
cutoff_freq = 1.5
sampling_freq = 100
sampling_duration = len(df_test)

# Without an explicit fs argument, butter() takes the cutoff relative to the
# Nyquist frequency (half the sampling frequency).
normalized_cutoff_freq = 2 * cutoff_freq / sampling_freq
numerator_coeffs, denominator_coeffs = signal.butter(Order, normalized_cutoff_freq)

# Filter each raw signal and store the result under its Filt_* name.
for raw_name, filtered_name in (('wheelRotationalSpeedX', 'Filt_WheelX'),
                                ('frameRotationalSpeedZ', 'Filt_FrameZ')):
    df_test[filtered_name] = signal.lfilter(
        numerator_coeffs, denominator_coeffs, df_test[raw_name]
    )

## Split data set in train and test
Below are the candidate column sets used to identify the best features for the dataset.

In [6]:
# Select the model inputs (one of the candidate feature sets below) and the
# training target (the 'Action' column).

train = df_train
test = df_test

# Candidate feature sets. column1..column6 each extend the previous set by one feature.
column1 = ['frSpeed']
column2 = column1 + ['frAcc']
column3 = column2 + ['wheelRotationalSpeedXDiff']
column4 = column3 + ['frRoAcc']
column5 = column4 + ['frameRotationalSpeedZ']
column6 = column5 + ['wheelRotationalSpeedX']
# column7 / column8 add the timeline and/or the filtered signals to column6.
column7 = ['timeLine'] + column6 + ['Filt_FrameZ']
column8 = column6 + ['Filt_FrameZ', 'Filt_WheelX']
# Sets built around the wheel-speed difference.
column9 = ['wheelRotationalSpeedXDiff']
column10 = column9 + ['Filt_WheelX']
column11 = column10 + ['Filt_FrameZ']
column12 = ['wheelRotationalSpeedXDiff', 'frSpeed', 'frAcc', 'frameRotationalSpeedZ']
column13 = ['frAcc', 'frSpeed', 'frameRotationalSpeedZ', 'Sum_WheelX_FrameZ', 'wheelRotationalSpeedXDiff']

# Feature set currently in use for both frames.
selected_columns = column7

X_train = train[selected_columns]
y_train = train[['Action']]

X_test = test[selected_columns]

# No target is taken from the test frame; the line below is kept disabled.
#y_test = test[['Action']]

#y_test

# Applying grid search


In [7]:

# Base estimator and hyper-parameter grid, kept for the grid search that is
# currently disabled below.
tree_model_2 = RandomForestClassifier(class_weight='balanced', n_jobs=10, random_state=6)

param_grid_model_2 = {
    'n_estimators': [100, 200, 500, 800],
    'max_depth': [10, 20, 50],
    'min_samples_leaf': [1, 2, 5, 8, 10],
}

# The grid search is commented out; a forest with fixed hyper-parameters is used instead.
#tree = GridSearchCV(tree_model_2,param_grid_model_2)
tree = RandomForestClassifier(
    class_weight='balanced',
    n_jobs=10,
    random_state=6,
    max_depth=10,
    min_samples_leaf=8,
    n_estimators=100,
)

# Fit exactly once. y_train is a single-column DataFrame, so it is flattened to
# the 1-D target that scikit-learn expects (avoids the column-vector warning).
tree = tree.fit(X_train, np.ravel(y_train))
#print(tree.best_params_)
#print(tree.best_score_)
#print("Test accuracy of best grid search hypers:", tree.score(X_test, y_test))


# Export Data

Export the results in CSV format. Layout = Time,Action

In [8]:
# Disabled export step: it would predict an action for every row of X_test,
# attach the predictions as an 'Action' column and write the frame to CSV.
# All lines are commented out, so running this cell currently does nothing.
# NOTE(review): X_test is a column selection of the test frame, so assigning a
# new column to it may trigger pandas' SettingWithCopyWarning if this is
# re-enabled - confirm, and consider working on a copy.
#X_test['Action'] = tree.predict(X_test)
#df_results = pd.DataFrame(X_test)
#df_results.to_csv('Sprint_results_Player8_Game2_randomforest.csv')
#df_results