# Librairies

In [None]:
import numpy as np
import math
import random
import pandas as pd
from os import listdir
from os.path import isfile, join
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from scipy.special import softmax


# Functions 

In [2]:
def rotation_matrix(axis, theta):
    """
    Return the rotation matrix associated with counterclockwise rotation about
    the given axis by theta radians.
    """
    axis = np.asarray(axis)
    axis = axis / math.sqrt(np.dot(axis, axis))
    a = math.cos(theta / 2.0)
    b, c, d = -axis * math.sin(theta / 2.0)
    aa, bb, cc, dd = a * a, b * b, c * c, d * d
    bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d
    return np.array([[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)],
                     [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)],
                     [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]])



In [3]:
def get_rotated_vector(theta,axis,vector): # returns a rotated vector
    return np.dot(rotation_matrix(axis, theta), vector)

In [4]:
def get_rotated_dataframe_X(dataframe): # return a dataframe with the rotated vector for a rotation around the X axis 
    df=dataframe.copy()
    df['vector'] = df[['x','y','z']].agg(list, axis=1)
    
    df['theta']=np.random.uniform(0, 360)

    
    df['rotated_vector'] = df.apply(lambda x: get_rotated_vector(x.theta,[1,0,0],x.vector), axis=1)
    
    return df


In [5]:
def get_rotated_dataframe_Y(dataframe): # return a dataframe with the rotated vector for a rotation around the Y axis 
    df=dataframe.copy()
    df['vector'] = df[['x','y','z']].agg(list, axis=1)
    
    df['theta']=np.random.uniform(0, 360)

    
    df['rotated_vector'] = df.apply(lambda x: get_rotated_vector(x.theta,[0,1,0],x.vector), axis=1)
    
    return df


In [6]:
def get_rotated_dataframe_Z(dataframe): # return a dataframe with the rotated vector for a rotation around the Z axis 
    df=dataframe.copy()
    df['vector'] = df[['x','y','z']].agg(list, axis=1)
    
    df['theta']=np.random.uniform(0, 360)

    
    df['rotated_vector'] = df.apply(lambda x: get_rotated_vector(x.theta,[0,0,1],x.vector), axis=1)
    
    return df


In [7]:
def get_rotated_random_df_list_X(random_df_l): # apply random rotations to parts of dataframes that are contained in a list
    
    rotated_random_df_list=[]

    for dataframe in random_df_l:
        rotated_random_df_list.append(get_rotated_dataframe_X(dataframe))
        
    return rotated_random_df_list
    

In [8]:
def get_list_of_dataframe(path): # AKA the location of the text files  # make sure to replace all '\' with '/'
    files = [f for f in listdir(path) if isfile(join(path, f))]
    
    path_list=[]
    df_list=[]
    
    for i in files:
        path_list.append(path+"/"+i)


    for i in path_list:
        df=pd.read_csv(i,header=None,sep=' ')
        df.columns =['timestamp', 'x', 'y', 'z']
        df=df.set_index('timestamp')

        df_list.append(df)
        
    return df_list

    

In [9]:
def random_sample_dataframe(df):       # returns  a random sample of  dataframe
    number_of_rows=random.randint(7000,10000)
    n = random.randint(0, len(df)-number_of_rows)

    n_random_consecutive_rows = df[n:n+number_of_rows]
    
    return n_random_consecutive_rows

In [10]:
def get_random_datafame_list(df): # returns a list of random sample for each window of dataframe
                                  # i decided to have 10 windows per dataframe except for one because it only works with 11 windows
    random_df_l=[]
    if len(df)==135804:
        n=round(len(df)/11)
    else:
        n=round(len(df)/10)
        

    df_partitions = [df[i:i+n].copy() for i in range(0,df.shape[0],n)]

    for i in df_partitions:
        random_df_l.append(random_sample_dataframe(i))
        
    return random_df_l

In [11]:
def get_rotated_random_df_list_X(random_df_l): # rotate the list of dataframes under the X axis
    
    rotated_random_df_list=[]

    for dataframe in random_df_l:
        rotated_random_df_list.append(get_rotated_dataframe_X(dataframe))
        
    return rotated_random_df_list
    

In [12]:
def get_rotated_random_df_list_Y(random_df_l): # rotate the list of dataframes under the Y axis
    
    rotated_random_df_list=[]

    for dataframe in random_df_l:
        rotated_random_df_list.append(get_rotated_dataframe_Y(dataframe))
        
    return rotated_random_df_list
    

In [13]:
def get_rotated_random_df_list_Z(random_df_l): # rotate the list of dataframes under the Y axis
    
    rotated_random_df_list=[]

    for dataframe in random_df_l:
        rotated_random_df_list.append(get_rotated_dataframe_Z(dataframe))
        
    return rotated_random_df_list
    

In [14]:
def get_rotated_x(rotated_vector):
    return rotated_vector[0]

def get_rotated_y(rotated_vector):
    return rotated_vector[1]

def get_rotated_z(rotated_vector):
    return rotated_vector[2]

In [15]:
def add_random_rotations_to_initial_dataframe(df,clean_rotated_rand_df_l): # add the random rotations to the dataframe by updating df values
    df['theta']=0
    for rotated_dataframe in clean_rotated_rand_df_l:
        df.update(rotated_dataframe)
        
    return df

In [16]:

def get_clean_rotated_random_df_l(rotated_random_df_l): # clean the dataframe by removing unwanted columns
    
    clean_rotated_rand_df_l=[]
    for dataframe in rotated_random_df_l:
        
        dataframe['x']=dataframe['rotated_vector'].apply(get_rotated_x)
        dataframe['y']=dataframe['rotated_vector'].apply(get_rotated_y)
        dataframe['z']=dataframe['rotated_vector'].apply(get_rotated_z)
        dataframe=dataframe.drop(['vector', 'rotated_vector'], axis=1)
        
        clean_rotated_rand_df_l.append(dataframe)
        
    
    return clean_rotated_rand_df_l

# Main functions

In [17]:
# Main function
def apply_random_rotations_to_dataframe_X(df): # all the functions put together to get the randomly rotated dataframe with the features we want
    random_df_l=get_random_datafame_list(df)   # returns a list containing random subparts for each partition of the dataframe (partitions=10)
    rotated_random_df_l=get_rotated_random_df_list_X(random_df_l) # rotated the random subparts
    clean_rotated_rand_df_l=get_clean_rotated_random_df_l(rotated_random_df_l) # remove duplicate information and put each feature in a single coLumn
    df=add_random_rotations_to_initial_dataframe(df,clean_rotated_rand_df_l)   # update the initial dataframe with the random rotations
    
    df_diff=df.copy()
    df_diff['y_diff']=df_diff['y'].diff(periods=1)
    df_diff['z_diff']=df_diff['z'].diff(periods=1)
    df_diff=df_diff.iloc[1: , :]
    
    df_diff['softmax_y_diff'] = softmax(df_diff['y_diff'])
    df_diff['softmax_z_diff'] = softmax(df_diff['z_diff'])
    
    return df_diff

In [18]:
# main function
def apply_random_rotations_to_dataframe_Y(df): # all the functions put together to get the randomly rotated dataframe with the features we want
    random_df_l=get_random_datafame_list(df)   # returns a list containing random subparts for each partition of the dataframe (partitions=10)
    rotated_random_df_l=get_rotated_random_df_list_Y(random_df_l) # rotate the random subparts
    clean_rotated_rand_df_l=get_clean_rotated_random_df_l(rotated_random_df_l) # remove duplicate information and put each feature in a single coLumn
    df=add_random_rotations_to_initial_dataframe(df,clean_rotated_rand_df_l)  # update the initial dataframe with the random rotations
    
    df_diff=df.copy()
    df_diff['x_diff']=df_diff['x'].diff(periods=1)
    df_diff['z_diff']=df_diff['z'].diff(periods=1)
    df_diff=df_diff.iloc[1: , :]
    
    df_diff['softmax_x_diff'] = softmax(df_diff['x_diff'])
    df_diff['softmax_z_diff'] = softmax(df_diff['z_diff'])
    
    return df_diff

In [19]:
# main function
def apply_random_rotations_to_dataframe_Z(df): # all the functions put together to get the randomly rotated dataframe with the features we want
    random_df_l=get_random_datafame_list(df) # returns a list containing random subparts for each partition of the dataframe (partitions=10)
    rotated_random_df_l=get_rotated_random_df_list_Z(random_df_l)  # rotate the random subparts
    clean_rotated_rand_df_l=get_clean_rotated_random_df_l(rotated_random_df_l) # remove duplicate information and put each feature in a single coLumn
    df=add_random_rotations_to_initial_dataframe(df,clean_rotated_rand_df_l)  # update the initial dataframe with the random rotations
     
    df_diff=df.copy()
    df_diff['x_diff']=df_diff['x'].diff(periods=1)
    df_diff['y_diff']=df_diff['y'].diff(periods=1)
    df_diff=df_diff.iloc[1: , :]
    
    df_diff['softmax_x_diff'] = softmax(df_diff['x_diff'])
    df_diff['softmax_y_diff'] = softmax(df_diff['y_diff'])
    
    return df_diff

## Model building + performance analysis Functions

In [20]:
def train_and_test_model_X(df):  # Train and test model for rotations around the X-axis
    from sklearn.model_selection import train_test_split  
    
    df_features=df[['y_diff','z_diff','softmax_y_diff','softmax_z_diff']].agg(list, axis=1)
    X=df_features.values
    
    X=[i for i in X]
    #X = min_max_scaler.fit_transform(X)
    y=df['theta'].values
    y=[i for i in y]
    
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=42)

    from sklearn.ensemble import RandomForestRegressor

    

    model = RandomForestRegressor(random_state=1)
    model.fit(x_train, y_train)
    
    
    score=model.score(x_test,y_test)
    ypred = model.predict(x_test)
    
    mse=mean_squared_error(y_test, ypred)



    return model,score,mse

In [21]:
def train_and_test_model_Y(df): # Train and test model for rotations around the Y-axis
    from sklearn.model_selection import train_test_split  
    
    df_features=df[['x_diff','z_diff','softmax_x_diff','softmax_z_diff']].agg(list, axis=1)
    X=df_features.values
    
    X=[i for i in X]
    y=df['theta'].values
    y=[i for i in y]
    
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=42)

    from sklearn.ensemble import RandomForestRegressor

    

    model = RandomForestRegressor(random_state=1)
    model.fit(x_train, y_train)
    
    
    score=model.score(x_test,y_test)
    ypred = model.predict(x_test)
    
    mse=mean_squared_error(y_test, ypred)



    return model,score,mse

In [22]:
def train_and_test_model_Z(df): # Train and test model for rotations around the Z-axis
    from sklearn.model_selection import train_test_split  
    
    df_features=df[['x_diff','y_diff','softmax_x_diff','softmax_y_diff']].agg(list, axis=1)
    X=df_features.values
    
    X=[i for i in X]
    
    y=df['theta'].values
    y=[i for i in y]
    
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=42)

    from sklearn.ensemble import RandomForestRegressor

    

    model = RandomForestRegressor(random_state=1)
    model.fit(x_train, y_train)
    
    
    score=model.score(x_test,y_test)
    ypred = model.predict(x_test)
    
    mse=mean_squared_error(y_test, ypred)



    return model,score,mse

In this part, we are going to apply the random rotations and train the models

# Read all files and store them in a list

In [23]:
location="C:/Users/chris/OneDrive/Desktop/reports"  # use the location of the files: Put all the files in the same folder and copy past its location. please replace all '\' with '/' 

df_list=get_list_of_dataframe(location)

# Apply Random rotations to all dataframes contained in the list and  get a new list of rotated dataframes

## list of randomly rotated dataframes for X-axis 

In [26]:
list_all_rotated_dataframes_X=[]

for dataframe in df_list:
    list_all_rotated_dataframes_X.append(apply_random_rotations_to_dataframe_X(dataframe))

## list of randomly rotated dataframes for Y-axis

In [27]:
list_all_rotated_dataframes_Y=[]

for dataframe in df_list:
    list_all_rotated_dataframes_Y.append(apply_random_rotations_to_dataframe_Y(dataframe))

## list of randomly rotated dataframes for Z-axis 

In [28]:
list_all_rotated_dataframes_Z=[]

for dataframe in df_list:
    list_all_rotated_dataframes_Z.append(apply_random_rotations_to_dataframe_Z(dataframe))

# concatenate the list of  dataframes by axis

In [29]:
df_X=pd.concat(list_all_rotated_dataframes_X)

In [30]:
df_Y=pd.concat(list_all_rotated_dataframes_Y)

In [31]:
df_Z=pd.concat(list_all_rotated_dataframes_Z)

# View of the rotated Dataframes

In [44]:
df_X

Unnamed: 0_level_0,x,y,z,theta,y_diff,z_diff,softmax_y_diff,softmax_z_diff
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
20,0.0,-2.0,14.0,0.0,5.0,-1.0,0.0,4.258743e-280
40,19.0,-5.0,8.0,0.0,-3.0,-6.0,0.0,2.869519e-282
60,50.0,6.0,38.0,0.0,11.0,30.0,0.0,1.237116e-266
80,15.0,-12.0,9.0,0.0,-18.0,-29.0,0.0,2.944665e-292
100,-48.0,-12.0,4.0,0.0,0.0,-5.0,0.0,7.800160e-282
...,...,...,...,...,...,...,...,...
5099880,21.0,3.0,2.0,0.0,25.0,3.0,0.0,3.277434e-276
5099900,22.0,4.0,6.0,0.0,1.0,4.0,0.0,8.908990e-276
5099920,8.0,-21.0,-1.0,0.0,-25.0,-7.0,0.0,1.487953e-280
5099940,11.0,-31.0,-5.0,0.0,-10.0,-4.0,0.0,2.988633e-279


In [45]:
df_Y

Unnamed: 0_level_0,x,y,z,theta,x_diff,z_diff,softmax_x_diff,softmax_z_diff
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
20,0.0,-2.0,14.0,0.0,4.0,-1.0,5.752741e-290,8.621819e-264
40,19.0,-5.0,8.0,0.0,19.0,-6.0,1.880581e-283,5.809336e-266
60,50.0,6.0,38.0,0.0,31.0,30.0,3.060736e-278,2.504539e-250
80,15.0,-12.0,9.0,0.0,-35.0,-29.0,6.643395e-307,5.961470e-276
100,-48.0,-12.0,4.0,0.0,-63.0,-5.0,4.593526e-319,1.579141e-265
...,...,...,...,...,...,...,...,...
5099880,21.0,3.0,2.0,0.0,19.0,3.0,0.000000e+00,6.990989e-318
5099900,22.0,4.0,6.0,0.0,1.0,4.0,0.000000e+00,1.900348e-317
5099920,8.0,-21.0,-1.0,0.0,-14.0,-7.0,0.000000e+00,3.162020e-322
5099940,11.0,-31.0,-5.0,0.0,3.0,-4.0,0.000000e+00,6.373447e-321


In [46]:
df_Z

Unnamed: 0_level_0,x,y,z,theta,x_diff,y_diff,softmax_x_diff,softmax_y_diff
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
20,0.0,-2.0,14.0,0.0,4.0,5.0,8.428435e-315,0.0
40,19.0,-5.0,8.0,0.0,19.0,-3.0,2.755270e-308,0.0
60,50.0,6.0,38.0,0.0,31.0,11.0,4.484334e-303,0.0
80,15.0,-12.0,9.0,0.0,-35.0,-18.0,0.000000e+00,0.0
100,-48.0,-12.0,4.0,0.0,-63.0,0.0,0.000000e+00,0.0
...,...,...,...,...,...,...,...,...
5099880,21.0,3.0,2.0,0.0,19.0,25.0,0.000000e+00,0.0
5099900,22.0,4.0,6.0,0.0,1.0,1.0,0.000000e+00,0.0
5099920,8.0,-21.0,-1.0,0.0,-14.0,-25.0,0.000000e+00,0.0
5099940,11.0,-31.0,-5.0,0.0,3.0,-10.0,0.000000e+00,0.0


# Model building for each rotation

In [38]:
model_X=train_and_test_model_X(df_X)

In [39]:
model_Y=train_and_test_model_Y(df_Y)

In [40]:
model_Z=train_and_test_model_Z(df_Z)

In [41]:
model_X

(RandomForestRegressor(random_state=1), 0.8071030883465027, 2239.4858765391423)

In [42]:
model_Y

(RandomForestRegressor(random_state=1), 0.7029431393542227, 3998.0749737165565)

In [47]:
model_Z

(RandomForestRegressor(random_state=1), 0.5488598382343686, 5894.213776159801)

In [48]:
math.sqrt(2239.4858765391423)

47.32320653272707

In [None]:
math.sqrt(3998.0749737165565)

In [50]:
math.sqrt(5894.213776159801)

76.77378313043978