In [None]:
#this notebook takes the separate data from each impact and combines it into one file.
#INITIALISATION, FINDING DATA
import random
import os
import numpy as np
import pandas as pd


path_113 = "D:/MASTERS/HELMET_SIMULATIONS"
# os.listdir returns entries in arbitrary order, so the names are sorted to make
# the row order of every exported file reproducible between runs.
# Only sub-directories are kept: every later cell opens files *inside* each
# entry, so a stray file in the main folder would otherwise break those loops.
dir_list = sorted(
    entry for entry in os.listdir(path_113)
    if os.path.isdir(os.path.join(path_113, entry))
)
n = len(dir_list) #number of impacts
print(path_113) #path of main folder with 113 impacts inside
print(dir_list) #list of impacts
print('number of impacts found:', len(dir_list)) #number of impacts

#note the data is manipulated as a dictionary and then converted to a dataframe before exporting to csv.


In [None]:
#STRAIN OUTPUTS/ SIMULATION RESULTS

#The dictionary is rebuilt from scratch here, so re-running this cell never
#duplicates rows. Impact type and kinematic peaks are added by later cells.
data_dict = {
    'Impact label': [],
    'Strain 90th%': [],
    'Strain rate 90th%': [],
}

print(data_dict) #check it looks right

#Loop over dir_list itself instead of range(n): `n` is re-bound to the number of
#2020 result rows further down the notebook, so range(n) would read the wrong
#number of impacts if this cell were re-run after the 2020 cells.
for impact_dir in dir_list:
    #main path of files + impact path + csv filename
    path_strains = path_113 + '/' + impact_dir + '/90thpercentile_results.csv'
    #the csv has no column headers, so cells are addressed by position
    strain_result_df = pd.read_csv(path_strains, header=None)

    label = strain_result_df.iat[0,0] #label e.g date_helmet_impact_processed
    strain = strain_result_df.iat[0,2] #e.g 0.234
    strainrate = strain_result_df.iat[1,2] #e.g 0.344

    #one entry per impact in each column list, all at the same index
    data_dict['Impact label'].append(label)
    data_dict['Strain 90th%'].append(strain)
    data_dict['Strain rate 90th%'].append(strainrate)

    print('data for', label, 'added') #check dictionary looks right

print(data_dict)

In [None]:
#sanity check: checking matches for different impacts
#random.randint includes BOTH end points, so the old randint(0, 113) could pick
#index 113, one past the end of a 113-entry list. randrange excludes the stop
#value and the bound now follows the data instead of being hard-coded.
impact_no = random.randrange(len(data_dict['Impact label']))
print(data_dict['Impact label'][impact_no])
print(data_dict['Strain 90th%'][impact_no])
print(data_dict['Strain rate 90th%'][impact_no])

In [None]:
#snapshot of the dictionary built so far, written out as a csv
#(one column per dictionary key, no index column)
strains_csv = 'E:/MASTERS/IMPACT_STRAINS.csv'
df = pd.DataFrame(data=data_dict)
df.to_csv(strains_csv, index=False)

In [None]:
#adding INPUTS/FEATURES:

#MAX LINEAR and MAX ROTATIONAL acceleration, XYZ max rotational accelerations, xyz rotational velocity.
#some axis should be swapped

#raw csv column -> dictionary column that stores its peak value.
#The order of this mapping fixes the column order of the exported dataframe.
peak_columns = {
    'ra_r': 'peak rot acc resultant',
    'la_r': 'peak lin acc resultant',
    'rv_r': 'peak rot vel resultant',
    'ra_x': 'peak x rot acc',
    'ra_y': 'peak y rot acc',
    'ra_z': 'peak z rot acc',
    'rv_x': 'peak x rot vel',
    'rv_y': 'peak y rot vel',
    'rv_z': 'peak z rot vel',
    'la_x': 'peak x lin acc',
    'la_y': 'peak y lin acc',
    'la_z': 'peak z lin acc',
}
#raw columns whose sign is flipped before the peak is taken
reversed_columns = ['la_x', 'la_z', 'rv_x', 'rv_y', 'ra_x', 'ra_y']
#rows before this index are discarded ("taking from positive time")
first_sample = 200


def signed_peak(values):
    """Return the entry of `values` with the largest magnitude, sign preserved.

    values: 1-D numpy array.
    np.abs(values).argmax() is the index of the largest absolute value (the
    first one if several tie); indexing `values` with it keeps the sign.
    Raises ValueError (from argmax) when `values` is empty.
    """
    return values[np.abs(values).argmax()]


#add (or reset) the extra columns in the dictionary
for out_col in peak_columns.values():
    data_dict[out_col] = []

#find the peak value of every listed column for each impact.
#dir_list is iterated directly because `n` is re-bound by the 2020 cells.
for impact_dir in dir_list:
    #access input raw data
    path_raw = path_113 + '/' + impact_dir + '/' + impact_dir + '.csv'
    raw_df = pd.read_csv(path_raw)

    #reverse required cols
    for col in reversed_columns:
        raw_df[col] = - raw_df[col]

    #Each peak is located with the argmax of its OWN column. The previous code
    #indexed rv_x with the argmax of rv_r, so 'peak x rot vel' stored the x
    #value at the time of the resultant peak instead of the x peak itself.
    for raw_col, out_col in peak_columns.items():
        samples = raw_df[raw_col].values[first_sample:]
        data_dict[out_col].append(signed_peak(samples))


In [None]:
#sanity check with new data
#random.randint includes BOTH end points, so the old randint(0, 113) could pick
#an index one past the end of a 113-entry list. randrange excludes the stop
#value and the bound follows the data instead of being hard-coded.
impact_no = random.randrange(len(data_dict['Impact label']))

#(column, True when its raw axis was sign-flipped and the value is therefore
# printed with the '(reversed)' tag)
checked_columns = [
    ('Impact label', False),
    ('Strain 90th%', False),
    ('Strain rate 90th%', False),
    ('peak rot acc resultant', False),
    ('peak lin acc resultant', False),
    ('peak rot vel resultant', False),
    ('peak x rot acc', True),
    ('peak y rot acc', True),
    ('peak z rot acc', False),
    ('peak x rot vel', True),
    ('peak y rot vel', True),
    ('peak z rot vel', False),
    ('peak x lin acc', True),
    ('peak y lin acc', False),
    ('peak z lin acc', True),
]
for column, is_reversed in checked_columns:
    value = data_dict[column][impact_no]
    if is_reversed:
        print(value, '(reversed)') #reversed axis
    else:
        print(value)
print(data_dict)

In [None]:
#add IMPACT TYPE 2024 data e.g. nYR, pZR
data_dict['Type'] = []

#dir_list is iterated directly because `n` is re-bound by the 2020 cells.
for impact_dir in dir_list:
    #main path of files + impact path + csv filename
    path_raw = path_113 + '/' + impact_dir + '/' + impact_dir + '.csv'
    #read without a header row so every line, including the first, is data and
    #cells can be addressed purely by position
    raw_df = pd.read_csv(path_raw,header=None)

    #type e.g nYR pZR, taken from row index 1, column index 14.
    #Named impact_type so the builtin `type` is not shadowed for later cells.
    impact_type = raw_df.iat[1,14]

    #adding values to the dictionary
    data_dict['Type'].append(impact_type)

    #print('impact =', impact_dir, 'type =', impact_type) #check outputs look right



In [None]:
#DOING FOR 2020 DATA.
import pandas as pd
#FINDING impact strains
#STRAIN OUTPUTS/ SIMULATION RESULTS
#earlier locations, kept for reference:
#   outputs: 'D:/MASTERS/Bicycle_2020/Results/Results_2.csv'
#   inputs:  'D:/MASTERS/K_files_2020'
path_outputs = 'D:/MASTERS/Bicycle_2020-[original_directory]/Results/Results_3_duplicates_removed.csv'
path_inputs = 'D:/MASTERS/K_files_2020_duplicates_removed'

#fresh dictionary with the three result columns;
#impact type (e.g. nYR) and kinematics metrics (e.g max rot acc) are added later.
data_dict_2020 = {
    'Impact label': [],
    'Strain 90th%': [],
    'Strain rate 90th%': [],
}

print(data_dict_2020) #check it looks right

#one row of the results file per impact
results = pd.read_csv(path_outputs)
n = len(results)

for row in range(n):
    #label = name + '_' + test_ID, e.g. ABUS GAMECHANJJ_acc_f_01_R_04_X
    #(the test id is the name of the input csv)
    data_dict_2020['Impact label'].append(
        results.at[row, 'name'] + '_' + results.at[row, 'test_ID']
    )
    #The two percentile columns are labelled the wrong way round in the
    #spreadsheet, so they are deliberately read crosswise:
    #'90thPercentileStrainRate' holds the strain and vice versa.
    data_dict_2020['Strain 90th%'].append(results.at[row, '90thPercentileStrainRate'])
    data_dict_2020['Strain rate 90th%'].append(results.at[row, '90thPercentileStrain'])

#quick checks: row count, the labels, and that both agree
print(n)
print(data_dict_2020['Impact label'])
print(len(data_dict_2020['Impact label']))

In [None]:
#sanity check for the 2020 data: print one randomly chosen impact.
#random.randint includes BOTH end points, so the hard-coded randint(0, 81) is
#only valid while the results file has at least 82 rows. randrange excludes the
#stop value and the bound follows the data.
impact_no = random.randrange(len(data_dict_2020['Impact label']))
print(data_dict_2020['Impact label'][impact_no])
print(data_dict_2020['Strain 90th%'][impact_no])
print(data_dict_2020['Strain rate 90th%'][impact_no])

In [None]:
#searching through all input files, finding peak values
# then appending to the dictionary, saving as dataframe.

#dictionary columns to fill ...
headings = ['peak rot acc resultant','peak lin acc resultant','peak rot vel resultant',
            'peak x rot acc','peak y rot acc','peak z rot acc',
            'peak x rot vel','peak y rot vel','peak z rot vel',
            'peak x lin acc','peak y lin acc','peak z lin acc']
#... and, position by position, the input csv column each one is taken from
input_headings = ['ra_r','la_r','rv_r',
                  'ra_x','ra_y','ra_z',
                  'rv_x','rv_y','rv_z',
                  'la_x', 'la_y','la_z']

for heading in headings: #creating (or resetting) the empty series in the dictionary.
    data_dict_2020[heading] = []

#One input file per impact label. The labels are iterated directly rather than
#range(n), because `n` is shared with the 2024 cells and can be re-bound.
for impact_label in data_dict_2020['Impact label']:
    #last 15 characters of the label are the csv that the input kinematics are stored in.
    impact_file_temp = impact_label[-15:]
    temp_location = path_inputs + '/' + 'FULL_' + impact_file_temp + '.csv'
    data = pd.read_csv(temp_location)

    #peak = entry with the greatest absolute value, sign preserved.
    #zip pairs the two lists, so no hard-coded column count is needed.
    #NOTE(review): unlike the 2024 cell, no leading rows are skipped here.
    #Confirm the 2020 traces need no trimming.
    for input_heading, heading in zip(input_headings, headings):
        temp_list = data[input_heading].values
        index_of_peak_temp = np.abs(temp_list).argmax()
        data_dict_2020[heading].append(temp_list[index_of_peak_temp])

#adding types, xyz: the type is the last character of each test id
data_dict_2020['Type'] = [test_id[-1:] for test_id in results['test_ID']]

#print(pd.DataFrame(data_dict_2020))  

In [None]:
#creating a csv with all columns of the 2020 dictionary (no index column)
#previous target, kept for reference: 'E:/MASTERS/IMPACT_DATA.csv'
csv_2020 = 'D:/MASTERS/IMPACT_DATA_2020_2.csv'
df = pd.DataFrame(data=data_dict_2020)
df.to_csv(csv_2020, index=False)

In [None]:
#COMBINING THE DATASETS INTO 1 COMBINED DATASET

#NOTE(review): no cell visible in this notebook writes
#'D:/MASTERS/IMPACT_DATA.csv'. Confirm where the 2024 file comes from.
data_df_2024 = pd.read_csv('D:/MASTERS/IMPACT_DATA.csv')
data_df_2020 = pd.read_csv('D:/MASTERS/IMPACT_DATA_2020_2.csv')

#stack the rows, 2024 first then 2020, and renumber the index from 0
frames_in_order = [data_df_2024, data_df_2020]
combined_df = pd.concat(frames_in_order, axis='index', ignore_index=True)

combined_df.to_csv('D:/MASTERS/IMPACT_DATA_COMBINED_SWAP.csv', index=False)