In [1]:
import os
import glob
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from sklearn import model_selection, ensemble, metrics

In [2]:
# Set the base path
base_path = '/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/'

# Get the file paths for train and test data
train_files = glob.glob(os.path.join(base_path, 'train/**/**'))
test_files = glob.glob(os.path.join(base_path, 'test/**/**'))

# Load the metadata files
tdcsfog_metadata = pd.read_csv(os.path.join(base_path, 'tdcsfog_metadata.csv'))
defog_metadata = pd.read_csv(os.path.join(base_path, 'defog_metadata.csv'))
metadata = pd.concat([tdcsfog_metadata, defog_metadata])
# Load the subjects file
subjects = pd.read_csv(os.path.join(base_path, 'subjects.csv'))

# Load the tasks file
tasks = pd.read_csv(os.path.join(base_path, 'tasks.csv'))

# Load the sample submission file
sub = pd.read_csv(os.path.join(base_path, 'sample_submission.csv'))

In [3]:
def get_data(f):
    df = pd.read_csv(f)
    df['Id'] = f.split('/')[-1].split('.')[0]
    df['data_type'] = f.split('/')[-2]
    return df
# Get the train data
train_defog_files = glob.glob(os.path.join(base_path, 'train/defog/**'))
train_tdcsfog_files = glob.glob(os.path.join(base_path, 'train/tdcsfog/**'))
df_train_defog = pd.concat([get_data(f) for f in train_defog_files])
df_train_tdcsfog = pd.concat([get_data(f) for f in train_tdcsfog_files])
df_train = pd.concat([df_train_defog, df_train_tdcsfog])


print(df_train.isnull().sum().sum())

print(df_train_defog.shape)
print(df_train_tdcsfog.shape)

print(df_train.shape)

# Fill missing values with 0
df_train.fillna(0, inplace=True)

14125344
(13525702, 11)
(7062672, 9)
(20588374, 11)


In [4]:
# Features
df_train['VelMagnitude'] = np.sqrt(df_train['AccV'] ** 2 + df_train['AccML'] ** 2 + df_train['AccAP'] ** 2)
df_train['JerkMagnitude'] = np.sqrt(df_train['VelMagnitude'].diff() ** 2)
df_train['MeanVelocity'] = (df_train['AccV'] + df_train['AccML'] + df_train['AccAP'])/3
df_train['PeakAccV'] = df_train['AccV'].max()
df_train['PeakAccML'] = df_train['AccML'].max()
df_train['PeakAccAP'] = df_train['AccAP'].max()

df_train.fillna(0, inplace=True)

features=['Time', 'AccV', 'AccML', 'AccAP','VelMagnitude','JerkMagnitude','MeanVelocity','PeakAccV','PeakAccML','PeakAccAP']
Targets=['StartHesitation', 'Turn' , 'Walking']
X_train, X_valid, y_train, y_valid = model_selection.train_test_split(df_train[features], df_train[Targets], test_size=.30, random_state=42)


In [5]:
import gc
del df_train
gc.collect()

81

In [6]:
model_Reg = ensemble.RandomForestRegressor(n_estimators=100, max_depth=7, n_jobs=-1, random_state=42)
model_Reg.fit(X_train, y_train)

RandomForestRegressor(max_depth=7, n_jobs=-1, random_state=42)

In [7]:
print(metrics.average_precision_score(y_valid, model_Reg.predict(X_valid).clip(0.0,1.0)))

0.3372607054144166


In [8]:
sub = pd.read_csv(base_path+'sample_submission.csv')
test = glob.glob(base_path+'test/**/**')

sub['t'] = 0
submission = []
for f in test:
    df = pd.read_csv(f)
    df['Id'] = f.split('/')[-1].split('.')[0]
    df = df.fillna(0).reset_index(drop=True)
    df['VelMagnitude'] = np.sqrt(df['AccV'] ** 2 + df['AccML'] ** 2 + df['AccAP'] ** 2)
    df['JerkMagnitude'] = np.sqrt(df['VelMagnitude'].diff() ** 2)
    df['MeanVelocity'] = (df['AccV'] + df['AccML'] + df['AccAP'])/3
    df['PeakAccV'] = df['AccV'].max()
    df['PeakAccML'] = df['AccML'].max()
    df['PeakAccAP'] = df['AccAP'].max()
    # Fill missing values with 0
    df.fillna(0, inplace=True)

    res = pd.DataFrame(np.round(model_Reg.predict(df[features]),3), columns=['StartHesitation', 'Turn' , 'Walking'])
    df = pd.concat([df,res], axis=1)
    df['Id'] = df['Id'].astype(str) + '_' + df['Time'].astype(str)
    submission.append(df[['Id','StartHesitation', 'Turn' , 'Walking']])
submission = pd.concat(submission)
submission = pd.merge(sub[['Id','t']], submission, how='left', on='Id').fillna(0.0)
submission[['Id','StartHesitation', 'Turn' , 'Walking']].to_csv('submission.csv', index=False)

In [9]:
del submission['t']
submission

Unnamed: 0,Id,StartHesitation,Turn,Walking
0,003f117e14_0,0.001,0.004,0.001
1,003f117e14_1,0.001,0.004,0.001
2,003f117e14_2,0.001,0.004,0.001
3,003f117e14_3,0.001,0.004,0.001
4,003f117e14_4,0.001,0.004,0.001
...,...,...,...,...
286365,02ab235146_281683,0.000,0.000,0.005
286366,02ab235146_281684,0.000,0.000,0.005
286367,02ab235146_281685,0.000,0.000,0.005
286368,02ab235146_281686,0.000,0.000,0.005
