In [1]:
# Goal:
# Build the leave-one-trial-out train and test data sets and save them as CSV files for ML.

In [4]:
import numpy as np 
import pandas 
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score # This seems to only work for classification
from sklearn.metrics import explained_variance_score
import math


In [5]:
# Trials for which the recorded data is good enough to use.
acceptable_trials = [
    'm07_t01_15', 'm07_t03_15', 'm07_t06_15',
    'm10_t02_16',
    'm11_t02_16', 'm11_t04_16',
    'm12_t02_16',
    'm14_t05_16', 'm14_t03_16',
    'm15_t01_16', 'm15_t03_16',
]

# Leave-one-out split: fold k holds out acceptable_trials[k] as the test trial
# and trains on every other trial, so test_trials[k] pairs with train_trials[k].
test_trials = list(acceptable_trials)
train_trials = [
    acceptable_trials[:held_out] + acceptable_trials[held_out + 1:]
    for held_out in range(len(acceptable_trials))
]
    
 

In [17]:
# Train on N-1 of N trials and test on the removed trial.
# For every fold this cell writes four CSV files named after the held-out
# trial: <trial>_TrainX.csv / <trial>_TrainY.csv and <trial>_TestX.csv /
# <trial>_TestY.csv.

# Column that is predicted (the y files contain only this column).
test_column = 'D10'
# Not referenced by this cell; the predictors exported here are built from
# `columns` below. Kept so that any other cell reading it keeps working.
predictor_columns = ['Wingbeat_freq', 'M6_c', 'I20_I10', 'M3_i', 'A59_c']

# Input / output locations (string prefixes, concatenated with the trial name).
DATA_DIR = '../DataProcessing/ProcessedData/'
TRAIN_DIR = '/Users/sage/Desktop/ML/Train/'
TEST_DIR = '/Users/sage/Desktop/ML/Test/'

# Columns every processed trial file is expected to contain. Selecting them
# makes a file that lacks any of them fail with a KeyError before processing.
RAW_COLUMNS = ['D10', 'D11', 'D20', 'I20I11_I10', 'I20_I10',
               'fitting_error', 'seconds', 'tif_num', 'M3_c', 'M6_c', 'A51_c', 'A59_c',
               'M3_i', 'M6_i', 'A59_i', 'A51_i', 'Sum', 'peaks', 'ISI']

# Base predictors and the names of their shifted copies. '<col>_<j>' holds the
# value of '<col>' taken (j + 1) rows further down the trial.
columns = ['M3_i', 'M6_c', 'I20_I10', 'A59_c']
N_SHIFTS = 11
shift_column_names = [col + '_' + str(j) for j in range(N_SHIFTS) for col in columns]


def load_trial_features(trial):
    """Load one processed trial and build its predictor / target frames.

    Parameters
    ----------
    trial : str
        Trial identifier; the file read is DATA_DIR + trial + '_det.csv'.

    Returns
    -------
    (X, y) : tuple of pandas.DataFrame
        X has the columns `columns + shift_column_names`, y has the single
        column `test_column`. Both share the same row index.

    Raises
    ------
    FileNotFoundError
        If the trial file does not exist.
    KeyError
        If the file lacks any column listed in RAW_COLUMNS.
    """
    d = pandas.read_csv(DATA_DIR + trial + '_det.csv')
    d = d[RAW_COLUMNS]

    # Fill NaN gaps by interpolation, then drop any row that still has a NaN
    # in the columns that are actually used.
    d = d.interpolate()
    d = d[[test_column] + columns]
    d = d.dropna(how='any').reset_index(drop=True)

    # Shifted predictors: shift(-(j + 1)) is the same as applying shift(-1)
    # (j + 1) times in a row.
    for j in range(N_SHIFTS):
        for col in columns:
            d[col + '_' + str(j)] = d[col].shift(-(j + 1))

    # The last N_SHIFTS rows have no complete set of shifted values.
    d = d.dropna()

    return d[columns + shift_column_names], d[[test_column]]


# Every trial appears in many folds; process each file only once.
trial_features = {}


def get_trial_features(trial):
    """Return the cached (X, y) pair for `trial`, loading it on first use."""
    if trial not in trial_features:
        trial_features[trial] = load_trial_features(trial)
    return trial_features[trial]


for test_trial, fold_train_trials in zip(test_trials, train_trials):
    # Training set: all trials of this fold stacked with a fresh 0..n-1 index.
    # pandas.concat is used because DataFrame.append was removed in pandas 2.0.
    train_parts = [get_trial_features(trial) for trial in fold_train_trials]
    X = pandas.concat([part[0] for part in train_parts], ignore_index=True)
    y = pandas.concat([part[1] for part in train_parts], ignore_index=True)

    X.to_csv(TRAIN_DIR + test_trial + '_TrainX.csv', index=False)
    y.to_csv(TRAIN_DIR + test_trial + '_TrainY.csv', index=False)

    # Test set: the held-out trial. X and y keep these values after the loop
    # ends, so a later cell that displays X shows the last fold's test set.
    X, y = get_trial_features(test_trial)

    X.to_csv(TEST_DIR + test_trial + '_TestX.csv', index=False)
    y.to_csv(TEST_DIR + test_trial + '_TestY.csv', index=False)




In [15]:
X

Unnamed: 0,M3_i,M6_c,I20_I10,A59_c,M3_i_0,M6_c_0,I20_I10_0,A59_c_0,M3_i_1,M6_c_1,...,I20_I10_8,A59_c_8,M3_i_9,M6_c_9,I20_I10_9,A59_c_9,M3_i_10,M6_c_10,I20_I10_10,A59_c_10
0,999.851313,7.287385,0.672639,7.333290,650.056295,7.316401,0.698508,6.941859,790.992971,7.306035,...,0.666523,7.336216,1081.514321,7.300201,0.646792,7.133146,1141.579305,7.313940,0.681832,7.153714
1,650.056295,7.316401,0.698508,6.941859,790.992971,7.306035,0.790300,7.256932,1042.908166,7.286765,...,0.646792,7.133146,1141.579305,7.313940,0.681832,7.153714,461.424369,7.305876,0.675589,7.264276
2,790.992971,7.306035,0.790300,7.256932,1042.908166,7.286765,0.840734,6.918654,819.646362,7.322817,...,0.681832,7.153714,461.424369,7.305876,0.675589,7.264276,872.064025,7.300234,0.690100,7.215691
3,1042.908166,7.286765,0.840734,6.918654,819.646362,7.322817,0.729914,7.165783,826.471603,7.328093,...,0.675589,7.264276,872.064025,7.300234,0.690100,7.215691,1052.444836,7.325706,0.738946,7.152218
4,819.646362,7.322817,0.729914,7.165783,826.471603,7.328093,0.696635,7.325797,803.796804,7.322310,...,0.690100,7.215691,1052.444836,7.325706,0.738946,7.152218,903.787196,7.342215,0.728054,7.117094
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156,766.647532,7.302826,0.723009,6.924084,1044.210475,7.286260,0.665980,6.536190,1300.486574,7.300855,...,0.820114,7.305310,663.496431,7.319146,0.755389,7.485490,865.768337,7.313483,0.731097,6.813087
157,1044.210475,7.286260,0.665980,6.536190,1300.486574,7.300855,0.666632,7.338652,1050.900478,7.303309,...,0.755389,7.485490,865.768337,7.313483,0.731097,6.813087,1193.682216,7.313996,0.704269,7.219153
158,1300.486574,7.300855,0.666632,7.338652,1050.900478,7.303309,0.677345,7.263684,1085.988626,7.301317,...,0.731097,6.813087,1193.682216,7.313996,0.704269,7.219153,1038.109594,7.304893,0.650532,7.268863
159,1050.900478,7.303309,0.677345,7.263684,1085.988626,7.301317,0.653794,7.134615,1156.575230,7.303245,...,0.704269,7.219153,1038.109594,7.304893,0.650532,7.268863,1003.494293,7.308672,0.645600,7.087078
