## ANN / MLP

In [1]:
import pandas as pd
import numpy as np
from scipy.io import arff
import sklearn.model_selection
import sklearn.preprocessing
import sklearn.metrics
import sklearn.neural_network

Importing the 'Adult' dataset

In [2]:
adult = pd.read_csv('adult_cleaned.csv')
adult.head()

Unnamed: 0,age,fnlwgt,education_num,capital_gain,capital_loss,hours/wk,salary,workclass_ ?,workclass_ Federal-gov,workclass_ Local-gov,...,native_country_ Portugal,native_country_ Puerto-Rico,native_country_ Scotland,native_country_ South,native_country_ Taiwan,native_country_ Thailand,native_country_ Trinadad&Tobago,native_country_ United-States,native_country_ Vietnam,native_country_ Yugoslavia
0,39,77516,13,2174,0,40,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,50,83311,13,0,0,13,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,38,215646,9,0,0,40,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,53,234721,7,0,0,40,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,28,338409,13,0,0,40,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Converting pandas series to numpy arrays
adult_Y = adult['salary'].to_numpy()

adult_X = adult.iloc[:, adult.columns != 'salary'].to_numpy()
#adult_X

In [4]:
# Scaling the data using a Standard scaler
std_scaler = sklearn.preprocessing.StandardScaler()

In [5]:
adult_X_std = std_scaler.fit_transform(adult_X)

Using the 'Gamma' dataset now

In [6]:
gamma = pd.read_csv('gamma_cleaned.csv')
gamma.head()

Unnamed: 0,fLength,fWidth,fSize,fConc,fConc1,fAsym,fM3Long,fM3Trans,fAlpha,fDist,class
0,28.7967,16.0021,2.6449,0.3918,0.1982,27.7004,22.011,-8.2027,40.092,81.8828,1
1,31.6036,11.7235,2.5185,0.5303,0.3773,26.2722,23.8238,-9.9574,6.3609,205.261,1
2,162.052,136.031,4.0612,0.0374,0.0187,116.741,-64.858,-45.216,76.96,256.788,1
3,23.8172,9.5728,2.3385,0.6147,0.3922,27.2107,-6.4633,-7.1513,10.449,116.737,1
4,75.1362,30.9205,3.1611,0.3168,0.1832,-5.5277,28.5525,21.8393,4.648,356.462,1


In [7]:
# Converting pandas series to numpy arrays
gamma_Y = gamma['class'].to_numpy()

gamma_X = gamma.iloc[:, gamma.columns != 'class'].to_numpy()

In [8]:
# Scaling the gamma data using a Standard scaler
gamma_X_std = std_scaler.fit_transform(gamma_X)

Importing the cleaned 'Eye' data:

In [9]:
eye_df = pd.read_csv('eye_cleaned.csv')
eye_df.head()

Unnamed: 0,AF3,F7,F3,FC5,T7,P7,O1,O2,P8,T8,FC6,F4,F8,AF4,eyeDetection
0,4329.23,4009.23,4289.23,4148.21,4350.26,4586.15,4096.92,4641.03,4222.05,4238.46,4211.28,4280.51,4635.9,4393.85,-1
1,4324.62,4004.62,4293.85,4148.72,4342.05,4586.67,4097.44,4638.97,4210.77,4226.67,4207.69,4279.49,4632.82,4384.1,-1
2,4327.69,4006.67,4295.38,4156.41,4336.92,4583.59,4096.92,4630.26,4207.69,4222.05,4206.67,4282.05,4628.72,4389.23,-1
3,4328.72,4011.79,4296.41,4155.9,4343.59,4582.56,4097.44,4630.77,4217.44,4235.38,4210.77,4287.69,4632.31,4396.41,-1
4,4326.15,4011.79,4292.31,4151.28,4347.69,4586.67,4095.9,4627.69,4210.77,4244.1,4212.82,4288.21,4632.82,4398.46,-1


In [10]:
# Converting pandas series to numpy arrays
eye_Y = eye_df['eyeDetection'].to_numpy()
eye_X = eye_df.iloc[:, eye_df.columns != 'eyeDetection'].to_numpy()

In [11]:
# Scaling the eye data arrays using a Standard scaler
eye_X_std = std_scaler.fit_transform(eye_X)

Importing the cleaned 'Occupancy' dataset

In [12]:
occupancy_df = pd.read_csv('occupancy_cleaned.csv')
occupancy_df.head()

Unnamed: 0,Temperature,Humidity,Light,CO2,HumidityRatio,Occupancy
0,23.18,27.272,426.0,721.25,0.004793,1
1,23.15,27.2675,429.5,714.0,0.004783,1
2,23.15,27.245,426.0,713.5,0.004779,1
3,23.15,27.2,426.0,708.25,0.004772,1
4,23.1,27.2,426.0,704.5,0.004757,1


In [13]:
# Converting pandas series to numpy arrays
occupancy_Y = occupancy_df['Occupancy'].to_numpy()
occupancy_X = occupancy_df.iloc[:, occupancy_df.columns != 'Occupancy'].to_numpy()

In [14]:
# Scaling the occupancy dataset using a Standard scaler
occupancy_X_std = std_scaler.fit_transform(occupancy_X)

Running 5 trials of MLP on each dataset:

In [15]:
# List of all hyper parameters for ANN
ann_hidden_units = [1, 2, 4, 8, 32, 128]
ann_momentum = [0, 0.2, 0.5, 0.9]

In [16]:
# This function can be called on a given dataset and it will run
# MLP classification on it
def ANN_solver(X_data, Y_data):

    # These lists will store the training and testing
    # performance metrics across all 5 trials
    trial_scores_test = []
    trial_scores_train = []

    for i in range(5):
        # Splitting data into training (5000 samples) and testing (remaining data points)
        # Note: we are shuffling the data before sampling for randomness
        
        X_train, data_X_test, Y_train, data_Y_test = sklearn.model_selection.train_test_split(
            X_data, Y_data, train_size = 5000, shuffle = True, stratify = Y_data)

        # Creating a multiple layer perceptron Classifier model
        MLP_model = sklearn.neural_network.MLPClassifier(max_iter = 2000)

        # GridSearch object to cycle through hyperparameters along with cross validation
        # (verbose = 3 for output/debugging)
        data_grid_search = sklearn.model_selection.GridSearchCV(estimator = MLP_model,
                                                           cv = 5,
                                                           param_grid = [{'hidden_layer_sizes': ann_hidden_units,
                                                                        'momentum': ann_momentum,
                                                                        'solver': ['sgd']},
                                                                         {'hidden_layer_sizes': ann_hidden_units,
                                                                        'solver': ['adam']}
                                                                        ],
                                                           scoring = ['accuracy', 'f1', 'roc_auc'],
                                                           refit = 'accuracy', verbose = 3, n_jobs = -1)

        # Fitting the training data to perform a 5 fold cross validation
        data_grid_search.fit(X_train, Y_train)
        data_grid_results = pd.DataFrame(data_grid_search.cv_results_)

        # Obtaining the dataframe indicies corresponding to the best accuracy, f1, and roc_auc
        # and obtaining the best parameters corresponding to those index values
        data_ind_best_accuracy = data_grid_results['mean_test_accuracy'].idxmax()

        data_accuracy_best_param_hidden_layer = data_grid_results['param_hidden_layer_sizes'][data_ind_best_accuracy]
        data_accuracy_best_param_momentum = data_grid_results['param_momentum'][data_ind_best_accuracy]
        data_accuracy_best_param_solver = data_grid_results['param_solver'][data_ind_best_accuracy]

        data_ind_best_f1 = data_grid_results['mean_test_f1'].idxmax()

        data_f1_best_param_hidden_layer = data_grid_results['param_hidden_layer_sizes'][data_ind_best_f1]
        data_f1_best_param_momentum = data_grid_results['param_momentum'][data_ind_best_f1]
        data_f1_best_param_solver = data_grid_results['param_solver'][data_ind_best_f1]

        data_ind_best_roc_auc = data_grid_results['mean_test_roc_auc'].idxmax()

        data_roc_auc_best_param_hidden_layer = data_grid_results['param_hidden_layer_sizes'][data_ind_best_roc_auc]
        data_roc_auc_best_param_momentum = data_grid_results['param_momentum'][data_ind_best_roc_auc]
        data_roc_auc_best_param_solver = data_grid_results['param_solver'][data_ind_best_roc_auc]

        # Creating a list of best hyperparams per metric 
        list_best_param_roc_auc = [data_roc_auc_best_param_hidden_layer,
                            data_roc_auc_best_param_momentum,
                           data_roc_auc_best_param_solver]

        list_best_param_f1 = [data_f1_best_param_hidden_layer,
                                    data_f1_best_param_momentum,
                                    data_f1_best_param_solver]

        list_best_param_accuracy = [data_accuracy_best_param_hidden_layer,
                                   data_accuracy_best_param_momentum,
                                   data_accuracy_best_param_solver]

        # This is used to create a dataframe to cycle through the parameters while re-training
        overall_best_params = [list_best_param_accuracy, list_best_param_f1, list_best_param_roc_auc]

        # The row indicies are ['accuracy', 'f1', 'roc_auc']
        best_hyperparam = pd.DataFrame(overall_best_params, columns = ['hidden_layer', 'momentum', 'solver'])

        # These training and testing lists store the performance of each
        # model (3 models as there are 3 metrics)
        model_scores_test = []
        model_scores_train = []
        
        for i in range(len(best_hyperparam)):

            # Stores the training and testing scores of a model in the loop
            test_scores = []
            train_scores = []

            if (best_hyperparam['solver'][i] == 'adam'):
                MLP_model_final = sklearn.neural_network.MLPClassifier(
                    solver = 'adam',
                    hidden_layer_sizes = best_hyperparam['hidden_layer'][i],
                    max_iter = 2000
                )
    
            else:
                MLP_model_final = sklearn.neural_network.MLPClassifier(
                    solver = 'sgd',
                    max_iter = 2000,
                    momentum = best_hyperparam['momentum'][i],
                    hidden_layer_sizes = best_hyperparam['hidden_layer'][i]
                )
            
            # Fitting the corresponding model once more with the whole training data
            MLP_model_final.fit(X_train, Y_train)
            
            # Test data performance
            data_Y_pred = MLP_model_final.predict(data_X_test)
            
            test_scores.append(sklearn.metrics.accuracy_score(data_Y_test, data_Y_pred))
            test_scores.append(sklearn.metrics.f1_score(data_Y_test, data_Y_pred))
            test_scores.append(sklearn.metrics.roc_auc_score(data_Y_test, data_Y_pred))

            model_scores_test.append(test_scores)
            
            # Training data performance
            data_Y_pred_train = MLP_model_final.predict(X_train)

            train_scores.append(sklearn.metrics.accuracy_score(Y_train, data_Y_pred_train))
            train_scores.append(sklearn.metrics.f1_score(Y_train, data_Y_pred_train))
            train_scores.append(sklearn.metrics.roc_auc_score(Y_train, data_Y_pred_train))
            
            model_scores_train.append(train_scores)
    
        # Storing the performance of each testing data model
        model_df_test = pd.DataFrame(model_scores_test, columns = ['accuracy', 'f1', 'roc_auc'])
        model_mean_test = model_df_test.mean().to_numpy()
        
        # Storing the performance of each training data model
        model_df_train = pd.DataFrame(model_scores_train, columns = ['accuracy', 'f1', 'roc_auc'])
        model_mean_train = model_df_train.mean().to_numpy()
        
        trial_scores_test.append(model_mean_test)
        trial_scores_train.append(model_mean_train)
        
    # These datasets contain all the 3 metrics performances of each trial in a dataframe format
    MLP_data_trial_test = pd.DataFrame(trial_scores_test, columns = ['accuracy', 'f1', 'roc_auc'])
    MLP_data_trial_train = pd.DataFrame(trial_scores_train, columns = ['accuracy', 'f1', 'roc_auc'])
    
    return MLP_data_trial_test, MLP_data_trial_train


Now we can call the above MLP classifier function on each dataset to obtain its testing and training data performances

In [17]:
raw_test_occupancy, raw_train_occupancy = ANN_solver(occupancy_X_std, occupancy_Y)
raw_test_occupancy

Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   25.9s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  3.0min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   21.2s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  2.8min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   29.2s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  2.7min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   21.5s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  2.6min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   27.4s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  2.8min finished


Unnamed: 0,accuracy,f1,roc_auc
0,0.98931,0.977261,0.991038
1,0.989267,0.977159,0.990816
2,0.988689,0.976035,0.991575
3,0.989139,0.97695,0.991608
4,0.91551,0.65081,0.827564


In [18]:
raw_test_gamma, raw_train_gamma = ANN_solver(gamma_X_std, gamma_Y)
raw_test_gamma

Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   15.4s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  3.0min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   17.3s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  2.9min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   18.1s
[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  3.0min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   11.9s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  3.1min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   20.7s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  3.4min finished


Unnamed: 0,accuracy,f1,roc_auc
0,0.867,0.90132,0.837497
1,0.867118,0.901594,0.836846
2,0.865383,0.900209,0.835383
3,0.870233,0.902945,0.844585
4,0.863742,0.897777,0.838775


In [19]:
raw_test_adult, raw_train_adult = ANN_solver(adult_X_std, adult_Y)
raw_test_adult

Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   21.2s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  5.1min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   24.5s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  4.2min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  6.6min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   39.6s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  6.9min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   35.1s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  4.8min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  6.9min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   45.2s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  6.7min finished


Unnamed: 0,accuracy,f1,roc_auc
0,0.842991,0.898827,0.761532
1,0.841539,0.897946,0.75893
2,0.840511,0.896399,0.766843
3,0.843595,0.899297,0.761484
4,0.844309,0.899849,0.76144


In [20]:
raw_test_eye, raw_train_eye = ANN_solver(eye_X_std, eye_Y)
raw_test_eye

Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    6.4s
[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   46.9s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  2.0min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    8.5s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:   52.5s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  2.2min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    9.8s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed: 16.6min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed: 18.0min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   10.4s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  2.9min finished


Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    6.6s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:   41.6s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  2.3min finished


Unnamed: 0,accuracy,f1,roc_auc
0,0.852705,0.833784,0.849976
1,0.855177,0.837082,0.852778
2,0.859185,0.839727,0.855722
3,0.852772,0.834691,0.850375
4,0.86356,0.845327,0.860514


In [21]:
# Obtaining the mean across all 5 trials per dataset for the testing data

mean_adult_ANN_score = raw_test_adult.mean().to_numpy()
mean_gamma_ANN_score = raw_test_gamma.mean().to_numpy()
mean_eye_ANN_score = raw_test_eye.mean().to_numpy()
mean_occupancy_ANN_score = raw_test_occupancy.mean().to_numpy()

In [22]:
# Obtaining the mean across all 5 trials per dataset for the training data

mean_adult_ANN_score_train = raw_train_adult.mean().to_numpy()
mean_gamma_ANN_score_train = raw_train_gamma.mean().to_numpy()
mean_eye_ANN_score_train = raw_train_eye.mean().to_numpy()
mean_occupancy_ANN_score_train = raw_train_occupancy.mean().to_numpy()

In [23]:
# Saving the average metrics for both testing and training so we don't have to rerun the code

list_of_metrics_across_datasets = [mean_adult_ANN_score, mean_gamma_ANN_score,
                                   mean_eye_ANN_score, mean_occupancy_ANN_score]

list_of_metrics_across_datasets_train = [mean_adult_ANN_score_train, mean_gamma_ANN_score_train,
                                   mean_eye_ANN_score_train, mean_occupancy_ANN_score_train]

In [24]:
pd.DataFrame(list_of_metrics_across_datasets).to_csv('ANN_metrics.csv', index = False)
pd.DataFrame(list_of_metrics_across_datasets_train).to_csv('ANN_metrics_train.csv', index = False)

In [25]:
# Also saving the raw test scores for later use if needed

raw_test_occupancy.to_csv('ANN_occupancy_test.csv', index = False)
raw_test_adult.to_csv('ANN_adult_test.csv', index = False)
raw_test_gamma.to_csv('ANN_gamma_test.csv', index = False)
raw_test_eye.to_csv('ANN_eye_test.csv', index = False)

In [26]:
# saving the raw train scores for later use if needed

raw_train_occupancy.to_csv('ANN_occupancy_train.csv', index = False)
raw_train_adult.to_csv('ANN_adult_train.csv', index = False)
raw_train_gamma.to_csv('ANN_gamma_train.csv', index = False)
raw_train_eye.to_csv('ANN_eye_train.csv', index = False)