In [37]:
# Import libraries
import pandas as pd
import numpy as np
import os


In [38]:
# Gather list of filepaths and UIDs.
# Each sample is expected to live in its own "<uid>.csv" file inside data_path.
data_path = "data/blind_test/test/"

# os.listdir() returns entries in arbitrary order and also lists stray
# non-data entries (e.g. .ipynb_checkpoints, .DS_Store), so sort for a
# reproducible row order and keep only CSV files.
csv_file_names = sorted(
    file_name
    for file_name in os.listdir(data_path)
    if file_name.lower().endswith(".csv")
)

file_paths = [os.path.join(data_path, file_name) for file_name in csv_file_names]
# The UID is everything before the first "." in the file name.
uids = [file_name.split(".")[0] for file_name in csv_file_names]

df = pd.DataFrame({"uid": uids, "file_path": file_paths})
display(df)

Unnamed: 0,uid,file_path
0,ABNTSS552,data/blind_test/test/ABNTSS552.csv
1,ADQRPH513,data/blind_test/test/ADQRPH513.csv
2,AEEEIG737,data/blind_test/test/AEEEIG737.csv
3,AFEOPC672,data/blind_test/test/AFEOPC672.csv
4,AGHXWX765,data/blind_test/test/AGHXWX765.csv
...,...,...
64,DQALNF634,data/blind_test/test/DQALNF634.csv
65,DRFBTQ594,data/blind_test/test/DRFBTQ594.csv
66,DRHUVS991,data/blind_test/test/DRHUVS991.csv
67,DVDCBJ239,data/blind_test/test/DVDCBJ239.csv


In [39]:
# Load features from individual CSVs into a single dataframe
def get_features(file_path):
    """Return the first row of a header-less CSV file as a plain Python list.

    Args:
        file_path: Path of the CSV file to read. The file is parsed with
            ``header=None``, so its first line is treated as data.

    Returns:
        list: The values of the first row, in column order.
    """
    table = pd.read_csv(file_path, header=None)
    first_row = table.iloc[0]
    return first_row.values.tolist()


# Build one row of features per file; result_type="expand" spreads each
# returned list across integer-labelled columns (0, 1, 2, ...).
# The path is looked up by its column label: `row[0]` relied on positional
# integer fallback on a label-indexed Series, which pandas has deprecated.
features_df = df[["file_path"]].apply(
    lambda row: get_features(row["file_path"]), axis=1, result_type="expand"
)
display(features_df)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,0.000233,0.006601,-0.001318,0.001098,-0.047733,-0.071875,0.000654,0.000902,-0.193959,0.000319,...,-0.064665,-0.444786,-0.879349,1.048909,0.213126,1.170847,-1.172747,1.686595,0.482523,-0.440434
1,0.000233,0.003029,0.001606,0.001224,-0.092386,-0.434045,0.000668,-0.000410,-0.228858,0.000444,...,-0.147889,1.168724,-0.486698,1.134707,-0.029372,0.092189,-0.791921,1.786787,2.089036,-0.690614
2,0.000464,0.006611,0.000842,0.001412,-0.152744,-0.355706,0.000906,-0.001229,-0.320724,0.000493,...,0.411773,0.232481,-0.527885,-0.305296,-0.189008,-0.592684,-1.144780,3.459698,-0.199579,-0.999165
3,0.000441,0.006178,-0.000811,0.003572,-0.108863,-0.302020,0.000761,0.001851,-0.197981,0.000310,...,0.457373,-0.782917,-1.072765,1.180279,-0.111142,1.897755,-0.902370,0.552967,-0.314270,-1.198762
4,0.000305,0.003671,-0.004093,0.003010,-0.093583,0.133018,0.000627,0.001443,-0.367352,0.000462,...,-0.260746,-0.741712,-0.887129,0.190525,0.216271,0.490549,-1.047399,1.875185,0.345561,-0.874318
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,0.000118,0.003550,-0.002475,0.003005,-0.069995,-0.016384,0.000791,0.000333,-0.328246,0.000422,...,-0.157696,-0.190375,-0.816606,-0.321006,1.516188,-0.243600,-0.758915,2.071124,0.243026,-0.237539
65,0.000332,0.006905,-0.000501,0.002183,-0.076969,-0.297055,0.001044,-0.001425,-0.216813,0.000541,...,-0.005628,0.637483,-0.318275,-0.252934,0.340965,-0.328232,-1.111827,2.369061,0.541017,0.046648
66,0.000279,0.005215,0.000533,0.001638,-0.065006,-0.366688,0.001068,-0.002233,-0.051759,0.000601,...,1.240270,0.002545,-0.703586,-0.088785,0.839848,-0.091532,-1.264338,5.064988,0.588164,0.028188
67,0.000345,0.004489,-0.002033,0.003131,-0.069861,0.107010,0.000825,0.000348,-0.371404,0.000535,...,0.224740,-0.283803,-0.822711,-0.511241,0.266097,-0.103484,-0.700022,1.780735,0.917772,-1.199582


In [40]:
# Merge dataframes and fix column names
num_features = features_df.shape[1]
# Feature columns are named f0, f1, ... in their original column order.
feature_names = [f"f{i}" for i in range(num_features)]

features_df.columns = feature_names

# Start from the identifier columns only, so that re-running this cell does
# not append a second copy of the feature columns to `df`.
df = pd.concat([df[["uid", "file_path"]], features_df], axis=1)
display(df)


Unnamed: 0,uid,file_path,f0,f1,f2,f3,f4,f5,f6,f7,...,f1014,f1015,f1016,f1017,f1018,f1019,f1020,f1021,f1022,f1023
0,ABNTSS552,data/blind_test/test/ABNTSS552.csv,0.000233,0.006601,-0.001318,0.001098,-0.047733,-0.071875,0.000654,0.000902,...,-0.064665,-0.444786,-0.879349,1.048909,0.213126,1.170847,-1.172747,1.686595,0.482523,-0.440434
1,ADQRPH513,data/blind_test/test/ADQRPH513.csv,0.000233,0.003029,0.001606,0.001224,-0.092386,-0.434045,0.000668,-0.000410,...,-0.147889,1.168724,-0.486698,1.134707,-0.029372,0.092189,-0.791921,1.786787,2.089036,-0.690614
2,AEEEIG737,data/blind_test/test/AEEEIG737.csv,0.000464,0.006611,0.000842,0.001412,-0.152744,-0.355706,0.000906,-0.001229,...,0.411773,0.232481,-0.527885,-0.305296,-0.189008,-0.592684,-1.144780,3.459698,-0.199579,-0.999165
3,AFEOPC672,data/blind_test/test/AFEOPC672.csv,0.000441,0.006178,-0.000811,0.003572,-0.108863,-0.302020,0.000761,0.001851,...,0.457373,-0.782917,-1.072765,1.180279,-0.111142,1.897755,-0.902370,0.552967,-0.314270,-1.198762
4,AGHXWX765,data/blind_test/test/AGHXWX765.csv,0.000305,0.003671,-0.004093,0.003010,-0.093583,0.133018,0.000627,0.001443,...,-0.260746,-0.741712,-0.887129,0.190525,0.216271,0.490549,-1.047399,1.875185,0.345561,-0.874318
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,DQALNF634,data/blind_test/test/DQALNF634.csv,0.000118,0.003550,-0.002475,0.003005,-0.069995,-0.016384,0.000791,0.000333,...,-0.157696,-0.190375,-0.816606,-0.321006,1.516188,-0.243600,-0.758915,2.071124,0.243026,-0.237539
65,DRFBTQ594,data/blind_test/test/DRFBTQ594.csv,0.000332,0.006905,-0.000501,0.002183,-0.076969,-0.297055,0.001044,-0.001425,...,-0.005628,0.637483,-0.318275,-0.252934,0.340965,-0.328232,-1.111827,2.369061,0.541017,0.046648
66,DRHUVS991,data/blind_test/test/DRHUVS991.csv,0.000279,0.005215,0.000533,0.001638,-0.065006,-0.366688,0.001068,-0.002233,...,1.240270,0.002545,-0.703586,-0.088785,0.839848,-0.091532,-1.264338,5.064988,0.588164,0.028188
67,DVDCBJ239,data/blind_test/test/DVDCBJ239.csv,0.000345,0.004489,-0.002033,0.003131,-0.069861,0.107010,0.000825,0.000348,...,0.224740,-0.283803,-0.822711,-0.511241,0.266097,-0.103484,-0.700022,1.780735,0.917772,-1.199582


In [41]:
# Load the StandardScaler from the training notebook.
# `%store -r` restores a variable that was persisted with `%store` in another
# kernel session -- presumably the training notebook ran `%store scaler_std`,
# so it has to be executed before this one (TODO confirm).
%store -r scaler_std

In [42]:
# Standardize the blind test data with the restored scaler.
# Only transform() is called here, so the scaler's existing fit is reused.
test_features = scaler_std.transform(df[feature_names])

# Wrap the transformed values in a DataFrame again, restoring the feature names
test_features_df = pd.DataFrame(data=test_features, columns=feature_names)
display(test_features_df)

Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,f1014,f1015,f1016,f1017,f1018,f1019,f1020,f1021,f1022,f1023
0,-0.800761,0.996221,-0.540534,-1.675971,0.451161,0.429524,-0.646267,0.943481,0.585272,-0.746584,...,-0.075077,-0.378402,-0.911063,1.287197,-0.234135,0.741935,-0.791254,-0.016918,0.264098,0.517176
1,-0.801288,-0.548807,1.481916,-1.495172,-0.635529,-1.132195,-0.569514,-0.355825,0.185297,0.300761,...,-0.163076,1.745130,0.067488,1.406431,-0.507052,-0.450546,0.103909,0.078713,3.031359,-0.025210
2,1.117567,1.000479,0.953224,-1.225148,-2.104429,-0.794387,0.731425,-1.166887,-0.867572,0.717362,...,0.428690,0.512944,-0.035158,-0.594766,-0.686712,-1.207687,-0.725514,1.675473,-0.910841,-0.694142
3,0.930263,0.813116,-0.189754,1.877617,-1.036525,-0.562888,-0.062832,1.883144,0.539175,-0.824264,...,0.476905,-0.823415,-1.393088,1.469764,-0.599079,1.545548,-0.155711,-1.098942,-1.108399,-1.126864
4,-0.201692,-0.270913,-2.460358,1.070843,-0.664681,1.313048,-0.794924,1.478477,-1.401975,0.452067,...,-0.282406,-0.769185,-0.930453,0.094286,-0.230596,-0.010150,-0.496612,0.163088,0.028177,-0.423476
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,-1.757193,-0.323197,-1.340988,1.062458,-0.090619,0.668809,0.104263,0.379726,-0.953780,0.121528,...,-0.173445,-0.043574,-0.754696,-0.616598,1.232379,-0.821768,0.181491,0.350108,-0.148441,0.957047
65,0.024371,1.127570,0.024496,-0.117748,-0.260349,-0.541478,1.489882,-1.361506,0.323347,1.120081,...,-0.012654,1.045965,0.487225,-0.521997,-0.090261,-0.915331,-0.648056,0.634482,0.364856,1.573159
66,-0.414176,0.396696,0.739580,-0.900131,0.030782,-0.841744,1.617159,-2.161737,2.215008,1.631294,...,1.304711,0.210328,-0.473034,-0.293876,0.471201,-0.653654,-1.006547,3.207688,0.446068,1.533138
67,0.131907,0.082657,-1.035176,1.244198,-0.087362,1.200898,0.287811,0.394717,-1.448414,1.074157,...,0.230928,-0.166534,-0.769911,-0.880971,-0.174520,-0.666866,0.319924,0.072937,1.013827,-1.128640


In [43]:
# Load the names of the features selected in the training notebook.
# `%store -r` restores a variable persisted with `%store` in another kernel
# session -- presumably the training notebook ran
# `%store extracted_feature_names` (TODO confirm).
%store -r extracted_feature_names

In [44]:
# Extract only features determined during feature selection.
# .copy() makes the selection an independent frame, so adding columns to it
# later does not trigger pandas' SettingWithCopyWarning.
test_features_df = test_features_df[extracted_feature_names].copy()
display(test_features_df)

Unnamed: 0,f0,f4,f6,f11,f12,f14,f15,f16,f20,f24,...,f994,f995,f998,f1000,f1002,f1003,f1008,f1012,f1015,f1018
0,-0.800761,0.451161,-0.646267,-1.288031,1.086135,0.809103,0.765699,-0.669772,0.779410,1.912315,...,-0.364252,0.102833,-0.406617,-0.224478,-0.855704,-0.259058,1.904052,0.025209,-0.378402,-0.234135
1,-0.801288,-0.635529,-0.569514,-1.611484,-0.864097,-0.064221,-0.609585,0.204083,-0.550261,0.838954,...,2.216530,-1.547781,0.503497,0.058215,-0.710672,0.348908,1.228203,-0.920996,1.745130,-0.507052
2,1.117567,-2.104429,0.731425,-0.475658,1.126156,-0.473041,0.363509,0.344563,0.312048,0.339146,...,0.172746,0.376362,0.402093,-0.389810,0.042567,-0.094230,-0.404514,-1.395357,0.512944,-0.686712
3,0.930263,-1.036525,-0.062832,0.621544,0.176143,0.714438,-0.468214,-0.711127,1.009066,0.275560,...,0.338796,1.100411,-0.845502,1.026403,-0.717042,-0.038452,0.946727,1.022542,-0.823415,-0.599079
4,-0.201692,-0.664681,-0.794924,1.334866,0.155380,-0.577110,0.312715,-0.613068,-0.344218,-1.621945,...,0.834709,0.054149,-1.220256,1.035228,-0.697995,0.637037,0.767018,0.140786,-0.769185,-0.230596
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,-1.757193,-0.090619,0.104263,-0.092848,-1.772910,-0.365450,0.989223,-0.271363,-0.983533,0.619451,...,-0.198154,2.189357,-0.281232,-0.111613,-0.634773,1.425420,-0.069981,-0.965301,-0.043574,1.232379
65,0.024371,-0.260349,1.489882,-0.338481,0.165788,-1.085171,0.322559,1.120515,-0.713233,0.318048,...,0.663619,0.703145,-0.181578,-0.994021,0.969829,1.289756,-0.495231,-0.702232,1.045965,-0.090261
66,-0.414176,0.030782,1.617159,-1.020291,1.363748,-0.399980,0.368755,1.831225,0.732757,0.120327,...,-1.003561,1.034696,0.883564,0.143711,-0.479451,-0.072785,-0.482232,-1.529684,0.210328,0.471201
67,0.131907,-0.087362,0.287811,0.843457,-0.569160,-0.265063,1.154603,-0.011001,-1.253616,-2.051429,...,-0.599868,-0.819748,-0.736684,0.185740,-0.513769,0.043326,-0.119243,0.276508,-0.166534,-0.174520


In [45]:
# Load the final model from the training notebook.
# `%store -r` restores a variable persisted with `%store` in another kernel
# session -- presumably the training notebook ran `%store svm_model_final`
# (TODO confirm).
%store -r svm_model_final

In [46]:
# Make predictions.
# The model's input columns are selected explicitly so this cell can be
# re-run after extra columns (e.g. "Prediction") have been added to the frame.
ypred = svm_model_final.predict(test_features_df[extracted_feature_names])


In [47]:
# Add prediction to df.
# assign() returns a new frame with the extra column instead of writing into
# the column-subset selection, which avoids pandas' SettingWithCopyWarning.
test_features_df = test_features_df.assign(Prediction=ypred)
display(test_features_df)

Unnamed: 0,f0,f4,f6,f11,f12,f14,f15,f16,f20,f24,...,f995,f998,f1000,f1002,f1003,f1008,f1012,f1015,f1018,Prediction
0,-0.800761,0.451161,-0.646267,-1.288031,1.086135,0.809103,0.765699,-0.669772,0.779410,1.912315,...,0.102833,-0.406617,-0.224478,-0.855704,-0.259058,1.904052,0.025209,-0.378402,-0.234135,4
1,-0.801288,-0.635529,-0.569514,-1.611484,-0.864097,-0.064221,-0.609585,0.204083,-0.550261,0.838954,...,-1.547781,0.503497,0.058215,-0.710672,0.348908,1.228203,-0.920996,1.745130,-0.507052,3
2,1.117567,-2.104429,0.731425,-0.475658,1.126156,-0.473041,0.363509,0.344563,0.312048,0.339146,...,0.376362,0.402093,-0.389810,0.042567,-0.094230,-0.404514,-1.395357,0.512944,-0.686712,3
3,0.930263,-1.036525,-0.062832,0.621544,0.176143,0.714438,-0.468214,-0.711127,1.009066,0.275560,...,1.100411,-0.845502,1.026403,-0.717042,-0.038452,0.946727,1.022542,-0.823415,-0.599079,3
4,-0.201692,-0.664681,-0.794924,1.334866,0.155380,-0.577110,0.312715,-0.613068,-0.344218,-1.621945,...,0.054149,-1.220256,1.035228,-0.697995,0.637037,0.767018,0.140786,-0.769185,-0.230596,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,-1.757193,-0.090619,0.104263,-0.092848,-1.772910,-0.365450,0.989223,-0.271363,-0.983533,0.619451,...,2.189357,-0.281232,-0.111613,-0.634773,1.425420,-0.069981,-0.965301,-0.043574,1.232379,0
65,0.024371,-0.260349,1.489882,-0.338481,0.165788,-1.085171,0.322559,1.120515,-0.713233,0.318048,...,0.703145,-0.181578,-0.994021,0.969829,1.289756,-0.495231,-0.702232,1.045965,-0.090261,3
66,-0.414176,0.030782,1.617159,-1.020291,1.363748,-0.399980,0.368755,1.831225,0.732757,0.120327,...,1.034696,0.883564,0.143711,-0.479451,-0.072785,-0.482232,-1.529684,0.210328,0.471201,0
67,0.131907,-0.087362,0.287811,0.843457,-0.569160,-0.265063,1.154603,-0.011001,-1.253616,-2.051429,...,-0.819748,-0.736684,0.185740,-0.513769,0.043326,-0.119243,0.276508,-0.166534,-0.174520,4
