In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
# %matplotlib inline
import sklearn
from sklearn import datasets, linear_model, model_selection
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.metrics import recall_score
import os
import time
from sklearn.preprocessing import StandardScaler
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100

In [2]:
# Load the PortScan capture (a CSV file) into a DataFrame
notebook_path = os.path.abspath("PortScan-normalized.ipynb")
temp = os.path.abspath("Data")  # absolute path of the "Data" folder under the working directory
# os.path.join inserts the platform's own separator, so the path is no longer tied to Windows "\\"
df = pd.read_csv(
    os.path.join(temp, "Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv"),
    low_memory=False,
)

In [3]:
# Clean the header names: remove every space, then turn each "/s" into "_s"
# (e.g. the rate columns end up as FlowBytes_s / FlowPackets_s)
df.columns = df.columns.str.replace(' ', '').str.replace('/s', '_s')

In [4]:
# Show how many rows carry each Label value
print(df['Label'].value_counts())

PortScan    158930
BENIGN      127537
Name: Label, dtype: int64


In [5]:
# Encode the target numerically so it can be plotted and modelled: BENIGN -> 0, PortScan -> 1
df['Label'] = df['Label'].replace({'BENIGN': 0, 'PortScan': 1})

In [6]:
# Re-select the columns with Label in first position; every other column keeps its order.
# The explicit list also restricts df to exactly these columns.
df = df[[
    # target
    'Label',
    # port, duration and packet/byte totals
    'DestinationPort', 'FlowDuration',
    'TotalFwdPackets', 'TotalBackwardPackets',
    'TotalLengthofFwdPackets', 'TotalLengthofBwdPackets',
    # FwdPacketLength* / BwdPacketLength* columns
    'FwdPacketLengthMax', 'FwdPacketLengthMin', 'FwdPacketLengthMean', 'FwdPacketLengthStd',
    'BwdPacketLengthMax', 'BwdPacketLengthMin', 'BwdPacketLengthMean', 'BwdPacketLengthStd',
    # per-second flow columns
    'FlowBytes_s', 'FlowPackets_s',
    # *IAT* columns
    'FlowIATMean', 'FlowIATStd', 'FlowIATMax', 'FlowIATMin',
    'FwdIATTotal', 'FwdIATMean', 'FwdIATStd', 'FwdIATMax', 'FwdIATMin',
    'BwdIATTotal', 'BwdIATMean', 'BwdIATStd', 'BwdIATMax', 'BwdIATMin',
    # directional flag and header columns
    'FwdPSHFlags', 'BwdPSHFlags', 'FwdURGFlags', 'BwdURGFlags',
    'FwdHeaderLength', 'BwdHeaderLength',
    'FwdPackets_s', 'BwdPackets_s',
    # overall packet length columns
    'MinPacketLength', 'MaxPacketLength',
    'PacketLengthMean', 'PacketLengthStd', 'PacketLengthVariance',
    # *FlagCount columns
    'FINFlagCount', 'SYNFlagCount', 'RSTFlagCount', 'PSHFlagCount',
    'ACKFlagCount', 'URGFlagCount', 'CWEFlagCount', 'ECEFlagCount',
    # ratio / average size columns
    'Down/UpRatio', 'AveragePacketSize', 'AvgFwdSegmentSize', 'AvgBwdSegmentSize',
    'FwdHeaderLength.1',
    # *Bulk* columns
    'FwdAvgBytes/Bulk', 'FwdAvgPackets/Bulk', 'FwdAvgBulkRate',
    'BwdAvgBytes/Bulk', 'BwdAvgPackets/Bulk', 'BwdAvgBulkRate',
    # Subflow* columns
    'SubflowFwdPackets', 'SubflowFwdBytes', 'SubflowBwdPackets', 'SubflowBwdBytes',
    # window / segment columns
    'Init_Win_bytes_forward', 'Init_Win_bytes_backward',
    'act_data_pkt_fwd', 'min_seg_size_forward',
    # Active* / Idle* columns
    'ActiveMean', 'ActiveStd', 'ActiveMax', 'ActiveMin',
    'IdleMean', 'IdleStd', 'IdleMax', 'IdleMin',
]]

In [7]:
# Display every row that has a missing value in at least one column
df.loc[df.isna().any(axis='columns')]

Unnamed: 0,Label,DestinationPort,FlowDuration,TotalFwdPackets,TotalBackwardPackets,TotalLengthofFwdPackets,TotalLengthofBwdPackets,FwdPacketLengthMax,FwdPacketLengthMin,FwdPacketLengthMean,FwdPacketLengthStd,BwdPacketLengthMax,BwdPacketLengthMin,BwdPacketLengthMean,BwdPacketLengthStd,FlowBytes_s,FlowPackets_s,FlowIATMean,FlowIATStd,FlowIATMax,FlowIATMin,FwdIATTotal,FwdIATMean,FwdIATStd,FwdIATMax,FwdIATMin,BwdIATTotal,BwdIATMean,BwdIATStd,BwdIATMax,BwdIATMin,FwdPSHFlags,BwdPSHFlags,FwdURGFlags,BwdURGFlags,FwdHeaderLength,BwdHeaderLength,FwdPackets_s,BwdPackets_s,MinPacketLength,MaxPacketLength,PacketLengthMean,PacketLengthStd,PacketLengthVariance,FINFlagCount,SYNFlagCount,RSTFlagCount,PSHFlagCount,ACKFlagCount,URGFlagCount,CWEFlagCount,ECEFlagCount,Down/UpRatio,AveragePacketSize,AvgFwdSegmentSize,AvgBwdSegmentSize,FwdHeaderLength.1,FwdAvgBytes/Bulk,FwdAvgPackets/Bulk,FwdAvgBulkRate,BwdAvgBytes/Bulk,BwdAvgPackets/Bulk,BwdAvgBulkRate,SubflowFwdPackets,SubflowFwdBytes,SubflowBwdPackets,SubflowBwdBytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,ActiveMean,ActiveStd,ActiveMax,ActiveMin,IdleMean,IdleStd,IdleMax,IdleMin
12824,0,13370,0,2,0,0,0,0,0,0.0,0.0,0,0,0.0,0.0,,inf,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0,64,0,0.0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,1,1,0,0,0,0.0,0.0,0.0,64,0,0,0,0,0,0,2,0,0,0,65535,-1,0,32,0.0,0.0,0,0,0.0,0.0,0,0
12827,0,52508,0,1,1,0,0,0,0,0.0,0.0,0,0,0.0,0.0,,inf,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0,32,32,0.0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,1,0,0,0,1,0.0,0.0,0.0,32,0,0,0,0,0,0,1,0,1,0,227,65535,0,32,0.0,0.0,0,0,0.0,0.0,0,0
14192,0,46058,0,1,1,0,0,0,0,0.0,0.0,0,0,0.0,0.0,,inf,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0,32,32,0.0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,1,0,0,0,1,0.0,0.0,0.0,32,0,0,0,0,0,0,1,0,1,0,227,229,0,32,0.0,0.0,0,0,0.0,0.0,0,0
40707,0,57119,0,2,0,0,0,0,0,0.0,0.0,0,0,0.0,0.0,,inf,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0,64,0,0.0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,1,1,0,0,0,0.0,0.0,0.0,64,0,0,0,0,0,0,2,0,0,0,65535,-1,0,32,0.0,0.0,0,0,0.0,0.0,0,0
52456,0,16506,0,2,0,0,0,0,0,0.0,0.0,0,0,0.0,0.0,,inf,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0,64,0,0.0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,1,0,0,0,0,0.0,0.0,0.0,64,0,0,0,0,0,0,2,0,0,0,229,-1,0,32,0.0,0.0,0,0,0.0,0.0,0,0
56603,0,52995,0,1,1,0,0,0,0,0.0,0.0,0,0,0.0,0.0,,inf,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0,44,32,0.0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,1,0,0,0,1,0.0,0.0,0.0,44,0,0,0,0,0,0,1,0,1,0,408,65535,0,44,0.0,0.0,0,0,0.0,0.0,0,0
76549,0,48337,0,1,1,0,0,0,0,0.0,0.0,0,0,0.0,0.0,,inf,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0,32,32,0.0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,1,0,0,0,1,0.0,0.0,0.0,32,0,0,0,0,0,0,1,0,1,0,227,229,0,32,0.0,0.0,0,0,0.0,0.0,0,0
76773,0,38790,0,2,0,0,0,0,0,0.0,0.0,0,0,0.0,0.0,,inf,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0,64,0,0.0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,1,0,0,0,0,0.0,0.0,0.0,64,0,0,0,0,0,0,2,0,0,0,126,-1,0,32,0.0,0.0,0,0,0.0,0.0,0,0
225846,0,36648,0,1,1,0,0,0,0,0.0,0.0,0,0,0.0,0.0,,inf,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0,32,32,0.0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,1,0,0,0,1,0.0,0.0,0.0,32,0,0,0,0,0,0,1,0,1,0,5438,38864,0,32,0.0,0.0,0,0,0.0,0.0,0,0
237968,0,51204,0,2,0,0,0,0,0,0.0,0.0,0,0,0.0,0.0,,inf,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0,64,0,0.0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,1,0,0,0,0,0.0,0.0,0.0,64,0,0,0,0,0,0,2,0,0,0,357,-1,0,32,0.0,0.0,0,0,0.0,0.0,0,0


In [8]:
# Drop every row that has a missing value in any column.
# The `subset` list that used to be passed here named exactly the columns that the
# Label-first reordering cell leaves in df, so checking all columns (the default) is the
# same test without a second hand-maintained copy of the column list.
# Re-assigning instead of inplace=True avoids mutating the frame behind earlier displays.
df = df.dropna(how='any')

In [9]:
# Delete rows whose FlowBytes_s or FlowPackets_s value is infinite.
# The original check used the .str accessor, which only works while the column still holds
# text such as "Infinity". If read_csv has already parsed those cells into float inf, the
# column is numeric and .str raises, so choose the check that matches the column's dtype.
for rate_col in ('FlowBytes_s', 'FlowPackets_s'):
    rate_values = df[rate_col]
    if pd.api.types.is_numeric_dtype(rate_values):
        # numeric column: infinite cells are real +/-inf floats
        is_infinite = np.isinf(rate_values)
    else:
        # text column: infinite cells are spelled "Infinity"
        is_infinite = rate_values.astype(str).str.startswith('Infinity')
    df = df[~is_infinite]

In [10]:
# Make the two per-second rate columns numeric (same conversion applied to each in turn)
for rate_col in ('FlowBytes_s', 'FlowPackets_s'):
    df[[rate_col]] = df[[rate_col]].apply(pd.to_numeric)

In [11]:
# Move Label to the last position; every feature column keeps its current order.
# Deriving the order from df.columns replaces another hand-maintained copy of the column list.
feature_columns = [column for column in df.columns if column != 'Label']
df = df[feature_columns + ['Label']]

Y = df.Label
X = df.iloc[:, :-1]  # everything except the trailing Label column
xtrainDF, xtestDF, ytrainDF, ytestDF = train_test_split(X, Y, test_size=0.4, random_state=10)

# Fit the scaler on the training split only, then apply the same scaling to the test split
sc = StandardScaler()
xtrainDF = sc.fit_transform(xtrainDF)
xtestDF = sc.transform(xtestDF)

# ------------------------------------------------------------------------- Linear Regression
reg = linear_model.LinearRegression()
fit = reg.fit(xtrainDF, ytrainDF)
prediction_of_test = fit.predict(xtestDF)
# NOTE: np.sqrt is applied, so this value is the root of the mean squared error (RMSE).
# The variable name and printed label stay "MSE" because later cells reuse them.
MSE = np.sqrt(sklearn.metrics.mean_squared_error(ytestDF, prediction_of_test))
print("MSE: ", MSE)

MSE:  0.12134134924188753


In [12]:
# Splits the data into train/test parts, fits a scaled linear regression and scores it
def fit_model(df_data, df_label, test_size=0.4, random_state=10):
    """Fit a linear regression on a train split and score it on the test split.

    Parameters
    ----------
    df_data : feature columns.
    df_label : target values aligned with ``df_data``.
    test_size : passed to ``train_test_split``; defaults to the 0.4 that was
        previously hard-coded.
    random_state : passed to ``train_test_split``; defaults to the 10 that was
        previously hard-coded.

    Returns
    -------
    The square root of the mean squared error on the test split (i.e. RMSE,
    although the rest of the notebook refers to it as "MSE").
    """
    x_train, x_test, y_train, y_test = train_test_split(
        df_data, df_label, test_size=test_size, random_state=random_state
    )
    # Scaling statistics come from the training split only
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)
    model = linear_model.LinearRegression().fit(x_train, y_train)
    predictions = model.predict(x_test)
    return np.sqrt(sklearn.metrics.mean_squared_error(y_test, predictions))

# One elimination pass: try removing each column on its own and keep the best single removal
def traverse_df(df_data, df_label):
    """Score ``df_data`` with each single column removed and return the best result.

    The starting score is ``fit_model`` on the full ``df_data``. Every column is
    then dropped in turn (always from the original ``df_data``) and the reduced
    frame is kept only when it scores strictly lower than the best seen so far.

    Returns ``(best_score, best_frame)``; ``best_frame`` is ``df_data`` itself
    when no single removal lowers the score.
    """
    best_score = fit_model(df_data, df_label)
    best_frame = df_data
    for column in df_data:
        candidate = df_data.drop(column, axis=1)
        candidate_score = fit_model(candidate, df_label)
        if candidate_score < best_score:
            best_score, best_frame = candidate_score, candidate
    return best_score, best_frame



# Main logic of the search: for every column, drop it first and then keep removing the
# single best column (via traverse_df) until the score stops improving.
df_label = df.Label
df_data = df.iloc[:, :-1]  # all columns except the last one (Label is expected to be last here)

# Maps score -> feature frame that produced it. Because the key is the float score itself,
# two different feature subsets with exactly the same score overwrite each other.
MSE_list = {}
MSE_list[fit_model(df_data, df_label)] = df_data

j = 0
a = time.time()  # start of the search; the end stamp is stored in b below
for i in df_data:
    print(j)
    j += 1
    count = 0
    # Start from +inf so the first result is always accepted; the previous magic value 404
    # would have silently rejected any score of 404 or more and stored None as the frame.
    final_mse = float("inf")
    final_df = None
    MSE, new_df = traverse_df(df_data.drop(i, axis=1), df_label)
    while True:
        print(new_df.shape)
        if not MSE < final_mse:
            # no strict improvement over the accepted score: stop refining this start column
            break
        print("while: ", count)
        count += 1
        final_mse, final_df = MSE, new_df
        MSE, new_df = traverse_df(final_df, df_label)

    MSE_list[final_mse] = final_df
b = time.time()

0
(286096, 76)
while:  0
(286096, 75)
while:  1
(286096, 74)
while:  2
(286096, 73)
while:  3
(286096, 73)
1
(286096, 76)
while:  0
(286096, 75)
while:  1
(286096, 75)
2
(286096, 76)
while:  0
(286096, 76)
3
(286096, 76)
while:  0
(286096, 75)
while:  1
(286096, 75)
4
(286096, 76)
while:  0
(286096, 75)
while:  1
(286096, 74)
while:  2
(286096, 74)
5
(286096, 76)
while:  0
(286096, 75)
while:  1
(286096, 74)
while:  2
(286096, 74)
6
(286096, 76)
while:  0
(286096, 75)
while:  1
(286096, 74)
while:  2
(286096, 73)
while:  3
(286096, 73)
7
(286096, 76)
while:  0
(286096, 75)
while:  1
(286096, 74)
while:  2
(286096, 74)
8
(286096, 76)
while:  0
(286096, 76)
9
(286096, 76)
while:  0
(286096, 75)
while:  1
(286096, 75)
10
(286096, 76)
while:  0
(286096, 75)
while:  1
(286096, 74)
while:  2
(286096, 73)
while:  3
(286096, 72)
while:  4
(286096, 72)
11
(286096, 76)
while:  0
(286096, 75)
while:  1
(286096, 74)
while:  2
(286096, 73)
while:  3
(286096, 72)
while:  4
(286096, 71)
while:  5
(28

In [13]:
# Scan the scores recorded in MSE_list and keep the smallest one with its feature frame.
# Starting from +inf instead of the magic value 404 means a stored score is never rejected
# just for being large; with an empty MSE_list the result is (inf, None).
fMSE = float("inf")
fdf = None
for key, candidate_df in MSE_list.items():
    if key < fMSE:
        fMSE = key
        fdf = candidate_df

In [14]:
# Seconds spent in the search loop: b and a are the time.time() stamps taken after and before it
print(b-a)

30011.701190948486


In [15]:
# Display the feature frame stored under the smallest score found in MSE_list
fdf

Unnamed: 0,DestinationPort,FlowDuration,TotalFwdPackets,TotalBackwardPackets,TotalLengthofFwdPackets,TotalLengthofBwdPackets,FwdPacketLengthMax,FwdPacketLengthMin,FwdPacketLengthMean,FwdPacketLengthStd,BwdPacketLengthMax,BwdPacketLengthMin,BwdPacketLengthMean,BwdPacketLengthStd,FlowBytes_s,FlowPackets_s,FlowIATMean,FlowIATStd,FlowIATMax,FlowIATMin,FwdIATTotal,FwdIATMean,FwdIATStd,FwdIATMax,FwdIATMin,BwdIATTotal,BwdIATMean,BwdIATStd,BwdIATMax,BwdIATMin,FwdPSHFlags,BwdPSHFlags,FwdURGFlags,BwdURGFlags,FwdHeaderLength,BwdHeaderLength,FwdPackets_s,BwdPackets_s,MinPacketLength,MaxPacketLength,PacketLengthMean,PacketLengthStd,PacketLengthVariance,FINFlagCount,SYNFlagCount,RSTFlagCount,PSHFlagCount,ACKFlagCount,URGFlagCount,CWEFlagCount,ECEFlagCount,Down/UpRatio,AveragePacketSize,AvgFwdSegmentSize,AvgBwdSegmentSize,FwdHeaderLength.1,FwdAvgBytes/Bulk,FwdAvgPackets/Bulk,FwdAvgBulkRate,BwdAvgBytes/Bulk,BwdAvgPackets/Bulk,BwdAvgBulkRate,SubflowFwdPackets,SubflowFwdBytes,SubflowBwdBytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,ActiveMean,ActiveMin,IdleMean,IdleStd,IdleMax,IdleMin
0,22,1266342,41,44,2664,6954,456,0,64.975610,109.864573,976,0,158.045455,312.675250,7.595105e+03,67.122468,15075.500000,104051.399700,948537,0,1266342,31658.550000,159355.259500,996324,2,317671,7387.697674,19636.448090,104616,1,0,0,0,0,1328,1424,32.376720,34.745748,0,976,111.837209,239.686848,5.744978e+04,0,0,0,1,0,0,0,0,1,113.152941,64.975610,158.045455,1328,0,0,0,0,0,0,41,2664,6954,29200,243,24,32,0.0,0,0.0,0.0,0,0
1,22,1319353,41,44,2664,6954,456,0,64.975610,109.864573,976,0,158.045455,312.675250,7.289937e+03,64.425518,15706.583330,104861.870100,955790,1,1319353,32983.825000,159247.900800,996423,1,363429,8451.837209,21337.262610,104815,1,0,0,0,0,1328,1424,31.075838,33.349680,0,976,111.837209,239.686848,5.744978e+04,0,0,0,1,0,0,0,0,1,113.152941,64.975610,158.045455,1328,0,0,0,0,0,0,41,2664,6954,29200,243,24,32,0.0,0,0.0,0.0,0,0
2,22,160,1,1,0,0,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0.000000e+00,12500.000000,160.000000,0.000000,160,160,0,0.000000,0.000000,0,0,0,0.000000,0.000000,0,0,0,0,0,0,32,32,6250.000000,6250.000000,0,0,0.000000,0.000000,0.000000e+00,0,0,0,0,1,1,0,0,1,0.000000,0.000000,0.000000,32,0,0,0,0,0,0,1,0,0,290,243,0,32,0.0,0,0.0,0.0,0,0
3,22,1303488,41,42,2728,6634,456,0,66.536585,110.129945,976,0,157.952381,319.121427,7.182268e+03,63.675308,15896.195120,106554.899000,956551,0,1303488,32587.200000,160397.049900,997357,1,346851,8459.780488,23962.238920,138295,0,0,0,0,0,1328,1360,31.454068,32.221240,0,976,111.452381,241.642792,5.839124e+04,0,0,0,1,0,0,0,0,1,112.795181,66.536585,157.952381,1328,0,0,0,0,0,0,41,2728,6634,29200,243,24,32,0.0,0,0.0,0.0,0,0
4,35396,77,1,2,0,0,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0.000000e+00,38961.038960,38.500000,14.849242,49,28,0,0.000000,0.000000,0,0,49,49.000000,0.000000,49,49,0,0,0,0,32,64,12987.012990,25974.025970,0,0,0.000000,0.000000,0.000000e+00,0,0,0,0,1,1,0,0,2,0.000000,0.000000,0.000000,32,0,0,0,0,0,0,1,0,0,243,290,0,32,0.0,0,0.0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
286462,443,196135,49,57,1331,105841,570,0,27.163265,108.067176,4344,0,1856.859649,972.796621,5.464196e+05,540.444082,1867.952381,7207.469062,53438,1,196135,4086.145833,10366.095180,53521,1,172901,3087.517857,9307.416597,53438,2,0,0,0,0,1252,1832,249.827925,290.616157,0,4344,1001.607477,1160.591064,1.346972e+06,0,0,0,1,0,0,0,0,1,1011.056604,27.163265,1856.859649,1252,0,0,0,0,0,0,49,1331,105841,29200,61,31,20,0.0,0,0.0,0.0,0,0
286463,443,378424,49,59,1325,104393,570,0,27.040816,108.095051,2896,0,1769.372881,848.453540,2.793639e+05,285.394161,3536.672897,18326.251010,173388,0,378424,7883.833333,34491.462460,236489,2,355402,6127.620690,27253.356700,196547,1,0,0,0,0,1264,1896,129.484388,155.909773,0,2896,969.889908,1073.781250,1.153006e+06,0,0,0,1,0,0,0,0,1,978.870370,27.040816,1769.372881,1264,0,0,0,0,0,0,49,1325,104393,29200,61,30,20,0.0,0,0.0,0.0,0,0
286464,443,161800,70,103,1427,215903,570,0,20.385714,90.746389,4344,0,2096.145631,920.298603,1.343201e+06,1069.221261,940.697674,3947.183709,23268,1,161800,2344.927536,6109.558409,23934,2,138764,1360.431373,5201.981596,32464,1,0,0,0,0,1732,3304,432.632880,636.588381,0,4344,1249.022989,1244.874147,1.549712e+06,0,0,0,1,0,0,0,0,1,1256.242775,20.385714,2096.145631,1732,0,0,0,0,0,0,70,1427,215903,29200,61,47,20,0.0,0,0.0,0.0,0,0
286465,443,142864,50,62,1331,110185,570,0,26.620000,107.027727,2896,0,1777.177419,799.911092,7.805745e+05,783.962370,1287.063063,4955.002013,23111,1,142864,2915.591837,7300.372125,23800,2,119753,1963.163934,6025.630847,23826,1,0,0,0,0,1284,1992,349.983201,433.979169,0,2896,986.867257,1058.110053,1.119597e+06,0,0,0,1,0,0,0,0,1,995.678571,26.620000,1777.177419,1284,0,0,0,0,0,0,50,1331,110185,29200,61,31,20,0.0,0,0.0,0.0,0,0


In [16]:
# Smallest score recorded in MSE_list (fit_model returns the square root of the mean squared error)
print(fMSE)

0.12098361317905643


In [17]:
# Write the selected feature frame to the current working directory, without the index column.
# os.path.join builds the path with the platform's separator instead of a hand-typed '//'.
fdf.to_csv(os.path.join(os.getcwd(), 'final-ps-lr-normalized.csv'), index=False)

In [18]:
# Print every score recorded during the search, in insertion order
for score in MSE_list.keys():
    print(score)

0.12134134924188753
0.1212674340412848
0.12141257039768091
0.12124806275650457
0.12117333516798715
0.12109267979739427
0.12115199552218664
0.12111985993721759
0.12133402783962208
0.1212726139021579
0.12137702773207559
0.12118178233201178
0.1218916447874929
0.12122835168034765
0.12121609939585448
0.12151641487819452
0.12123372980035356
0.1214170129962233
0.12120521669243868
0.12129852631235569
0.12125462173622574
0.12164599038427268
0.12107593938074662
0.12115412733871393
0.12117114078648028
0.12121675434477953
0.12106920328617833
0.12109665591431604
0.12140748311302528
0.12159388583606244
0.1213535489447513
0.12112025233720124
0.12116364122479557
0.12119109436205552
0.12178916583359314
0.12119519192589435
0.1212798437822736
0.12114767339291357
0.12188284370366703
0.13226995821428203
0.12103444538628616
0.12120502363595123
0.12119103070224008
0.18569566415796787
0.12198587160921413
0.12212797689622762
0.12106771148551525
0.12110383345843193
0.12390574650667943
0.12114510511040676
0.1211