In [247]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import xml.etree.ElementTree as et

In [248]:
# Read the raw inputs: both CSV tables first, then the holidays XML tree.
weather_df = pd.read_csv('weather-sfcsv.csv')
train_df = pd.read_csv('train.csv')
xtree = et.parse('holidays.xml')

In [249]:
# Flatten the holidays XML into a table with one row per holiday:
# its description plus integer Year / Month / Day columns.


def _child_text(node, tag):
    """Return the text of the first `tag` child of `node`, or None if that child is absent."""
    child = node.find(tag)
    return child.text if child is not None else None


xroot = xtree.getroot()
df_cols = ['date', 'description']
rows = []
for node in xroot:
    # The previous guard (`if node is not None`) tested the loop variable, which is
    # never None, so a missing <date>/<description> child raised AttributeError.
    holiday_date = _child_text(node, "date")
    if holiday_date is None:
        # An entry without a date cannot be turned into Year/Month/Day; skip it.
        continue
    rows.append({"date": holiday_date, "description": _child_text(node, "description")})
out_df = pd.DataFrame(rows, columns=df_cols)

# Parse all dates in one vectorized call instead of one strptime() per row.
holiday_dates = pd.to_datetime(out_df['date'], format='%Y-%m-%d')
# Pin int64 so the date parts have the same dtype on every pandas version
# (weather_df stores Year/Month/Day as int64).
out_df['Year'] = holiday_dates.dt.year.astype('int64')
out_df['Month'] = holiday_dates.dt.month.astype('int64')
out_df['Day'] = holiday_dates.dt.day.astype('int64')
out_df = out_df.drop(columns=['date'])
out_df.head()

Unnamed: 0,description,Year,Month,Day
0,New Year Day,2012,1,2
1,Martin Luther King Jr. Day,2012,1,16
2,Presidents Day (Washingtons Birthday),2012,2,20
3,Memorial Day,2012,5,28
4,Independence Day,2012,7,4


In [250]:
# Column dtypes and non-null counts of the training table.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6407 entries, 0 to 6406
Data columns (total 16 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   ID            6407 non-null   int64  
 1   Lat           6407 non-null   float64
 2   Lng           6407 non-null   float64
 3   Bump          6407 non-null   bool   
 4   Distance(mi)  6407 non-null   float64
 5   Crossing      6407 non-null   bool   
 6   Give_Way      6407 non-null   bool   
 7   Junction      6407 non-null   bool   
 8   No_Exit       6407 non-null   bool   
 9   Railway       6407 non-null   bool   
 10  Roundabout    6407 non-null   bool   
 11  Stop          6407 non-null   bool   
 12  Amenity       6407 non-null   bool   
 13  Side          6407 non-null   object 
 14  Severity      6407 non-null   int64  
 15  timestamp     6407 non-null   object 
dtypes: bool(9), float64(3), int64(2), object(2)
memory usage: 406.8+ KB


In [251]:
# Column dtypes and non-null counts of the weather table.
weather_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6901 entries, 0 to 6900
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Year               6901 non-null   int64  
 1   Day                6901 non-null   int64  
 2   Month              6901 non-null   int64  
 3   Hour               6901 non-null   int64  
 4   Weather_Condition  6900 non-null   object 
 5   Wind_Chill(F)      3292 non-null   float64
 6   Precipitation(in)  3574 non-null   float64
 7   Temperature(F)     6899 non-null   float64
 8   Humidity(%)        6899 non-null   float64
 9   Wind_Speed(mph)    6556 non-null   float64
 10  Visibility(mi)     6900 non-null   float64
 11  Selected           6901 non-null   object 
dtypes: float64(6), int64(4), object(2)
memory usage: 647.1+ KB


In [252]:
# Preview the first rows of the training table.
train_df.head()

Unnamed: 0,ID,Lat,Lng,Bump,Distance(mi),Crossing,Give_Way,Junction,No_Exit,Railway,Roundabout,Stop,Amenity,Side,Severity,timestamp
0,0,37.76215,-122.40566,False,0.044,False,False,False,False,False,False,False,True,R,2,2016-03-25 15:13:02
1,1,37.719157,-122.448254,False,0.0,False,False,False,False,False,False,False,False,R,2,2020-05-05 19:23:00
2,2,37.808498,-122.366852,False,0.0,False,False,False,False,False,False,True,False,R,3,2016-09-16 19:57:16
3,3,37.78593,-122.39108,False,0.009,False,False,True,False,False,False,False,False,R,1,2020-03-29 19:48:43
4,4,37.719141,-122.448457,False,0.0,False,False,False,False,False,False,False,False,R,2,2019-10-09 08:47:00


In [253]:
# Preview the first rows of the weather table.
weather_df.head()

Unnamed: 0,Year,Day,Month,Hour,Weather_Condition,Wind_Chill(F),Precipitation(in),Temperature(F),Humidity(%),Wind_Speed(mph),Visibility(mi),Selected
0,2020,27,7,18,Fair,64.0,0.0,64.0,70.0,20.0,10.0,No
1,2017,30,9,17,Partly Cloudy,,,71.1,57.0,9.2,10.0,No
2,2017,27,6,5,Overcast,,,57.9,87.0,15.0,9.0,No
3,2016,7,9,9,Clear,,,66.9,73.0,4.6,10.0,No
4,2019,19,10,2,Fair,52.0,0.0,52.0,89.0,0.0,9.0,No


In [254]:
# Split each timestamp into Year / Month / Day / Hour so the rows can be joined
# with the hourly weather table on those four columns.
# Anything after the first '.' (fractional seconds) is discarded before parsing.
# The parse is done once, vectorized, instead of five Python-level loops over the rows.
parsed_ts = pd.to_datetime(
    train_df['timestamp'].str.split('.').str[0],
    format='%Y-%m-%d %H:%M:%S',
)
train_df['timestamp_datetime'] = parsed_ts
# Pin int64 so these columns match the dtype of weather_df's Year/Day/Month/Hour
# regardless of pandas version.
train_df['Year'] = parsed_ts.dt.year.astype('int64')
train_df['Month'] = parsed_ts.dt.month.astype('int64')
train_df['Day'] = parsed_ts.dt.day.astype('int64')
train_df['Hour'] = parsed_ts.dt.hour.astype('int64')

In [255]:
# The timestamp columns are redundant now that Year/Month/Day/Hour exist;
# ID is removed along with them.
obsolete_cols = ['timestamp', 'timestamp_datetime', 'ID']
train_df = train_df.drop(columns=obsolete_cols)
train_df.head()

Unnamed: 0,Lat,Lng,Bump,Distance(mi),Crossing,Give_Way,Junction,No_Exit,Railway,Roundabout,Stop,Amenity,Side,Severity,Year,Month,Day,Hour
0,37.76215,-122.40566,False,0.044,False,False,False,False,False,False,False,True,R,2,2016,3,25,15
1,37.719157,-122.448254,False,0.0,False,False,False,False,False,False,False,False,R,2,2020,5,5,19
2,37.808498,-122.366852,False,0.0,False,False,False,False,False,False,True,False,R,3,2016,9,16,19
3,37.78593,-122.39108,False,0.009,False,False,True,False,False,False,False,False,R,1,2020,3,29,19
4,37.719141,-122.448457,False,0.0,False,False,False,False,False,False,False,False,R,2,2019,10,9,8


In [256]:
# Rows that are identical in every column collapse to their last occurrence.
train_df_wd = train_df.drop_duplicates(keep='last')

# Keep a single weather reading per (Year, Day, Month, Hour); the last one listed wins.
hourly_key = ['Year', 'Day', 'Month', 'Hour']
weather_df_wd = weather_df.drop_duplicates(subset=hourly_key, keep='last')

In [257]:
# Attach the weather reading for the same year/day/month/hour to every training row.
# A left join keeps all training rows even when no weather reading matches.
join_keys = ['Year', 'Day', 'Month', 'Hour']
new_df = train_df_wd.merge(weather_df_wd, how='left', on=join_keys)
new_df.head()

Unnamed: 0,Lat,Lng,Bump,Distance(mi),Crossing,Give_Way,Junction,No_Exit,Railway,Roundabout,...,Day,Hour,Weather_Condition,Wind_Chill(F),Precipitation(in),Temperature(F),Humidity(%),Wind_Speed(mph),Visibility(mi),Selected
0,37.719157,-122.448254,False,0.0,False,False,False,False,False,False,...,5,19,Mostly Cloudy,59.0,0.0,59.0,78.0,20.0,10.0,No
1,37.808498,-122.366852,False,0.0,False,False,False,False,False,False,...,16,19,Clear,,,62.1,80.0,9.2,10.0,No
2,37.78593,-122.39108,False,0.009,False,False,True,False,False,False,...,29,19,Fair,58.0,0.0,58.0,70.0,10.0,10.0,No
3,37.719141,-122.448457,False,0.0,False,False,False,False,False,False,...,9,8,Fair,58.0,0.0,58.0,65.0,3.0,10.0,No
4,37.769611,-122.416878,False,0.0,False,False,False,False,False,False,...,22,8,Fair,61.0,0.0,61.0,81.0,13.0,10.0,No


In [258]:
# Row count and dtypes of the merged table.
new_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6295 entries, 0 to 6294
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Lat                6295 non-null   float64
 1   Lng                6295 non-null   float64
 2   Bump               6295 non-null   bool   
 3   Distance(mi)       6295 non-null   float64
 4   Crossing           6295 non-null   bool   
 5   Give_Way           6295 non-null   bool   
 6   Junction           6295 non-null   bool   
 7   No_Exit            6295 non-null   bool   
 8   Railway            6295 non-null   bool   
 9   Roundabout         6295 non-null   bool   
 10  Stop               6295 non-null   bool   
 11  Amenity            6295 non-null   bool   
 12  Side               6295 non-null   object 
 13  Severity           6295 non-null   int64  
 14  Year               6295 non-null   int64  
 15  Month              6295 non-null   int64  
 16  Day                6295 

In [259]:
# Missing values per column after the merge.
new_df.isnull().sum()

Lat                     0
Lng                     0
Bump                    0
Distance(mi)            0
Crossing                0
Give_Way                0
Junction                0
No_Exit                 0
Railway                 0
Roundabout              0
Stop                    0
Amenity                 0
Side                    0
Severity                0
Year                    0
Month                   0
Day                     0
Hour                    0
Weather_Condition       1
Wind_Chill(F)        3118
Precipitation(in)    2871
Temperature(F)          2
Humidity(%)             2
Wind_Speed(mph)       306
Visibility(mi)          1
Selected                0
dtype: int64

In [260]:
# Discard the few rows missing any of these weather fields; the sparser columns
# (wind chill, precipitation, wind speed) are left for a later step.
required_weather_cols = ['Weather_Condition', 'Temperature(F)', 'Humidity(%)', 'Visibility(mi)']
new_df.dropna(subset=required_weather_cols, axis=0, inplace=True)

In [261]:
# Re-count missing values after dropping the incomplete rows.
new_df.isnull().sum()

Lat                     0
Lng                     0
Bump                    0
Distance(mi)            0
Crossing                0
Give_Way                0
Junction                0
No_Exit                 0
Railway                 0
Roundabout              0
Stop                    0
Amenity                 0
Side                    0
Severity                0
Year                    0
Month                   0
Day                     0
Hour                    0
Weather_Condition       0
Wind_Chill(F)        3116
Precipitation(in)    2870
Temperature(F)          0
Humidity(%)             0
Wind_Speed(mph)       305
Visibility(mi)          0
Selected                0
dtype: int64

In [262]:
# Fill the remaining gaps in the sparse weather columns by linear interpolation
# between neighbouring rows.
# The result is assigned back to the frame: calling interpolate(inplace=True) on
# `new_df[col]` is a chained in-place update, which warns on recent pandas and
# leaves the frame unchanged under Copy-on-Write.
# NOTE(review): rows follow the merge order, not time order, so neighbouring rows
# may be unrelated readings — confirm linear interpolation is the intended fill.
for col in ('Wind_Chill(F)', 'Precipitation(in)', 'Wind_Speed(mph)'):
    new_df[col] = new_df[col].interpolate(method='linear')

In [263]:
# Confirm that no missing values remain after interpolation.
new_df.isnull().sum()

Lat                  0
Lng                  0
Bump                 0
Distance(mi)         0
Crossing             0
Give_Way             0
Junction             0
No_Exit              0
Railway              0
Roundabout           0
Stop                 0
Amenity              0
Side                 0
Severity             0
Year                 0
Month                0
Day                  0
Hour                 0
Weather_Condition    0
Wind_Chill(F)        0
Precipitation(in)    0
Temperature(F)       0
Humidity(%)          0
Wind_Speed(mph)      0
Visibility(mi)       0
Selected             0
dtype: int64

In [264]:
# Preview the merged table after the gaps were filled.
new_df.head()

Unnamed: 0,Lat,Lng,Bump,Distance(mi),Crossing,Give_Way,Junction,No_Exit,Railway,Roundabout,...,Day,Hour,Weather_Condition,Wind_Chill(F),Precipitation(in),Temperature(F),Humidity(%),Wind_Speed(mph),Visibility(mi),Selected
0,37.719157,-122.448254,False,0.0,False,False,False,False,False,False,...,5,19,Mostly Cloudy,59.0,0.0,59.0,78.0,20.0,10.0,No
1,37.808498,-122.366852,False,0.0,False,False,False,False,False,False,...,16,19,Clear,58.5,0.0,62.1,80.0,9.2,10.0,No
2,37.78593,-122.39108,False,0.009,False,False,True,False,False,False,...,29,19,Fair,58.0,0.0,58.0,70.0,10.0,10.0,No
3,37.719141,-122.448457,False,0.0,False,False,False,False,False,False,...,9,8,Fair,58.0,0.0,58.0,65.0,3.0,10.0,No
4,37.769611,-122.416878,False,0.0,False,False,False,False,False,False,...,22,8,Fair,61.0,0.0,61.0,81.0,13.0,10.0,No


In [265]:
# Turn the boolean flags and the two-valued text columns into 0/1 codes.
# NOTE(review): the mapping is applied to every column of the frame, not only to
# the flag, Side and Selected columns — confirm no other column can hold these values.
bool_codes = {True: 1, False: 0}
selected_codes = {'Yes': 1, 'No': 0}
side_codes = {'R': 1, 'L': 0}
new_df.replace({**bool_codes, **selected_codes, **side_codes}, inplace=True)

In [266]:
# Distinct Weather_Condition labels present in the merged table.
weather_conditions = new_df['Weather_Condition'].unique()

In [267]:
# First four rows, columns from position 10 onward, to eyeball the 0/1 encoding.
new_df.iloc[:4,10:]

Unnamed: 0,Stop,Amenity,Side,Severity,Year,Month,Day,Hour,Weather_Condition,Wind_Chill(F),Precipitation(in),Temperature(F),Humidity(%),Wind_Speed(mph),Visibility(mi),Selected
0,0,0,1,2,2020,5,5,19,Mostly Cloudy,59.0,0.0,59.0,78.0,20.0,10.0,0
1,1,0,1,3,2016,9,16,19,Clear,58.5,0.0,62.1,80.0,9.2,10.0,0
2,0,0,1,1,2020,3,29,19,Fair,58.0,0.0,58.0,70.0,10.0,10.0,0
3,0,0,1,2,2019,10,9,8,Fair,58.0,0.0,58.0,65.0,3.0,10.0,0


In [268]:
# Pairwise correlations between the numeric columns.
# Weather_Condition is still text here. Older pandas dropped such columns silently,
# but pandas >= 2.0 raises on them, so the numeric/bool columns are selected explicitly.
new_df.select_dtypes(include=['number', 'bool']).corr()

Unnamed: 0,Lat,Lng,Bump,Distance(mi),Crossing,Give_Way,Junction,No_Exit,Railway,Roundabout,...,Month,Day,Hour,Wind_Chill(F),Precipitation(in),Temperature(F),Humidity(%),Wind_Speed(mph),Visibility(mi),Selected
Lat,1.0,0.386898,,0.036637,0.042491,0.011435,0.003886,0.010526,-0.002532,,...,-0.031627,0.007884,0.021232,0.016655,-0.029815,0.041457,0.009523,-0.048801,0.001478,0.017103
Lng,0.386898,1.0,,0.072426,-0.122937,-0.032874,0.183286,-0.000703,-0.031049,,...,-0.018109,-0.004642,0.008503,0.015033,-0.010141,0.025421,0.015522,-0.065437,-0.026506,0.016744
Bump,,,,,,,,,,,...,,,,,,,,,,
Distance(mi),0.036637,0.072426,,1.0,-0.019238,-0.007191,-0.029824,0.000378,-0.033722,,...,0.077305,0.008932,0.0044,-0.002528,-0.002641,-0.022384,0.002025,-0.019746,-0.000227,-0.006052
Crossing,0.042491,-0.122937,,-0.019238,1.0,0.072388,-0.159816,-0.003803,0.429768,,...,-0.00689,0.02501,-0.037193,-0.027834,-0.026792,-0.040101,0.013822,-0.036602,0.014313,-0.005379
Give_Way,0.011435,-0.032874,,-0.007191,0.072388,1.0,-0.012346,-0.000275,0.041403,,...,0.01165,-0.005033,0.008091,0.01817,-0.005433,0.01733,-0.024758,-0.012373,0.007411,-0.000389
Junction,0.003886,0.183286,,-0.029824,-0.159816,-0.012346,1.0,-0.007127,-0.093908,,...,-0.036197,-0.019051,0.017255,0.031131,-0.00063,0.013706,-0.032847,-0.005291,-0.006794,0.010731
No_Exit,0.010526,-0.000703,,0.000378,-0.003803,-0.000275,-0.007127,1.0,-0.002094,,...,-0.006206,-0.015405,0.000334,-0.003682,0.001749,0.004806,0.003006,0.005812,0.004278,-0.000225
Railway,-0.002532,-0.031049,,-0.033722,0.429768,0.041403,-0.093908,-0.002094,1.0,,...,0.02189,0.023491,-0.013697,-0.017477,-0.013718,-0.0052,-0.021023,-0.010053,0.006953,-0.002962
Roundabout,,,,,,,,,,,...,,,,,,,,,,


In [269]:
# Bump and Roundabout show only NaN correlations in the matrix printed by the
# previous cell (presumably constant columns). The reason for dropping Day is
# not recorded in this notebook.
cols_to_drop = ['Roundabout', 'Bump', 'Day']
new_df.drop(columns=cols_to_drop, inplace=True)

In [270]:
# Correlations among the columns from position 10 onward.
# That slice still contains the text column Weather_Condition, which pandas >= 2.0
# no longer drops silently, so only numeric/bool columns are passed to corr().
new_df.iloc[:, 10:].select_dtypes(include=['number', 'bool']).corr()

Unnamed: 0,Side,Severity,Year,Month,Hour,Wind_Chill(F),Precipitation(in),Temperature(F),Humidity(%),Wind_Speed(mph),Visibility(mi),Selected
Side,1.0,0.061376,-0.016249,-0.003547,0.029646,0.006587,0.007771,0.031358,-0.008165,0.016098,-0.01023,0.004147
Severity,0.061376,1.0,-0.171061,-0.053281,-0.019082,0.00087,0.016597,-0.024845,0.085291,0.016236,-0.014778,0.024455
Year,-0.016249,-0.171061,1.0,-0.085373,0.040108,-0.00691,-0.085345,-0.07405,-0.076163,-0.110607,-0.051944,0.007963
Month,-0.003547,-0.053281,-0.085373,1.0,0.005463,0.06532,-0.055113,0.155753,-0.067141,-0.082508,0.017105,-0.023741
Hour,0.029646,-0.019082,0.040108,0.005463,1.0,0.139683,-0.041511,0.229191,-0.240791,0.238729,0.101258,-0.00873
Wind_Chill(F),0.006587,0.00087,-0.00691,0.06532,0.139683,1.0,-0.088951,0.638232,-0.371856,0.206727,0.101054,-0.004131
Precipitation(in),0.007771,0.016597,-0.085345,-0.055113,-0.041511,-0.088951,1.0,-0.088637,0.160416,0.053557,-0.378623,0.03875
Temperature(F),0.031358,-0.024845,-0.07405,0.155753,0.229191,0.638232,-0.088637,1.0,-0.592714,0.287388,0.162112,-0.017704
Humidity(%),-0.008165,0.085291,-0.076163,-0.067141,-0.240791,-0.371856,0.160416,-0.592714,1.0,-0.185033,-0.309088,-0.030802
Wind_Speed(mph),0.016098,0.016236,-0.110607,-0.082508,0.238729,0.206727,0.053557,0.287388,-0.185033,1.0,0.046785,0.005182


In [271]:
# Standardize latitude to zero mean and unit (sample) standard deviation.
lat = new_df['Lat']
new_df['Lat'] = (lat - lat.mean()) / lat.std()

In [272]:
# Summary statistics; Lat should now show mean ~0 and std 1.
new_df.describe()

Unnamed: 0,Lat,Lng,Distance(mi),Crossing,Give_Way,Junction,No_Exit,Railway,Stop,Amenity,...,Year,Month,Hour,Wind_Chill(F),Precipitation(in),Temperature(F),Humidity(%),Wind_Speed(mph),Visibility(mi),Selected
count,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,...,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0
mean,-3.348911e-14,-122.406075,0.134155,0.083426,0.000477,0.242174,0.000159,0.026855,0.098522,0.037343,...,2018.387097,6.759733,12.846178,59.916622,0.00642,59.948181,68.119021,10.823796,9.43977,0.000318
std,1.0,0.028259,0.395302,0.276547,0.02183,0.428433,0.012606,0.161673,0.298043,0.189616,...,1.372525,3.574934,5.813688,8.274081,0.025806,8.005354,16.278361,6.455701,1.651113,0.017826
min,-4.789971,-122.51044,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2016.0,1.0,0.0,31.1,0.0,36.0,10.0,0.0,0.12,0.0
25%,-0.8845452,-122.412227,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2017.0,4.0,8.0,54.6,0.0,54.0,59.0,5.8,10.0,0.0
50%,0.0812855,-122.404835,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2019.0,7.0,14.0,59.2,0.0,59.0,70.0,10.0,10.0,0.0
75%,0.6706111,-122.392532,0.038,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2020.0,10.0,17.0,65.0,0.0,64.9,80.0,15.0,10.0,0.0
max,1.843612,-122.349734,6.82,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,2020.0,12.0,23.0,98.0,0.49,98.0,100.0,40.3,10.0,1.0


In [273]:
# Preview the table with the standardized Lat column.
new_df.head()

Unnamed: 0,Lat,Lng,Distance(mi),Crossing,Give_Way,Junction,No_Exit,Railway,Stop,Amenity,...,Month,Hour,Weather_Condition,Wind_Chill(F),Precipitation(in),Temperature(F),Humidity(%),Wind_Speed(mph),Visibility(mi),Selected
0,-1.426055,-122.448254,0.0,0,0,0,0,0,0,0,...,5,19,Mostly Cloudy,59.0,0.0,59.0,78.0,20.0,10.0,0
1,1.31761,-122.366852,0.0,0,0,0,0,0,1,0,...,9,19,Clear,58.5,0.0,62.1,80.0,9.2,10.0,0
2,0.624546,-122.39108,0.009,0,0,1,0,0,0,0,...,3,19,Fair,58.0,0.0,58.0,70.0,10.0,10.0,0
3,-1.426547,-122.448457,0.0,0,0,0,0,0,0,0,...,10,8,Fair,58.0,0.0,58.0,65.0,3.0,10.0,0
4,0.123389,-122.416878,0.0,0,0,0,0,0,0,0,...,9,8,Fair,61.0,0.0,61.0,81.0,13.0,10.0,0


In [274]:
# Apply the same zero-mean / unit-std scaling to the remaining continuous columns.
for col in ['Lng', 'Wind_Speed(mph)', 'Wind_Chill(F)', 'Temperature(F)', 'Visibility(mi)']:
    values = new_df[col]
    new_df[col] = (values - values.mean()) / values.std()

In [275]:
# Summary statistics after standardizing the remaining continuous columns.
new_df.describe()

Unnamed: 0,Lat,Lng,Distance(mi),Crossing,Give_Way,Junction,No_Exit,Railway,Stop,Amenity,...,Year,Month,Hour,Wind_Chill(F),Precipitation(in),Temperature(F),Humidity(%),Wind_Speed(mph),Visibility(mi),Selected
count,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,...,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0
mean,-3.348911e-14,1.807622e-13,0.134155,0.083426,0.000477,0.242174,0.000159,0.026855,0.098522,0.037343,...,2018.387097,6.759733,12.846178,-8.869082e-16,0.00642,-7.113331e-16,68.119021,4.9115860000000003e-17,-1.106518e-16,0.000318
std,1.0,1.0,0.395302,0.276547,0.02183,0.428433,0.012606,0.161673,0.298043,0.189616,...,1.372525,3.574934,5.813688,1.0,0.025806,1.0,16.278361,1.0,1.0,0.017826
min,-4.789971,-3.693206,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2016.0,1.0,0.0,-3.482758,0.0,-2.991521,10.0,-1.676626,-5.644539,0.0
25%,-0.8845452,-0.2177104,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2017.0,4.0,8.0,-0.6425634,0.0,-0.7430253,59.0,-0.7781954,0.3393048,0.0
50%,0.0812855,0.04387273,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2019.0,7.0,14.0,-0.08661042,0.0,-0.1184433,70.0,-0.1276076,0.3393048,0.0
75%,0.6706111,0.479243,0.038,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2020.0,10.0,17.0,0.6143738,0.0,0.6185635,80.0,0.6469017,0.3393048,0.0
max,1.843612,1.99375,6.82,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,2020.0,12.0,23.0,4.602732,0.49,4.753296,100.0,4.565919,0.3393048,1.0


In [276]:
# Express humidity as a fraction in a new column; the original percent column is kept.
new_df['Humidity'] = new_df['Humidity(%)'].div(100)

In [277]:
# Summary statistics including the new Humidity column.
new_df.describe()

Unnamed: 0,Lat,Lng,Distance(mi),Crossing,Give_Way,Junction,No_Exit,Railway,Stop,Amenity,...,Month,Hour,Wind_Chill(F),Precipitation(in),Temperature(F),Humidity(%),Wind_Speed(mph),Visibility(mi),Selected,Humidity
count,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,...,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0,6293.0
mean,-3.348911e-14,1.807622e-13,0.134155,0.083426,0.000477,0.242174,0.000159,0.026855,0.098522,0.037343,...,6.759733,12.846178,-8.869082e-16,0.00642,-7.113331e-16,68.119021,4.9115860000000003e-17,-1.106518e-16,0.000318,0.68119
std,1.0,1.0,0.395302,0.276547,0.02183,0.428433,0.012606,0.161673,0.298043,0.189616,...,3.574934,5.813688,1.0,0.025806,1.0,16.278361,1.0,1.0,0.017826,0.162784
min,-4.789971,-3.693206,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,-3.482758,0.0,-2.991521,10.0,-1.676626,-5.644539,0.0,0.1
25%,-0.8845452,-0.2177104,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,8.0,-0.6425634,0.0,-0.7430253,59.0,-0.7781954,0.3393048,0.0,0.59
50%,0.0812855,0.04387273,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7.0,14.0,-0.08661042,0.0,-0.1184433,70.0,-0.1276076,0.3393048,0.0,0.7
75%,0.6706111,0.479243,0.038,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,17.0,0.6143738,0.0,0.6185635,80.0,0.6469017,0.3393048,0.0,0.8
max,1.843612,1.99375,6.82,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,12.0,23.0,4.602732,0.49,4.753296,100.0,4.565919,0.3393048,1.0,1.0
