### Danger Level Forecasting Preliminary Models

Testing out some of the classification models

#### Import Tools

In [5]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# sklearn 
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier

#### Import Data

In [6]:
avi = pd.read_csv('snowweatheModel.csv')
avi.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0_x,AVY_DANGER,AWND,SNOW,FIVE_DAY_SNOWFALL,TMAX_SWING,TMAX_SWING_FROM_AVE,WDF5,...,day_y,prevailing_wind_E,prevailing_wind_N,prevailing_wind_NE,prevailing_wind_NW,prevailing_wind_S,prevailing_wind_SE,prevailing_wind_SW,prevailing_wind_W,prevailing_wind_na
0,0,0,0,2.0,10.74,0.2,,,,320.0,...,18,0,0,0,1,0,0,0,0,0
1,1,1,1,1.0,9.4,0.1,,3.0,,180.0,...,19,0,0,0,1,0,0,0,0,0
2,2,2,2,1.0,20.58,2.2,,0.0,,360.0,...,20,0,1,0,0,0,0,0,0,0
3,3,3,3,3.0,35.12,2.0,,3.0,,360.0,...,21,0,1,0,0,0,0,0,0,0
4,4,4,4,2.0,33.78,4.1,8.6,-3.0,,360.0,...,22,0,1,0,0,0,0,0,0,0


#### filter data

In [7]:
avi = avi.drop(['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0_x', 'Unnamed: 0_y'], axis=1)

In [8]:
avi = avi[avi['AVY_DANGER'].notnull()]
avi.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1254 entries, 0 to 1356
Data columns (total 31 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   AVY_DANGER            1254 non-null   float64
 1   AWND                  1225 non-null   float64
 2   SNOW                  1253 non-null   float64
 3   FIVE_DAY_SNOWFALL     1244 non-null   float64
 4   TMAX_SWING            1251 non-null   float64
 5   TMAX_SWING_FROM_AVE   1242 non-null   float64
 6   WDF5                  1225 non-null   float64
 7   year_x                1254 non-null   float64
 8   month_x               1254 non-null   float64
 9   day_x                 1254 non-null   float64
 10  temp_max              1254 non-null   int64  
 11  temp_min              1254 non-null   int64  
 12  water_equivalent      1254 non-null   float64
 13  snow_fall             1254 non-null   float64
 14  snow_depth_6am        1254 non-null   float64
 15  wind_speed_sum       

#### Fill Remaing NA's

In [10]:
avi = avi.fillna(0)
avi.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1254 entries, 0 to 1356
Data columns (total 31 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   AVY_DANGER            1254 non-null   float64
 1   AWND                  1254 non-null   float64
 2   SNOW                  1254 non-null   float64
 3   FIVE_DAY_SNOWFALL     1254 non-null   float64
 4   TMAX_SWING            1254 non-null   float64
 5   TMAX_SWING_FROM_AVE   1254 non-null   float64
 6   WDF5                  1254 non-null   float64
 7   year_x                1254 non-null   float64
 8   month_x               1254 non-null   float64
 9   day_x                 1254 non-null   float64
 10  temp_max              1254 non-null   int64  
 11  temp_min              1254 non-null   int64  
 12  water_equivalent      1254 non-null   float64
 13  snow_fall             1254 non-null   float64
 14  snow_depth_6am        1254 non-null   float64
 15  wind_speed_sum       

#### Splitting Dataset

In [11]:
x = avi.iloc[:, 1:avi.shape[1]]
y = avi.iloc[:, 0]

print(x.shape)
print(y.shape)

(1254, 30)
(1254,)


In [12]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)

In [13]:
print(y)

0       2.0
1       1.0
2       1.0
3       3.0
4       2.0
       ... 
1352    3.0
1353    3.0
1354    3.0
1355    2.0
1356    1.0
Name: AVY_DANGER, Length: 1254, dtype: float64


### Descision Trees 

In [15]:
ds = DecisionTreeClassifier()
ds.fit(x_train, y_train)
y_pred_ds = ds.predict(x_test)
print(accuracy_score(y_test, y_pred_ds))
print(cross_val_score(ds, x_train, y_train, cv=3))

0.5127388535031847
[0.48726115 0.49201278 0.55271565]




In [16]:
print(confusion_matrix(y_test, y_pred_ds))

[[58 26  6  2  0]
 [18 65 31  6  0]
 [13 27 35  7  0]
 [ 1  4 11  3  1]
 [ 0  0  0  0  0]]


In [17]:

print(classification_report(y_test, y_pred_ds))

              precision    recall  f1-score   support

         1.0       0.64      0.63      0.64        92
         2.0       0.53      0.54      0.54       120
         3.0       0.42      0.43      0.42        82
         4.0       0.17      0.15      0.16        20
         5.0       0.00      0.00      0.00         0

    accuracy                           0.51       314
   macro avg       0.35      0.35      0.35       314
weighted avg       0.51      0.51      0.51       314



  _warn_prf(average, modifier, msg_start, len(result))


### Random Forest

In [18]:
rf = RandomForestClassifier()
rf.fit(x_train, y_train)
y_pred_rf = rf.predict(x_test)
print(accuracy_score(y_test, y_pred_rf))
print(cross_val_score(rf, x_train, y_train, cv=3))

0.6624203821656051




[0.62420382 0.61022364 0.63578275]


In [19]:
print(confusion_matrix(y_test, y_pred_rf))

[[75 13  4  0]
 [ 8 92 20  0]
 [ 9 33 38  2]
 [ 0  2 15  3]]


In [20]:
print(classification_report(y_test, y_pred_rf))

              precision    recall  f1-score   support

         1.0       0.82      0.82      0.82        92
         2.0       0.66      0.77      0.71       120
         3.0       0.49      0.46      0.48        82
         4.0       0.60      0.15      0.24        20

    accuracy                           0.66       314
   macro avg       0.64      0.55      0.56       314
weighted avg       0.66      0.66      0.65       314



### Extra Forest

In [22]:
et = ExtraTreesClassifier()
et.fit(x_train, y_train)
y_pred_et = et.predict(x_test)
print(accuracy_score(y_test, y_pred_et))
print(cross_val_score(rf, x_train, y_train, cv=3))

0.6624203821656051




[0.62420382 0.59105431 0.60383387]


In [25]:
print(confusion_matrix(y_test, y_pred_et))

[[71 17  3  1]
 [10 90 20  0]
 [11 22 45  4]
 [ 0  1 17  2]]


In [26]:
print(classification_report(y_test, y_pred_et))

              precision    recall  f1-score   support

         1.0       0.77      0.77      0.77        92
         2.0       0.69      0.75      0.72       120
         3.0       0.53      0.55      0.54        82
         4.0       0.29      0.10      0.15        20

    accuracy                           0.66       314
   macro avg       0.57      0.54      0.54       314
weighted avg       0.65      0.66      0.65       314

