In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('seattle-weather.csv')
df

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain
...,...,...,...,...,...,...
1456,2015-12-27,8.6,4.4,1.7,2.9,rain
1457,2015-12-28,1.5,5.0,1.7,1.3,rain
1458,2015-12-29,0.0,7.2,0.6,2.6,fog
1459,2015-12-30,0.0,5.6,-1.0,3.4,sun


In [3]:
df['date'] = pd.to_datetime(df['date'])
df['day'] = df['date'].dt.day
df['month'] = df['date'].dt.month
df['year'] = df['date'].dt.year

In [4]:
df.drop(columns=['date'],axis=1,inplace=True)
df.head()

Unnamed: 0,precipitation,temp_max,temp_min,wind,weather,day,month,year
0,0.0,12.8,5.0,4.7,drizzle,1,1,2012
1,10.9,10.6,2.8,4.5,rain,2,1,2012
2,0.8,11.7,7.2,2.3,rain,3,1,2012
3,20.3,12.2,5.6,4.7,rain,4,1,2012
4,1.3,8.9,2.8,6.1,rain,5,1,2012


In [5]:
X = df.drop(columns=['weather'], axis=1)
y = df['weather']

In [6]:
X = X.astype(int)

In [7]:
X

Unnamed: 0,precipitation,temp_max,temp_min,wind,day,month,year
0,0,12,5,4,1,1,2012
1,10,10,2,4,2,1,2012
2,0,11,7,2,3,1,2012
3,20,12,5,4,4,1,2012
4,1,8,2,6,5,1,2012
...,...,...,...,...,...,...,...
1456,8,4,1,2,27,12,2015
1457,1,5,1,1,28,12,2015
1458,0,7,0,2,29,12,2015
1459,0,5,-1,3,30,12,2015


In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score


scaler = StandardScaler()

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [10]:
X_test

Unnamed: 0,precipitation,temp_max,temp_min,wind,day,month,year
892,0,23,11,2,11,6,2014
1105,5,7,6,0,10,1,2015
413,0,9,4,3,17,2,2013
522,0,26,12,2,6,6,2013
1036,1,13,7,2,2,11,2014
...,...,...,...,...,...,...,...
1361,0,20,8,1,23,9,2015
802,0,13,5,2,13,3,2014
651,0,15,6,1,13,10,2013
722,1,11,6,5,23,12,2013


In [11]:
X_train_scale = scaler.fit_transform(X_train)
X_test_scale = scaler.fit_transform(X_test)

In [12]:
X_train_scale

array([[-0.43675834, -1.50692954, -2.20990514, ..., -1.54960835,
         1.60943041,  0.44021798],
       [ 2.25998388, -0.41021254,  0.43411028, ...,  1.62354245,
         0.73441968, -0.44935745],
       [ 2.25998388, -0.54730216, -0.37943293, ..., -0.64299383,
        -1.01560178,  0.44021798],
       ...,
       [-0.43675834, -0.13603328, -0.17604713, ..., -0.64299383,
        -0.43226129,  0.44021798],
       [-0.43675834, -1.50692954, -1.80313354, ...,  1.62354245,
         1.60943041,  1.3297934 ],
       [-0.43675834, -1.23275029, -0.98959033, ...,  1.73686926,
        -1.59894227,  1.3297934 ]], shape=(1168, 7))

In [13]:
X_train

Unnamed: 0,precipitation,temp_max,temp_min,wind,day,month,year
1066,0,5,-3,5,2,12,2014
638,18,13,10,6,30,9,2013
799,18,12,6,2,10,3,2014
380,0,6,0,2,15,1,2013
303,34,15,12,2,30,10,2012
...,...,...,...,...,...,...,...
1130,8,10,4,1,4,2,2015
1294,0,33,17,3,18,7,2015
860,0,15,7,2,10,5,2014
1459,0,5,-1,3,30,12,2015


In [14]:
models = {
    'Logistic Regrassion' : LogisticRegression(),
    'KNN' : KNeighborsClassifier(),
    'SVM' : SVC(),
    'Naive Bayes' : GaussianNB(),
    'Decision tree' : DecisionTreeClassifier()
}

In [15]:
results = []

In [16]:
for name, model in models.items():
    if (name == 'Naive Bayes' or name == 'Decision tree'):
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred, average='weighted')
        precision = precision_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')
        results.append({
            'Name':name,
            'Accuracy':acc,
            'Recall':recall,
            'Precision':precision,
            'F1_Score':f1
        })
    else:
        model.fit(X_train_scale, y_train)
        y_pred = model.predict(X_test_scale)
        acc = accuracy_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred, average='weighted')
        precision = precision_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')
        results.append({
            'Name':name,
            'Accuracy':acc,
            'Recall':recall,
            'Precision':precision,
            'F1_Score':f1
        })

In [17]:
df_results = pd.DataFrame(results)
df_results.tail()

Unnamed: 0,Name,Accuracy,Recall,Precision,F1_Score
0,Logistic Regrassion,0.767918,0.767918,0.771237,0.717923
1,KNN,0.696246,0.696246,0.691999,0.682084
2,SVM,0.761092,0.761092,0.655364,0.702865
3,Naive Bayes,0.764505,0.764505,0.823797,0.736582
4,Decision tree,0.706485,0.706485,0.701175,0.702036


In [18]:
X_column = ['precipitation', 'temp_max', 'temp_min', 'wind', 'month']
import joblib
joblib.dump(models['Naive Bayes'],'LR_heart.pkl')
joblib.dump(X_column,'Columns.pkl')


['Columns.pkl']

In [19]:
y

0       drizzle
1          rain
2          rain
3          rain
4          rain
         ...   
1456       rain
1457       rain
1458        fog
1459        sun
1460        sun
Name: weather, Length: 1461, dtype: object