### Importing libraries

In [19]:
import numpy as np
import pandas as pd

### Importing Dataset

In [20]:
dataset = pd.read_csv('../datasets/weatherAUS.csv')
X = dataset.iloc[:,[2,3,4,7,8,13,15]].values
Y = dataset.iloc[:,-1].values

In [21]:
print(X)

[[13.4 22.9 0.6 ... 44.0 71.0 1007.7]
 [7.4 25.1 0.0 ... 44.0 44.0 1010.6]
 [12.9 25.7 0.0 ... 46.0 38.0 1007.6]
 ...
 [5.4 26.9 0.0 ... 37.0 53.0 1021.0]
 [7.8 27.0 0.0 ... 28.0 51.0 1019.4]
 [14.9 nan 0.0 ... nan 62.0 1020.2]]


In [22]:
print(Y)

['No' 'No' 'No' ... 'No' 'No' nan]


In [23]:
Y = Y.reshape(-1,1)

### Dealing with invalid Data

In [24]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan,strategy='most_frequent')
X = imputer.fit_transform(X)
Y = imputer.fit_transform(Y)

In [25]:
print(X)

[[13.4 22.9 0.6 ... 44.0 71.0 1007.7]
 [7.4 25.1 0.0 ... 44.0 44.0 1010.6]
 [12.9 25.7 0.0 ... 46.0 38.0 1007.6]
 ...
 [5.4 26.9 0.0 ... 37.0 53.0 1021.0]
 [7.8 27.0 0.0 ... 28.0 51.0 1019.4]
 [14.9 20.0 0.0 ... 35.0 62.0 1020.2]]


In [26]:
print(Y)

[['No']
 ['No']
 ['No']
 ...
 ['No']
 ['No']
 ['No']]


### Encoding Dataset

In [27]:
from sklearn.preprocessing import LabelEncoder
le1 = LabelEncoder()
X[:,3] = le1.fit_transform(X[:,3])
le2 = LabelEncoder()
Y[:,-1] = le2.fit_transform(Y[:,-1])

In [28]:
print(X)

[[13.4 22.9 0.6 ... 44.0 71.0 1007.7]
 [7.4 25.1 0.0 ... 44.0 44.0 1010.6]
 [12.9 25.7 0.0 ... 46.0 38.0 1007.6]
 ...
 [5.4 26.9 0.0 ... 37.0 53.0 1021.0]
 [7.8 27.0 0.0 ... 28.0 51.0 1019.4]
 [14.9 20.0 0.0 ... 35.0 62.0 1020.2]]


In [29]:
print(Y)

[[0]
 [0]
 [0]
 ...
 [0]
 [0]
 [0]]


In [30]:
Y = np.array(Y,dtype=float)
print(Y)

[[0.]
 [0.]
 [0.]
 ...
 [0.]
 [0.]
 [0.]]


### Splitting Dataset into Training set and Test set

In [31]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=0)

In [32]:
print(X_train)

[[18.8 23.7 0.2 ... 52.0 74.0 1016.6]
 [9.3 24.0 0.2 ... 48.0 74.0 1018.3]
 [10.9 22.2 1.4 ... 26.0 85.0 1017.6]
 ...
 [17.0 29.8 0.0 ... 22.0 77.0 1019.6]
 [11.9 22.0 3.8 ... 35.0 55.0 1020.0]
 [12.0 16.7 0.8 ... 50.0 59.0 1032.0]]


In [33]:
print(Y_train)

[[1.]
 [0.]
 [0.]
 ...
 [0.]
 [0.]
 [0.]]


### Training Model

In [34]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score

LogReg = LogisticRegression(random_state=2)

LogReg.fit(X_train,Y_train)

predicted_values = LogReg.predict(X_test)

x = metrics.accuracy_score(Y_test, predicted_values)

print("Logistic Regression's Accuracy is: ", x)

print()
print("Confusion Matrix: ")
print(metrics.confusion_matrix(Y_test,predicted_values))
print()
ax=classification_report(Y_test,predicted_values)
print(ax)

# Cross validation score (Logistic Regression)
score = cross_val_score(LogReg,X,Y,cv=5)

score

"""saving trained Logistic Regression Model"""

import pickle
# Dump the trained Naive Bayes classifier with Pickle
LR_pkl_filename = '../models/LogisticRegression.pkl'
# Open the file to save as pkl file
LR_Model_pkl = open(LR_pkl_filename, 'wb')
pickle.dump(LogReg, LR_Model_pkl)
# Close the pickle instances
LR_Model_pkl.close()

  y = column_or_1d(y, warn=True)


Logistic Regression's Accuracy is:  0.8120445483294376

Confusion Matrix: 
[[21799   927]
 [ 4541  1825]]

              precision    recall  f1-score   support

         0.0       0.83      0.96      0.89     22726
         1.0       0.66      0.29      0.40      6366

    accuracy                           0.81     29092
   macro avg       0.75      0.62      0.64     29092
weighted avg       0.79      0.81      0.78     29092



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [35]:
print(Y_test)

[[1.]
 [1.]
 [0.]
 ...
 [1.]
 [0.]
 [0.]]


In [36]:
print(X[0])

[13.4 22.9 0.6 13 44.0 71.0 1007.7]


In [38]:
print(X[12],Y[12])

[15.9 18.6 15.6 13 61.0 76.0 994.3] [1.]
