In [1]:
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, auc, roc_curve
from sklearn.metrics import make_scorer, accuracy_score, classification_report, f1_score, confusion_matrix, roc_auc_score, precision_score, recall_score, average_precision_score
from sklearn.model_selection import RepeatedStratifiedKFold, cross_val_score
from sklearn.preprocessing import label_binarize


In [3]:
# Feature matrices for the train and test splits, indexed by the building_id column.
X_train = pd.read_csv('../Resources/Datasets/X_train.csv', index_col='building_id')
X_test = pd.read_csv('../Resources/Datasets/X_test.csv', index_col='building_id')

# Label files are read the same way, then flattened straight into 1-D NumPy
# arrays so they can be passed as `y` to the estimator and metric functions.
y_train = pd.read_csv('../Resources/Datasets/y_train.csv', index_col='building_id').values.ravel()
y_test = pd.read_csv('../Resources/Datasets/y_test.csv', index_col='building_id').values.ravel()

### Multinomial Logistic Regression

Softmax function: it can be used in multi-class classification problems, where the goal is to predict a single label from multiple classes.

In [3]:
# Multinomial (softmax) logistic regression fitted on the training split.
# max_iter is raised to 1000 (the scikit-learn default is 100) to give the
# newton-cg solver room to converge.
# NOTE(review): `multi_class` is deprecated since scikit-learn 1.5, where
# multinomial is already what non-liblinear solvers use for multiclass
# targets; drop the argument when upgrading.
logreg = LogisticRegression(multi_class='multinomial', solver='newton-cg', max_iter=1000)
logreg.fit(X_train, y_train)

# Predict once and reuse these labels for every metric below.
y_pred = logreg.predict(X_test)

# Accuracy, per-class precision/recall/F1, and the confusion matrix.
# accuracy_score on the cached predictions gives the same value as
# logreg.score(X_test, y_test) without a second prediction pass over X_test.
print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(accuracy_score(y_test, y_pred)))
print(classification_report(y_test, y_pred))

# Confusion matrix: rows are true classes, columns are predicted classes.
conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)




Accuracy of logistic regression classifier on test set: 0.59
              precision    recall  f1-score   support

           1       0.57      0.29      0.38      5025
           2       0.60      0.87      0.71     29652
           3       0.55      0.20      0.29     17444

    accuracy                           0.59     52121
   macro avg       0.57      0.46      0.46     52121
weighted avg       0.58      0.59      0.54     52121

[[ 1467  3443   115]
 [ 1041 25905  2706]
 [   88 13873  3483]]




In [5]:
# Pickle the fitted model to disk so it can be reloaded without refitting.
filename = '../Models/logreg_model.pkl'

# The context manager flushes and closes the file even if pickling raises;
# a bare open(...) passed inline is never explicitly closed.
with open(filename, 'wb') as model_file:
    pickle.dump(logreg, model_file)


### Update Logistic Regression Results

In [4]:
# Reload the pickled model and record its test-set metrics in the shared
# results table under the 'Logistic Regression' row label.
filename = '../Models/logreg_model.pkl'

# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# model files that were produced by this project.
# The context manager closes the file handle as soon as loading finishes.
with open(filename, 'rb') as model_file:
    logreg = pickle.load(model_file)

result_df = pd.read_csv('../Resources/Datasets/results.csv', index_col=0)

y_pred = logreg.predict(X_test)
y_pred_proba = logreg.predict_proba(X_test)

# average_precision_score rejects multiclass label vectors on scikit-learn
# releases before 1.3, so hand it a one-hot indicator matrix whose columns
# follow logreg.classes_ (the same column order predict_proba uses).
y_test_onehot = label_binarize(y_test, classes=logreg.classes_)

# The values are assigned positionally, so their order must match the columns
# of results.csv (presumably accuracy, precision, recall, F1, ROC AUC,
# average precision; verify against the file).
result_df.loc['Logistic Regression'] = [
    accuracy_score(y_test, y_pred),
    precision_score(y_test, y_pred, average='weighted'),
    recall_score(y_test, y_pred, average='weighted'),
    f1_score(y_test, y_pred, average='weighted'),
    roc_auc_score(y_test, y_pred_proba, multi_class='ovr', average='weighted'),
    average_precision_score(y_test_onehot, y_pred_proba, average='weighted'),
]

In [5]:
# Write the updated table back to the same CSV path it was read from.
result_df.to_csv(path_or_buf='../Resources/Datasets/results.csv')