In [1]:
import tarfile
import os
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

import time

In [3]:
def apply_LR(x_train, x_test, y_train, y_test, random_state=None):
  """Fit an elastic-net logistic regression and print test-set metrics.

  Trains a ``LogisticRegression`` (saga solver, elastic-net penalty with
  ``l1_ratio=0.5``, ``C=50``) on the training split, predicts on the test
  split and prints accuracy, micro-averaged precision / recall / F1 and the
  ROC AUC, followed by the wall-clock time spent in ``fit``.

  Parameters
  ----------
  x_train, x_test
      Feature matrices for the training and test splits, passed unchanged to
      ``fit`` / ``predict`` / ``predict_proba``.
  y_train, y_test
      Target labels for the training and test splits.
  random_state : int, RandomState instance or None, default=None
      Forwarded to ``LogisticRegression``. The saga solver shuffles the data,
      so pass an integer to make runs repeatable. ``None`` keeps the previous
      (unseeded) behaviour.

  Returns
  -------
  LogisticRegression
      The fitted model, so the caller can reuse it instead of retraining.
  """
  print('Training started at : ' +  time.strftime("%H:%M:%S", time.localtime()))
  t1 = time.time()

  model = LogisticRegression(fit_intercept=True,
                             # NOTE(review): recent scikit-learn releases deprecate
                             # `multi_class`; kept to preserve current behaviour.
                             multi_class='multinomial',
                             penalty='elasticnet',  # blend of L1 and L2 (not pure lasso)
                             l1_ratio=0.5,  # equal weight on the L1 and L2 terms
                             solver='saga',  # the solver that supports elastic-net
                             max_iter=1000,
                             C=50,  # inverse regularization strength
                             verbose=2,  # output solver progress
                             # NOTE(review): scikit-learn documents n_jobs as
                             # parallelising over classes in one-vs-rest mode; it
                             # probably has no effect with 'multinomial' - verify.
                             n_jobs=5,
                             tol=0.01,
                             random_state=random_state,
                             )

  model.fit(x_train, y_train)

  # Capture the stop time right after fit so the reported duration is the
  # training time, not training plus prediction and metric computation.
  stopped_at = time.strftime("%H:%M:%S", time.localtime())
  t2 = time.time()

  y_pred = model.predict(x_test)
  y_pred_prob = model.predict_proba(x_test)

  # roc_auc_score wants a 1-D score (probability of classes_[1]) for a binary
  # problem and the full probability matrix for a multiclass one. Passing
  # `labels` keeps the multiclass call valid when the test split happens to
  # miss a class that the model was trained on.
  classes = model.classes_
  if len(classes) == 2:
    auc = metrics.roc_auc_score(y_test, y_pred_prob[:, 1])
  else:
    auc = metrics.roc_auc_score(y_test, y_pred_prob, multi_class="ovo", labels=classes)

  # NOTE(review): for single-label targets the micro-averaged precision, recall
  # and F1 all equal accuracy; consider average='macro' for a distinct signal.
  print("Accuracy : ", metrics.accuracy_score(y_test, y_pred), "\n")
  print("Precision : ", metrics.precision_score(y_test, y_pred,average='micro'), "\n")
  print("Recall : ", metrics.recall_score(y_test, y_pred,average='micro'), "\n")
  print("F1 score  : ", metrics.f1_score(y_test, y_pred,average='micro'), "\n")
  print("AUC score  : ", auc, "\n")

  print('Training Stopped at : ' +  stopped_at)

  print('Total time taken in mins: ',(t2-t1)/60)

  return model