# Logistic Regression
## Hyper-parameter Tuning

In [1]:
import warnings
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV, RepeatedStratifiedKFold
warnings.filterwarnings('ignore')

# Load the training set and split it into a feature matrix and a target vector.
df_train = pd.read_csv('Final_Train_dataset.csv')

# Select the target by name and remove it from the features by name as well.
# Slicing the features positionally (all columns but the last) only works if
# the target happens to be the last column; otherwise the target leaks into
# X_train. NOTE(review): if the last CSV column was something other than
# 'VirusDetected' and was dropped on purpose, drop it here explicitly too.
X_train  = df_train.drop(columns='VirusDetected')
y_train  = df_train['VirusDetected']

In [2]:
# Cross-validation scheme for the tuning process: 5 stratified folds repeated
# 3 times (15 fits per candidate), seeded so the fold assignment is the same
# on every run.
# NOTE(review): this variable shares its name with scikit-learn's
# StratifiedKFold class; the name is kept because other cells may rely on it.
StratifiedKFold = RepeatedStratifiedKFold(n_splits     = 5,
                                          n_repeats    = 3,
                                          random_state = 99)

# Parameter grid: 20 log-spaced values of C between 1e-4 and 1e4,
# 5 solvers and 4 iteration caps -> 20 * 5 * 4 = 400 candidates.
log_grid = [
    {'C'       : np.logspace(-4, 4, 20),
    'solver'   : ['lbfgs','newton-cg','liblinear','sag','saga'],
    'max_iter' : [100, 1000, 2500, 5000]
    }
]

# Base estimator. The 'sag', 'saga' and 'liblinear' solvers shuffle the data
# using random_state, so it is fixed here; without a seed the scores of those
# candidates (and potentially the selected best_params_) change between runs.
log_model  = LogisticRegression(random_state = 99)

# Exhaustive search over the grid, ranking candidates by cross-validated accuracy.
log_search = GridSearchCV(estimator  = log_model,
                          param_grid = log_grid,
                          cv         = StratifiedKFold,
                          verbose    = True,
                          scoring    = 'accuracy')


In [3]:
from datetime import datetime

def timer(start_time=None):
    """Start or stop a simple wall-clock stopwatch.

    Called with a falsy ``start_time`` (the default), returns the current
    ``datetime`` so the caller can keep it as the start mark.

    Called with a start mark, prints the time elapsed since that mark as
    hours, minutes and seconds (seconds rounded to 2 decimals) and returns
    ``None``.
    """
    if start_time:
        elapsed_seconds = (datetime.now() - start_time).total_seconds()
        hours, remainder = divmod(elapsed_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        print('\n Time taken: %i hours %i minutes and %s seconds.' % (hours, minutes, round(seconds, 2)))
        return None

    # No start mark supplied: hand back "now" to be used as one.
    return datetime.now()
        
# Time the whole grid search: take a start mark, run the search, then report.
start_time = timer()              # no argument -> returns the start mark
log_search.fit(X_train, y_train)  # runs every candidate/fold combination
timer(start_time)                 # prints the time elapsed since the start mark


Fitting 15 folds for each of 400 candidates, totalling 6000 fits

 Time taken: 0 hours 0 minutes and 59.57 seconds.


In [5]:
# Display the hyper-parameter combination that achieved the best
# cross-validated accuracy in the grid search.
log_search.best_params_

{'C': 78.47599703514607, 'max_iter': 100, 'solver': 'lbfgs'}

Specify the optimal model 

In [6]:
# Fit a fresh model on the full training set with the winning hyper-parameters.
# They are read from the search result rather than hand-copied from its printed
# output, so the values are not truncated (C was 78.4759..., not 78.47) and this
# cell stays in sync with the search whenever the notebook is re-run.
optimal_model = LogisticRegression(**log_search.best_params_).fit(X_train, y_train)