In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

In [2]:
# Feature tables: the first CSV column is dropped by position, all others kept.
# NOTE(review): presumably that first column is a saved row index — confirm.
X_train = pd.read_csv("X_train.csv").iloc[:, 1:]
X_test = pd.read_csv("X_test.csv").iloc[:, 1:]

# Targets: keep only the 'Rating as Factor' column, cast to a pandas categorical.
Y_train = pd.read_csv("y_train.csv")['Rating as Factor'].astype('category')
Y_test = pd.read_csv("y_test.csv")['Rating as Factor'].astype('category')

In [3]:
def LogReg(X_train, Y_train):
    '''
    Grid-search an elastic-net logistic regression on standardized features.

    The estimator is a two-step pipeline (StandardScaler followed by
    LogisticRegression with the 'saga' solver). GridSearchCV evaluates every
    combination of C and l1_ratio below with 5-fold cross-validation.

    X_train: Training Set of X values
    Y_train: Training Set of Y values(factorized)

    Returns the fitted GridSearchCV object (exposes best_params_,
    best_score_, score(), ...).
    '''
    # NOTE(review): warm_start presumably has no effect here because
    # GridSearchCV fits a fresh clone per candidate/fold — confirm before relying on it.
    lor = LogisticRegression(max_iter=100, tol=0.001, random_state=1, n_jobs=-1,
                             solver='saga', warm_start=True)

    pipe = Pipeline([('scaler', StandardScaler()),
                     ('logreg', lor)])

    # l1_ratio is the elastic-net mixing weight and must lie in [0, 1]:
    # 0 is pure L2, 1 is pure L1, values in between blend the two.
    # The grid previously read `0,2` (two entries: 0 and the invalid value 2)
    # where `0.2` was intended.
    param_grid = {'logreg__penalty': ['elasticnet'],  # elastic net combines l1 & l2
                  'logreg__C': [6.5, 7, 7.5, 8, 8.5],
                  'logreg__l1_ratio': [0, 0.2, 0.225, 0.25, 1]}

    grid = GridSearchCV(pipe, param_grid=param_grid, cv=5, n_jobs=-1)
    grid = grid.fit(X_train, Y_train)

    return grid

In [None]:
# Run the hyper-parameter search on the training split, then report the
# selected parameters, the best cross-validation score and the score on the
# held-out test split.
grid = LogReg(X_train, Y_train)
for label, value in (
    ('Best parameters:', grid.best_params_),
    ('Best CV accuracy:', grid.best_score_),
    ('Test score:', grid.score(X_test, Y_test)),
):
    print(label, value)