In [None]:
import numpy as np 
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score

## Import Data

In [None]:
# Directory that holds every input file of this notebook. Kept in one place so
# the notebook can be pointed at another location by editing a single line.
DATA_DIR = "/kaggle/input/hit-prediction-processed-data/Hit Prediction"

# Feature tables (CSV). The "*_selected" files are presumably the
# feature-selected variants of the same tables -- TODO confirm against the
# preprocessing notebook.
X = pd.read_csv(f"{DATA_DIR}/X.csv")
X_train = pd.read_csv(f"{DATA_DIR}/X_train.csv")
X_test = pd.read_csv(f"{DATA_DIR}/X_test.csv")
X_selected = pd.read_csv(f"{DATA_DIR}/X_selected.csv")
X_train_selected = pd.read_csv(f"{DATA_DIR}/X_train_selected.csv")
X_test_selected = pd.read_csv(f"{DATA_DIR}/X_test_selected.csv")

# Target arrays (NumPy .npy files).
y = np.load(f"{DATA_DIR}/y.npy")
y_train = np.load(f"{DATA_DIR}/y_train.npy")
y_test = np.load(f"{DATA_DIR}/y_test.npy")
y_selected = np.load(f"{DATA_DIR}/y_selected.npy")

# Fail fast if a feature table and the target it is paired with later in the
# notebook do not have the same number of rows.
assert len(X_train_selected) == len(y_train), "X_train_selected / y_train length mismatch"
assert len(X_test_selected) == len(y_test), "X_test_selected / y_test length mismatch"
assert len(X_selected) == len(y_selected), "X_selected / y_selected length mismatch"

## Fitting Basic Model with default Parameters

In [None]:
# Baseline: logistic regression with scikit-learn's default settings, trained
# on the selected training features and scored on the test split.
clf = LogisticRegression().fit(X_train_selected, y_train)
clf.score(X_test_selected, y_test)

## Hyperparameter Tuning Using GridSearchCV

### Define Hyperparameter Grid

In [None]:
# Search space handed to GridSearchCV for LogisticRegression.
# `penalty` uses the Python object None to request "no regularisation"; the
# string 'None' is not an accepted value and makes every fit that uses it fail.
# NOTE(review): not every solver/penalty pair is supported by scikit-learn
# (e.g. 'l1' with "lbfgs"); such candidates error out during the search and,
# with GridSearchCV's default error_score, are recorded as NaN instead of
# aborting it -- confirm this is the intended way to prune the grid.
parameters = {
    'solver': ["lbfgs", "newton-cg", "sag", "saga"],
    'penalty': [None, 'l1', 'l2', 'elasticnet'],
    'C': [0.1, 0.5, 1.0],
}

### Base Model

In [None]:
# Estimator template for the grid search: fixed seed and a high iteration cap.
clf = LogisticRegression(
    max_iter=10000,
    random_state=42,
)

### Perform GridSearchCV 

In [None]:
# Grid search over `parameters` with 5-fold cross-validation; n_jobs=-1 asks
# scikit-learn to use all available processors.
gcv = GridSearchCV(
    estimator=clf,
    param_grid=parameters,
    cv=5,
    verbose=False,
    n_jobs=-1,
)

In [None]:
# Run the search on the selected training features.
gcv.fit(X=X_train_selected, y=y_train)

In [None]:
# Uncomment the next line to inspect every parameter combination that was evaluated:
# pd.DataFrame(gcv.cv_results_)

### Best Parameters

In [None]:
# Display the estimator that the grid search (`gcv`) selected as best.
gcv.best_estimator_

## Model Fit with best Parameters

In [None]:
# GridSearchCV is used with its default refit=True, so best_estimator_ has
# already been retrained with the winning parameters on the same training data
# that was passed to gcv.fit. Fitting it a second time on that data only
# repeats the work, so the estimator is scored directly.
clf = gcv.best_estimator_
clf.score(X_test_selected, y_test)

### Confusion Matrix

In [None]:
# Predict labels for the test split and print the confusion matrix
# (rows are true classes, columns are predicted classes).
pred = clf.predict(X_test_selected)
print(confusion_matrix(y_true=y_test, y_pred=pred))

### Classification Report

In [None]:
# Text summary of the per-class metrics for the test-split predictions in `pred`.
print(classification_report(y_true=y_test, y_pred=pred))

### Cross Validation Score

In [None]:
# 5-fold cross-validated scores of `clf` on X_selected / y_selected;
# the array of per-fold scores is shown as the cell output.
scores = cross_val_score(estimator=clf, X=X_selected, y=y_selected, cv=5)
scores

In [None]:
# Summarise the per-fold scores as their mean and standard deviation.
summary_template = "%0.2f accuracy with a standard deviation of %0.2f"
print(summary_template % (scores.mean(), scores.std()))