## Importing the libraries

In [None]:
import numpy as np 
import pandas as pd 
import pickle as pkl 

from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler 

import matplotlib.pyplot as plt 

from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression 
from sklearn.metrics import confusion_matrix, accuracy_score

## Importing the dataset

In [None]:
# Load the raw table, then split it column-wise: every column except the last
# becomes the feature matrix X, and the last column becomes the target vector y.
dataset = pd.read_csv('Our_Dataset.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Standardize the features. The scaler is fitted on the training split only and
# the same fitted scaler is then applied to the test split.
# Note: this rebinds X_train / X_test to their scaled versions.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## PCA I: cumulative explained variance

In [None]:
pca = PCA().fit(X_train)

% matplotlib inline 

plt.rcParams["figure.figsize"] = (12,6)

fig, ax = plt.subplots()
xi = np.arange(1, 11, step=1)
y = np.cumsum(pca.explained_variance_ratio_)

plt.ylim(0.0,1.1)
plt.plot(xi, y, marker='o', linestyle='--', color='b')

plt.xlabel('Number of Components')
plt.xticks(np.arange(0, 11, step=1)) #change from 0-based array index to 1-based human-readable label
plt.ylabel('Cumulative variance (%)')
plt.title('The number of components needed to explain variance')

plt.axhline(y=0.95, color='r', linestyle='-')
plt.text(0.5, 0.85, '95% cut-off threshold', color = 'red', fontsize=16)

ax.grid(axis='x')
plt.show()

## PCA II: fitting PCA with all components

In [None]:
# Refit PCA with all components kept and project both splits onto them.
# Note: this rebinds X_train / X_test to their PCA-transformed versions; a
# later cell rebuilds the scaled splits before the final PCA is applied.
pca = PCA()
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

In [None]:
# Per-component explained-variance ratios of the PCA fitted in the previous cell.
explained_variance = pca.explained_variance_ratio_

## Choosing the number of principal components

In [None]:
# Number of principal components to keep; passed as n_components to the final PCA.
# NOTE(review): presumably read off the 95% cumulative-variance plot — confirm.
defined_number = 8

### Resetting X and Y variables. 

In [None]:
# Re-extract features and labels from `dataset` so this reset does not depend on
# whatever the globals X and y may have been rebound to by earlier cells.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Same test_size and seed as the first split, so the rows land in the same splits.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Refit the scaler on the fresh training split and reuse it for the test split.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

### Applying PCA to Dataset

In [None]:
# Final dimensionality reduction: keep only `defined_number` principal components.
# The projection is fitted on the training split and reused for the test split.
pca = PCA(n_components= defined_number)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

## Training Logistic Regression model on the Training set with various penalties & solvers

In [None]:
# Penalty types and solvers under consideration for LogisticRegression.
# These lists are for reference; the cells visible here do not read them.
Penalties = [
    "l1",
    "l2",
    "elasticnet",
]
Solvers = [
    "newton-cg",
    "lbfgs",
    "liblinear",
    "sag",
    "saga",
]

In [None]:
# Each entry is a [solver, penalty] pair to evaluate with LogisticRegression.
# Only pairs that scikit-learn supports are listed:
#   newton-cg / lbfgs / sag -> l2
#   liblinear               -> l1, l2
#   saga                    -> l1, l2, elasticnet
Combination_1 = ['newton-cg','l2']
# Previously ['lbfgs','l1'], which is rejected because lbfgs does not support
# the l1 penalty; liblinear does.
Combination_2 = ['liblinear','l1']
Combination_3 = ['lbfgs','l2']
Combination_4 = ['sag','l2']
Combination_5 = ['saga','l2']
Combination_6 = ['saga','elasticnet']

Combinations = [Combination_1,Combination_2,Combination_3,Combination_4,Combination_5,Combination_6]

In [None]:
# The elasticnet penalty requires an explicit L1/L2 mixing ratio.
# 0.5 weights both terms equally; tune as needed.
ELASTICNET_L1_RATIO = 0.5

# Each combination is a [solver, penalty] pair, so unpack it directly.
for solver, penalty in Combinations:
    # Pass l1_ratio only when it applies, leaving the defaults untouched otherwise.
    extra_params = {'l1_ratio': ELASTICNET_L1_RATIO} if penalty == 'elasticnet' else {}

    classifier = LogisticRegression(penalty=penalty,
                                    solver=solver,
                                    random_state=0,
                                    **extra_params) #If another model proved to be more accurate, replace here

    try:
        classifier.fit(X_train, y_train)
    except ValueError as err:
        # An unsupported solver/penalty pair is reported and skipped so that
        # one bad combination does not abort the whole sweep.
        print('Penalty:',penalty,'Solver',solver,'- skipped:',err)
        continue

    y_pred = classifier.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    print('Penalty:',penalty,'Solver',solver)
    print(cm)
    # Inside a loop a bare expression is not displayed, so print the score.
    print('Accuracy:', accuracy_score(y_test, y_pred))

### Choosing Best Classifier

In [None]:
# Selected configuration. The penalty is kept as originally chosen; the solver
# must be 'saga' because it is the only solver that supports elasticnet
# (the previous value 'l2' is a penalty name, not a solver).
best_penalty, best_solver = 'elasticnet', 'saga'
# elasticnet requires an explicit L1/L2 mixing ratio; 0.5 weights both equally.
# NOTE(review): confirm this ratio against the sweep results.
best_l1_ratio = 0.5

classifier = LogisticRegression(penalty=best_penalty,
                                solver=best_solver,
                                l1_ratio=best_l1_ratio,
                                random_state=0) #If another model proved to be more accurate, replace here
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print('Penalty:',best_penalty,'Solver',best_solver)
print(cm)
# Last expression of the cell: the notebook displays the test accuracy.
accuracy_score(y_test, y_pred)

## Saving the model

In [None]:
# Persist the trained classifier. The context manager guarantees the file is
# flushed and closed even if pickling fails.
# NOTE(review): the classifier was trained on scaled, PCA-projected features, so
# `sc` and `pca` are needed alongside it to score raw data — consider saving them too.
with open('classifier.pkl', 'wb') as model_file:
    pkl.dump(classifier, model_file)