# Logistic Regression

## Importing the libraries

In [1]:
import pandas as pd
import numpy as np

## Importing the dataset

In [2]:
# Read the raw table, then separate it into the feature matrix and the target vector.
# Column 0 is skipped (presumably a sample identifier - confirm against the CSV);
# the last column is used as the label.
dataset = pd.read_csv('breast_cancer.csv')
x, y = dataset.iloc[:, 1:-1].values, dataset.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

## Training the Logistic Regression model on the Training set

In [4]:
from sklearn.linear_model import LogisticRegression

# Baseline model: library-default hyperparameters, seeded for repeatability.
classifier = LogisticRegression(random_state=0)
# Kept as the cell's final bare expression so the fitted estimator is displayed.
classifier.fit(X=x_train, y=y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=0, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

## Predicting the Test set results

In [5]:
# Predicted class labels of the baseline classifier for the held-out test rows.
y_pred = classifier.predict(X=x_test)

## Making the Confusion Matrix

In [6]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Compare the baseline predictions with the true test labels:
# first the confusion matrix, then the overall accuracy.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

[[84  3]
 [ 3 47]]
0.9562043795620438


## Applying Grid Search to find the best model and the best parameters

In [7]:
from sklearn.model_selection import GridSearchCV

# Candidate values for C: 100 evenly spaced points from 0.01 to 1.0, each rounded
# to two decimals to strip floating-point noise left by linspace.
tuning_parameters = [round(c_value, 2) for c_value in np.linspace(0.01, 1.0, 100)]
solvers = ['newton-cg', 'lbfgs', 'liblinear']
penalty = ['l2']

# A single grid: every combination of C, solver and penalty is evaluated.
parameters = [
    {
        'C': tuning_parameters,
        'solver': solvers,
        'penalty': penalty,
    }
]

# Score each candidate by accuracy under 10-fold cross-validation on the
# training set, using all available cores.
grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=parameters,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
)
grid_search.fit(x_train, y_train)

# Keep the winning score and parameter combination for the cells that follow.
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

Best Accuracy: 97.06 %
Best Parameters: {'C': 0.06, 'penalty': 'l2', 'solver': 'newton-cg'}


## Training the Logistic Regression model on the Training set with tuned hyperparameters

In [8]:
# Train a fresh model with the hyperparameters chosen by the grid search.
# They are taken from `best_parameters` (set in the grid-search cell) instead of
# being copied by hand from its printed output, so this cell keeps matching the
# search result if the data, the grid or the library version changes.
new_classifier = LogisticRegression(random_state=0, **best_parameters)
new_classifier.fit(x_train, y_train)

# Test-set predictions of the tuned model.
new_y_pred = new_classifier.predict(x_test)

## Computing the accuracy with k-Fold Cross Validation

In [9]:
from sklearn.model_selection import cross_val_score

# One score per fold from 10-fold cross-validation of the tuned model on the
# training set; report the mean and the spread across folds.
accuracies = cross_val_score(new_classifier, x_train, y_train, cv=10)
print("Accuracy: {:.2f} %".format(np.mean(accuracies) * 100))
print("Standard Deviation: {:.2f} %".format(np.std(accuracies) * 100))

Accuracy: 97.06 %
Standard Deviation: 1.87 %
