<a href="https://colab.research.google.com/github/RushiKanjaria/Logistic-Regression/blob/main/logistic_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Logistic Regression

## Importing the libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Importing the dataset

In [None]:
# Load the raw CSV. Replace the placeholder below with the path to your dataset.
df = pd.read_csv(r"ENTER_THE_NAME_OF_YOUR_DATASET_HERE.csv")

# Feature matrix: every column from the third one up to (not including) the last.
feature_frame = df.iloc[:, 2:-1]
X = feature_frame.values

# Target vector: the last column of the frame.
target_column = df.iloc[:, -1]
y = target_column.values

In [None]:
# Colour assigned to each class label when plotting the raw data.
colors = {
    0: 'red',
    1: 'green',
}

In [None]:
# Scatter the first two feature columns, colouring each point by its
# 'Purchased' label through the `colors` lookup.
point_colors = df['Purchased'].map(colors)

fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c=point_colors)
ax.set_title('Social Network Ads')
ax.set_xlabel('Age')
ax.set_ylabel('Estimated Salary')
plt.show()

## Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
split_parts = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Dump each split under its own heading (the headings carry the blank-line
# spacing between sections).
split_report = (
    ("X Train:\n\n", X_train),
    ("\n\nX Test:\n\n", X_test),
    ("\n\ny Train:\n\n", y_train),
    ("\n\ny Test:\n\n", y_test),
)
for heading, values in split_report:
    print(heading, values)

## Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the same fitted
# transform to both splits so the test data never influences the statistics.
# NOTE: X_train / X_test are overwritten here, so re-running this cell without
# re-running the split cell would scale already-scaled data.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Show the feature matrices after scaling, one heading per split.
scaled_report = (
    ("Scaled X Train:\n\n", X_train),
    ("\n\nScaled X Test:\n\n", X_test),
)
for heading, values in scaled_report:
    print(heading, values)

## Training the Logistic Regression model on the Training set

In [None]:
from sklearn.linear_model import LogisticRegression

# Build and fit the classifier on the scaled training data. `fit` returns the
# estimator itself, so the trailing expression still displays the fitted model
# as the cell output.
lr_cls = LogisticRegression(random_state=0).fit(X_train, y_train)
lr_cls

## Predicting a new result

In [None]:
# A single unseen observation, given in the original (unscaled) units.
# Presumably (age, estimated salary), matching the axis labels used in the plots.
new_observation = [[30, 87000]]

# The model was trained on scaled features, so scale the observation with the
# already-fitted scaler before predicting.
new_observation_scaled = sc.transform(new_observation)
print(lr_cls.predict(new_observation_scaled))

## Predicting the Test set results

In [None]:
# Predicted class for every row of the test split.
y_pred = lr_cls.predict(X_test)

# Side-by-side table: left column is the prediction, right column the true label.
predicted_vs_actual = np.column_stack((y_pred, y_test))
print("Prediction:\n", predicted_vs_actual)

## Making the Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Tabulate true labels (rows) against predicted labels (columns) on the test split.
test_confusion = confusion_matrix(y_test, y_pred)
print("Confusion matrix:\n", test_confusion)

In [None]:
# Fraction of test rows whose predicted label equals the true label.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:\n", test_accuracy)

## Visualising the Training set results

In [None]:
from matplotlib.colors import ListedColormap

# Plot in the original feature units: undo the scaling for display purposes.
X_set, y_set = sc.inverse_transform(X_train), y_train

# Build a grid that covers the data with some padding on each axis.
# The two axes live on very different scales (padding of 10 vs. 1000), so they
# get different step sizes. A 0.25 step on the second axis would produce at
# least 8,000 samples from the padding alone and typically far more, making the
# mesh needlessly huge and slow without changing the visible decision boundary.
# Assumes the second feature is salary-like in magnitude — adjust if not.
first_axis_step = 0.25
second_axis_step = 100
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 10, stop=X_set[:, 0].max() + 10, step=first_axis_step),
    np.arange(start=X_set[:, 1].min() - 1000, stop=X_set[:, 1].max() + 1000, step=second_axis_step),
)

# One colour per class, shared by the background regions and the points.
class_cmap = ListedColormap(('red', 'green'))

# Classify every grid point (re-scaled to the model's input space) and shade
# the plane by predicted class.
grid_points = np.column_stack((X1.ravel(), X2.ravel()))
grid_pred = lr_cls.predict(sc.transform(grid_points)).reshape(X1.shape)
plt.contourf(X1, X2, grid_pred, alpha=0.75, cmap=class_cmap)

plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())

# Overlay the actual observations, one scatter call per class.
# `color=` (not `c=`) is used because a single RGBA tuple passed as `c` is
# ambiguous: matplotlib may treat it as per-point values when a class happens
# to contain 3 or 4 points, and warns about it otherwise.
for i, j in enumerate(np.unique(y_set)):
    class_mask = y_set == j
    plt.scatter(X_set[class_mask, 0], X_set[class_mask, 1], color=class_cmap(i), label=j)

plt.title('Logistic Regression (Training Set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

## Visualising the Test set results

In [None]:
from matplotlib.colors import ListedColormap

# Plot in the original feature units: undo the scaling for display purposes.
X_set, y_set = sc.inverse_transform(X_test), y_test

# Build a grid that covers the data with some padding on each axis.
# The two axes live on very different scales (padding of 10 vs. 1000), so they
# get different step sizes. A 0.25 step on the second axis would produce at
# least 8,000 samples from the padding alone and typically far more, making the
# mesh needlessly huge and slow without changing the visible decision boundary.
# Assumes the second feature is salary-like in magnitude — adjust if not.
first_axis_step = 0.25
second_axis_step = 100
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 10, stop=X_set[:, 0].max() + 10, step=first_axis_step),
    np.arange(start=X_set[:, 1].min() - 1000, stop=X_set[:, 1].max() + 1000, step=second_axis_step),
)

# One colour per class, shared by the background regions and the points.
class_cmap = ListedColormap(('red', 'green'))

# Classify every grid point (re-scaled to the model's input space) and shade
# the plane by predicted class.
grid_points = np.column_stack((X1.ravel(), X2.ravel()))
grid_pred = lr_cls.predict(sc.transform(grid_points)).reshape(X1.shape)
plt.contourf(X1, X2, grid_pred, alpha=0.75, cmap=class_cmap)

plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())

# Overlay the actual observations, one scatter call per class.
# `color=` (not `c=`) is used because a single RGBA tuple passed as `c` is
# ambiguous: matplotlib may treat it as per-point values when a class happens
# to contain 3 or 4 points, and warns about it otherwise.
for i, j in enumerate(np.unique(y_set)):
    class_mask = y_set == j
    plt.scatter(X_set[class_mask, 0], X_set[class_mask, 1], color=class_cmap(i), label=j)

plt.title('Logistic Regression (Testing Set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

## ROC Curve and AUROC

In [None]:
# roc_curve and auc are not imported anywhere else in the notebook, so bring
# them in here; without this the cell fails with NameError on a fresh kernel.
from sklearn.metrics import auc, roc_curve

# predict_proba returns one column per class; column 1 is taken as the
# probability of the positive class.
prob_predict = lr_cls.predict_proba(X_test)
prob_predict = prob_predict[:, 1]

# False/true positive rates across decision thresholds, and the area under
# the resulting ROC curve.
fpr, tpr, threshold = roc_curve(y_test, prob_predict)
auroc = auc(fpr, tpr)

In [None]:
# Legend text carries the AUROC rounded to two decimals.
label = 'Logistic Regression Classifier AUC: {0:.2f}'.format(auroc)

fig, ax = plt.subplots()
# Diagonal reference line from (0, 0) to (1, 1).
ax.plot([0, 1], [0, 1], 'r--')
# ROC curve of the fitted classifier.
ax.plot(fpr, tpr, c='green', label=label)
ax.set_title('Receiver Operating Characteristic')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.legend()
plt.show()

## CAP Curve

In [None]:
# Number of observations in the test split.
total = y_test.shape[0]
# (value noted by the original author: 100)

# Labels are 0/1, so their sum is the number of class-1 observations.
class_1_count = y_test.sum()
# (value noted by the original author: 32)

# Everything that is not class 1.
class_0_count = total - class_1_count
# (value noted by the original author: 68)

In [None]:
# Random model: class-1 observations are found at a constant rate.
plt.plot([0, total], [0, class_1_count], c='r', linestyle='--', label='Random Model')

# Perfect model: every class-1 observation is found before any class-0 one.
plt.plot([0, class_1_count, total], [0, class_1_count, class_1_count], c='grey', label='Perfect Model')

# Trained model: rank test observations by predicted class-1 probability
# (highest first) and accumulate how many true class-1 labels have been seen.
probs = lr_cls.predict_proba(X_test)
probs = probs[:, 1]
model_y = [actual for _, actual in sorted(zip(probs, y_test), reverse=True)]
# Prepend 0 so the curve starts at the origin (0 observations, 0 hits).
y_values = np.append([0], np.cumsum(model_y))
x_values = np.arange(0, total + 1)

# CAP curve of the trained model. The label previously read "Linear
# Regression Classifier", which misnamed the model being plotted.
plt.plot(x_values, y_values, c='b', label='Logistic Regression Classifier')
plt.title('Cumulative Accuracy Profile')
plt.xlabel('Total observations')
plt.ylabel('Class 1 observations')
plt.legend(loc='lower right')
# Render explicitly, as every other plotting cell does, so the cell output is
# the figure rather than the repr of the Legend object.
plt.show()

In [None]:
# `auc` is used below but was never imported in the notebook; import it here
# so this cell runs on a fresh kernel.
from sklearn.metrics import auc

# Area under Random Model
a = auc([0, total], [0, class_1_count])

# Area between Perfect and Random Model
aP = auc([0, class_1_count, total], [0, class_1_count, class_1_count]) - a

# Area between Trained and Random Model
aR = auc(x_values, y_values) - a

# Ratio of the two areas: how much of the perfect model's gain over random the
# trained model achieves.
print("Accuracy Rate for Logistic Regression Classifier: {}".format(aR / aP))
# Output recorded by the original author:
## Accuracy Rate for Logistic Regression Classifier: 0.9080882352941176

In [None]:
plt.figure(figsize=(10, 7))

# Random model
plt.plot([0, total], [0, class_1_count], c='r', linestyle='--', label='Random Model')
# Perfect model
plt.plot([0, class_1_count, total], [0, class_1_count, class_1_count], c='grey', label='Perfect Model')
# Trained model. The label previously read "Linear Regression Classifier",
# which misnamed the model being plotted.
plt.plot(x_values, y_values, c='b', label='Logistic Regression Classifier')

# Point where the vertical line will cut the trained model: 50% of the test
# observations, truncated to an integer so it can index y_values.
index = int((50 * total / 100))

# 50% vertical line from the x-axis up to the trained-model curve
plt.plot([index, index], [0, y_values[index]], c='g', linestyle='--')

# Horizontal line from the trained-model curve across to the y-axis
plt.plot([0, index], [y_values[index], y_values[index]], c='g', linestyle='--')

plt.title('Cumulative Accuracy Profile')
plt.xlabel('Total observations')
plt.ylabel('Class 1 observations')
plt.legend()
plt.show()

In [None]:
# Percentage of all class-1 observations captured at the 50% mark:
# cumulative hits at `index` relative to the curve's maximum.
hits_at_index = y_values[index]
class_1_observed = hits_at_index * 100 / y_values.max()
print(class_1_observed)