In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the Social Network Ads dataset directly from its GitHub-hosted CSV.
DATA_URL = 'https://raw.githubusercontent.com/Divyanshu-ISM/100-Days-Of-ML-Code/master/datasets/Social_Network_Ads.csv'
df = pd.read_csv(DATA_URL)


In [None]:

# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# Keep a local copy of the raw data. index=False stops pandas from writing the
# row index as an extra unnamed column, which would otherwise come back as
# 'Unnamed: 0' when the file is re-read and be picked up as a feature by
# positional column selection.
df.to_csv('social_network_ads.csv', index=False)

In [None]:
# (rows, columns) of the raw frame.
df.shape

In [None]:
# Count missing values per column.
df.isna().sum()

In [None]:
## Drop the 'User ID' column (an identifier, not used as a feature).
# Rebinding instead of inplace=True, together with errors='ignore', keeps this
# cell idempotent: re-running it no longer raises KeyError once the column is gone.

df = df.drop(columns=['User ID'], errors='ignore')

In [None]:
# Confirm the 'User ID' column is no longer present.
df.head()

In [None]:
# Work on a copy so the modelling transformations below do not touch df.
df_model = df.copy()

### Prepare the dataset for machine learning

In [None]:
# Column dtypes and non-null counts, to spot columns that still need encoding.
df_model.info()

In [None]:
# Replace the Gender labels with pandas' integer category codes so the
# column can be used as a numeric feature.
gender_as_category = df_model['Gender'].astype('category')
df_model['Gender'] = gender_as_category.cat.codes

In [None]:
# Check that Gender now holds integer codes.
df_model.head()

### Set features and target


In [None]:
# Features (X): every column except the last.
# Target (Y): the last column.
X, Y = df_model.iloc[:, :-1], df_model.iloc[:, -1]

In [None]:
# Preview the feature matrix.
X.head()

In [None]:
# Preview the target vector.
Y.head()

### Train Test Split

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=101,
)

In [None]:
# Shapes of the training features and training target.
x_train.shape,y_train.shape 

In [None]:
# Shapes of the test features and test target.
x_test.shape, y_test.shape

### Feature Scaling

>> KNN is a distance-based algorithm, so the features are standardized to stop any single large-valued feature from dominating the distance.

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Standardizer shared by the train and test splits.
scaler = StandardScaler()

In [None]:
# Fit the scaler on the training split only, then apply those same statistics
# to the test split. Re-fitting on x_test would standardize it with its own
# mean/variance, so train and test would live in different feature spaces and
# test-set statistics would leak into preprocessing.
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

### Fitting the KNN model

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Baseline classifier: 1-nearest-neighbour fitted on the scaled training split.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(x_train_scaled,y_train)

### Predictions and Evaluations

In [None]:
# Predict labels for the scaled test split with the baseline model.
y_pred = knn.predict(x_test_scaled)

In [None]:
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# Per-class precision, recall and F1 for the baseline KNN predictions.
print(classification_report(y_test,y_pred))

In [None]:
# Confusion table: rows are the true labels (y_test), columns the predictions.
pd.crosstab(y_test,y_pred)

In [None]:
# Fraction of test rows the baseline model classified correctly.
accuracy_score(y_test,y_pred)

### The accuracy of the KNN model with k = 1 is 90%

### Finding the best K value based on the accuracy score

In [None]:
# Candidate neighbour counts: every integer from 1 to 30, each exactly once.
# np.linspace(1, 30, dtype='int64') produced 50 evenly spaced points truncated
# to integers, which repeated many k values and caused redundant model fits.
ks = np.arange(1, 31, dtype='int64')

In [None]:
# Fit one KNN per candidate k and record its accuracy on the test split.
# NOTE: k is being chosen against the same test data later used to report the
# final score, so that score is an optimistic estimate.
acc = [
    accuracy_score(
        y_test,
        KNeighborsClassifier(n_neighbors=k)
        .fit(x_train_scaled, y_train)
        .predict(x_test_scaled),
    )
    for k in ks
]


In [None]:
# Test-split accuracy as a function of the number of neighbours.
plt.style.use('default')
fig, ax = plt.subplots()
ax.plot(ks, acc, label='Accuracy')
ax.set_xlabel('K value', size=10)
ax.set_ylabel('Accuracy Score', size=10)
ax.legend()
ax.grid()

From the graph above, you can see that the accuracy is above 93% when k equals 5.

In [None]:
## Refit KNN with k = 5, the value picked from the accuracy curve, and
## predict the scaled test split.
BEST_K = 5
bestk = KNeighborsClassifier(n_neighbors=BEST_K).fit(x_train_scaled, y_train)
best_pred = bestk.predict(x_test_scaled)

In [None]:
# Test accuracy of the k = 5 model.
accuracy_score(y_test,best_pred)

In [None]:
# Per-class precision, recall and F1 for the k = 5 model.
print(classification_report(y_test,best_pred))

### The accuracy of the KNN model with k = 5 is 93%

In [None]:
# Confusion table for the k = 5 model: rows are true labels, columns predictions.
pd.crosstab(y_test,best_pred)

# Let's Try Support Vector Machines and see the accuracy score

In [None]:
from sklearn.svm import SVC

## Train the model 

In [None]:
# Support vector classifier with no hyperparameters overridden.
svc_model = SVC()

In [None]:
# Train on the same scaled training split used for KNN.
svc_model.fit(x_train_scaled,y_train)

## Testing and Evaluations



In [None]:
# Predict labels for the scaled test split with the SVC.
svc_pred = svc_model.predict(x_test_scaled)

In [None]:
# Per-class precision, recall and F1 for the SVC predictions.
print(classification_report(y_test,svc_pred))

In [None]:
# Test accuracy of the SVC.
accuracy_score(y_test,svc_pred)

In [None]:
# Confusion table for the SVC: rows are true labels, columns predictions.
pd.crosstab(y_test,svc_pred)

### Let's try to improve the SVC model

## GridSearchCV

In [None]:
# Hyperparameter grid for the RBF-kernel SVC: 4 values of C x 4 values of gamma.
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf'],
}

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Exhaustive cross-validated search over param_grid. refit=True retrains the
# best configuration on the whole training set; verbose=3 logs each fit.
grid = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    refit=True,
    verbose=3,
)

In [None]:
## Run the grid search on the scaled training split

grid.fit(x_train_scaled,y_train)


### Predictions and Evaluations

In [None]:
# Predict the scaled test split with the fitted grid search object.
grid_pred = grid.predict(x_test_scaled)

In [None]:
## Best estimator found by the grid search (its best score is shown in the next cell)
grid.best_estimator_

In [None]:
# Mean cross-validated score of the best parameter combination.
grid.best_score_

In [None]:
# Per-class precision, recall and F1 for the grid search predictions.
print(classification_report(y_test,grid_pred))

In [None]:
# Confusion table for the grid search predictions: rows are true labels, columns predictions.
pd.crosstab(y_test,grid_pred)