# Importing the libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Importing the dataset

In [None]:
# Load the CSV that contains the data.
dataset = pd.read_csv('/content/Social_Network_Ads.csv')

# Feature matrix: every column except the last one.
X = dataset.iloc[:, :-1].to_numpy()
# Target vector: the last column only.
y = dataset.iloc[:, -1].to_numpy()

# Show both shapes as the cell output.
X.shape, y.shape

# Splitting Dataset into Train and Test set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed seed makes the shuffled
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, shuffle=True, random_state=42
)

# Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so the test data never influences it.
sc = StandardScaler()
sc.fit(X_train)

X_train_scaled = sc.transform(X_train)
X_test_scaled = sc.transform(X_test)

In [None]:
# Preview the first ten scaled training rows (shown as the cell output).
X_train_scaled[:10]

In [None]:
# Preview the first ten training labels (shown as the cell output).
y_train[:10]

# Training the Kernel SVM Classification Model

In [None]:
from sklearn.svm import SVC

# Support vector classifier with an RBF kernel, trained on the scaled
# training features. The fit call stays last so the cell displays the
# fitted estimator.
clf = SVC(random_state=0, kernel='rbf')
clf.fit(X=X_train_scaled, y=y_train)

# Predicting a new result

In [None]:
# One unseen observation in the original (unscaled) feature units; it has to
# go through the fitted scaler before the classifier sees it.
new_sample = [[30, 87000]]
preds = clf.predict(sc.transform(new_sample))
preds

# Predicting the test set result

In [None]:
# Predict the held-out split and print predictions next to the true labels:
# column 0 is the prediction, column 1 is the ground truth.
y_hat = clf.predict(X_test_scaled)
print(np.column_stack((y_hat, y_test)))

# Measure the performance of classification model using K-Fold Cross validation

In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation on the scaled training data; `accuracies` holds
# one score per fold.
accuracies = cross_val_score(clf, X_train_scaled, y_train, cv=10)

# Report the mean and spread of the fold scores as percentages.
mean_pct = round(accuracies.mean()*100, 2)
std_pct = round(accuracies.std()*100, 2)
print(f'Accuracy: {mean_pct}%')
print(f'Standard Deviation: {std_pct} %')

# Making the confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix

# Tabulate true labels against predicted labels for the test split.
cm = confusion_matrix(y_true=y_test, y_pred=y_hat)
print(cm)

In [None]:
from sklearn.metrics import accuracy_score

# Test-set accuracy, printed as a percentage.
test_accuracy_pct = accuracy_score(y_test, y_hat) * 100
print(f'{test_accuracy_pct}%')

# Applying Grid Search to find the best hyperparameters

In [None]:
from sklearn.model_selection import GridSearchCV

# Two candidate families: a linear kernel tuned over C only, and an RBF
# kernel tuned over both C and gamma.
parameters = [
    {
        'C': [.25, .50, .75, 1],
        'kernel': ['linear'],
    },
    {
        'C': [.25, .50, .75, 1],
        'kernel': ['rbf'],
        'gamma': [.2, .5, .8, .9],
    },
]

# Exhaustive search scored by accuracy with 10-fold cross-validation;
# n_jobs=-1 uses every available CPU core.
grid_search = GridSearchCV(estimator=clf,
                           param_grid=parameters,
                           scoring='accuracy',
                           cv=10,
                           n_jobs=-1)

# Fit on the *scaled* training features, the same representation the
# classifier is trained and cross-validated on. Searching on the raw
# X_train would tune C/gamma for a different feature scale than the model
# actually sees.
grid_search.fit(X_train_scaled, y_train)
best_accuracy = grid_search.best_score_
best_pars = grid_search.best_params_

print(f' Best Accuracy: {round(best_accuracy*100, 2)}%')
print(f' Best Parameters: {best_pars}')

In [None]:
# Visualising the Training set results
from matplotlib.colors import ListedColormap

# One colormap shared by the decision regions and the scatter points, so
# region colour i and class colour i always match.
region_cmap = ListedColormap(('red', 'green'))

# Undo the scaling so both axes are drawn in the original feature units.
X_set, y_set = sc.inverse_transform(X_train_scaled), y_train

# Grid covering the data range, padded by 10 on the first axis and by 1000
# on the second.
# NOTE(review): the second axis is padded by 1000 but sampled every 0.75,
# so the grid is very dense and slow to classify; consider a coarser step
# for that axis if plotting takes too long.
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 10,
                               stop=X_set[:, 0].max() + 10, step=0.75),
                     np.arange(start=X_set[:, 1].min() - 1000,
                               stop=X_set[:, 1].max() + 1000, step=0.75))

# Classify every grid point (after re-scaling it) to paint the regions.
grid_points = np.array([X1.ravel(), X2.ravel()]).T
grid_labels = clf.predict(sc.transform(grid_points)).reshape(X1.shape)
plt.contourf(X1, X2, grid_labels, alpha=0.75, cmap=region_cmap)
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())

# Overlay the observations, one scatter call per class. `color=` is used
# rather than `c=` because a single RGBA tuple passed as `c` is ambiguous:
# matplotlib treats it as per-point values when a class has exactly four
# points.
for i, j in enumerate(np.unique(y_set)):
    class_mask = y_set == j
    plt.scatter(X_set[class_mask, 0],
                X_set[class_mask, 1],
                color=region_cmap(i), label=j)
plt.title('Kernel SVM (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()


In [None]:
# Visualising the Test set results
from matplotlib.colors import ListedColormap

# One colormap shared by the decision regions and the scatter points, so
# region colour i and class colour i always match.
region_cmap = ListedColormap(('red', 'green'))

# Undo the scaling so both axes are drawn in the original feature units.
X_set, y_set = sc.inverse_transform(X_test_scaled), y_test

# Grid covering the data range, padded by 10 on the first axis and by 1000
# on the second.
# NOTE(review): the second axis is padded by 1000 but sampled every 1 unit,
# so the grid is very dense and slow to classify; consider a coarser step
# for that axis if plotting takes too long.
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 10,
                               stop=X_set[:, 0].max() + 10, step=1),
                     np.arange(start=X_set[:, 1].min() - 1000,
                               stop=X_set[:, 1].max() + 1000, step=1))

# Classify every grid point (after re-scaling it) to paint the regions.
grid_points = np.array([X1.ravel(), X2.ravel()]).T
grid_labels = clf.predict(sc.transform(grid_points)).reshape(X1.shape)
plt.contourf(X1, X2, grid_labels, alpha=0.75, cmap=region_cmap)
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())

# Overlay the observations, one scatter call per class. `color=` is used
# rather than `c=` because a single RGBA tuple passed as `c` is ambiguous:
# matplotlib treats it as per-point values when a class has exactly four
# points.
for i, j in enumerate(np.unique(y_set)):
    class_mask = y_set == j
    plt.scatter(X_set[class_mask, 0], X_set[class_mask, 1],
                color=region_cmap(i), label=j)
plt.title('Kernel SVM (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()