## Classification Algorithm
## SVM - Grid

In [None]:
import pandas as pd

### Data collection

In [None]:
# Load the Social Network Ads CSV (expected in the working directory) into a DataFrame.
dataset = pd.read_csv(filepath_or_buffer='Social_Network_Ads.csv')

In [None]:
# Display the dataset as loaded from the CSV for a quick visual check.
dataset

### Data preprocessing

In [None]:
# One-hot encode the categorical columns as 0/1 integers. drop_first=True drops
# the first level of each category so only k-1 indicator columns are produced
# (e.g. the 'Gender_Male' column that is selected as a feature later on).
dataset = pd.get_dummies(
    dataset,
    drop_first=True,
    dtype=int,
)

In [None]:
# Display the dataset again to inspect the result of the one-hot encoding step.
dataset

In [None]:
# (rows, columns) of the encoded dataset; the 'User ID' column is still present at this point.
dataset.shape

In [None]:
# 'User ID' is only a row identifier and is not used as a feature, so remove it.
# errors='ignore' keeps this cell idempotent: re-running it after the column has
# already been dropped is a no-op instead of raising KeyError.
dataset = dataset.drop(columns='User ID', errors='ignore')

### Check the number of unique classes and their counts

In [None]:
# Count how many rows fall into each class of the target column 'Purchased'.
dataset['Purchased'].value_counts()

### Input Output Split

In [None]:
# Split the columns into model inputs (features) and the output (target).
# Both selections use a list of labels, so each result is a DataFrame.
indep = dataset.loc[:, ['Age', 'EstimatedSalary', 'Gender_Male']]
dep = dataset.loc[:, ['Purchased']]

### Train Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    indep,
    dep,
    test_size=0.20,
    random_state=0,
)

### Standardization

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training features only, then apply
# that same scaling to both splits so the test set does not influence it.
SC = StandardScaler().fit(X_train)
X_train = SC.transform(X_train)
X_test = SC.transform(X_test)

In [None]:
# Inspect the training features after standardization.
X_train

### Train Set

### Model Creation in GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# GridSearchCV arguments used below:
#   param_grid - hyper-parameter values to try; every combination is evaluated.
#   scoring    - metric computed on the held-out fold of each cross-validation split.
#   n_jobs     - -1 means using all processors.
#   cv         - cross-validation splitting strategy (left at the library default here).
#   refit      - True retrains the best candidate on the whole training set, so
#                `grid` itself can be used for predict / predict_proba afterwards.

param_grid = {'kernel':['linear','rbf','poly','sigmoid'],
              'C' : [1,10,100,1000,2000,3000],
              'gamma' : ['scale','auto']}

# probability=True is needed by the later predict_proba call. The probability
# estimates rely on an internal shuffled cross-validation, so random_state is
# fixed to make them repeatable between runs.
grid = GridSearchCV(SVC(probability=True, random_state=0), param_grid, refit = True,verbose = 3, n_jobs=-1,scoring = 'f1_weighted')

# Y_train is a single-column DataFrame; flatten it to 1-D so scikit-learn does
# not emit a DataConversionWarning (column-vector y) on every fit.
grid.fit(X_train,Y_train.values.ravel())

### Test Set

### Evaluation Metrics 
### Confusion Matrix
### Classification Report

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Keep the full cross-validation log of the grid search for later tabulation.
results = grid.cv_results_

# Predict the held-out test rows with the refitted best estimator.
grid_predictions = grid.predict(X_test)

# Compare the predictions against the true test labels.
CM = confusion_matrix(y_true=Y_test, y_pred=grid_predictions)
Clf_report = classification_report(y_true=Y_test, y_pred=grid_predictions)

In [None]:
# Show the confusion matrix computed from Y_test and grid_predictions.
print("Confusion Matrix:\n",CM)

In [None]:
# Show the text report built from Y_test and grid_predictions.
print("Classification Report:\n",Clf_report)

### ADDITIONAL : Check for the best params & f1_score 

In [None]:
from sklearn.metrics import f1_score

# String options for `average`: 'micro', 'macro', 'samples', 'weighted', 'binary'.
# Note: the grid search itself was scored with 'f1_weighted'; this cell reports
# the macro-averaged F1 of the selected model on the test set.
f1_macro_ave = f1_score(y_true=Y_test, y_pred=grid_predictions, average='macro')

print(f'f1_macro_value of best parameter{grid.best_params_}:', f1_macro_ave)

### ADDITIONAL : Check roc_auc_score - Receiver Operating Characteristic (Area Under Curve)

In [None]:
# ROC_AUC : A performance metric used primarily for binary classification problems to evaluate the quality of a classification model.

from sklearn.metrics import roc_auc_score

# Score the test set using the predicted probability in column 1 of
# predict_proba (the second entry of grid.classes_) as the ranking score.
roc_auc_score(y_true=Y_test, y_score=grid.predict_proba(X_test)[:, 1])

### Tabulation of Parameters in GridSearchCV

In [None]:
# Tabulate the grid-search cross-validation results stored in `results`.
Table = pd.DataFrame(data=results)
Table

### Check the Model by getting User Input

In [None]:
# Collect one observation from the user as raw (unscaled) integers.
# int() raises ValueError if the typed text is not a whole number.
# NOTE(review): the model was fitted on standardized features, so these raw
# values need to go through the fitted scaler `SC` before being predicted on.
Age_input = int(input("AGE: "))
Salary_input = int(input("Salary: "))
Gender_input = int(input("Gender Male 0 or 1: "))

### Prediction

In [None]:
# The model was trained on standardized features, so the raw user input must be
# passed through the same fitted scaler (SC) before prediction; feeding raw
# age/salary values straight to the model yields meaningless predictions.
# The column names and order match the features the scaler was fitted on.
user_features = pd.DataFrame(
    [[Age_input, Salary_input, Gender_input]],
    columns=['Age', 'EstimatedSalary', 'Gender_Male'],
)
Prediction = grid.predict(SC.transform(user_features))
Prediction
