### Support Vector Classifier Implementation

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
## Let's create synthetic data points
from sklearn.datasets import make_classification

In [None]:
# Generate a synthetic two-class, two-feature data set.
# random_state pins the generator so every "Restart & Run All" produces the
# same points (and therefore reproducible metrics further down); without it
# each run creates a different data set. The seed matches the one used for
# the train/test split.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_classes=2,
    n_clusters_per_class=2,
    n_redundant=0,
    random_state=10,
)

# Notes on the parameters used above
# ----------------------------------
# 1. n_clusters_per_class: how many clusters (sub-groups) of points are
#    generated for each class.
#    - n_clusters_per_class=1: all samples of a class form a single group.
#    - n_clusters_per_class=2: each class is split into two smaller clusters,
#      which makes the data set more complex.
#    Increasing the value makes classification harder because it introduces
#    more variation within each class.
#
# 2. n_redundant: number of extra features that carry no new information
#    because they are built as combinations of the informative features.
#    Analogy - describing a car:
#    - Informative features: engine power, weight (these really matter).
#    - Redundant feature: "engine power x 2" - the same information in a
#      different form.
#    With n_redundant=2 the data set would contain two such derived features;
#    they add no new information but can make the data harder to interpret.
#    Here n_redundant=0, so no such derived features are generated.

In [None]:
# Peek at the first few feature rows instead of echoing the whole array.
X[:5]

In [None]:
# Peek at the first few class labels instead of echoing all 1000 of them.
y[:10]

In [None]:
# Tabular preview of the features: only the first rows are needed to see the
# layout (one column per feature).
pd.DataFrame(X).head()

In [None]:
# Plot feature 0 against feature 1. `hue=y` colours every point by its class
# label, which makes the overlap between the two classes visible.
# The array columns are indexed directly (no throw-away DataFrames), and the
# axes are labelled explicitly so the figure can be read on its own.
ax = sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y)
ax.set(xlabel="Feature 0", ylabel="Feature 1", title="Synthetic classification data");

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the samples for testing; random_state fixes the shuffle so
# the same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=10,
)

In [None]:
from sklearn.svm import SVC

In [None]:
# Baseline model: SVC with a linear kernel.
# NOTE: the two classes are not cleanly separable by a straight line (see the
# scatter plot), so this kernel is expected to misclassify some points.
svc=SVC(kernel='linear')

In [None]:
# Train the linear SVC on the training split (the fitted estimator is echoed
# as the cell output).
svc.fit(X_train,y_train)

In [None]:
# Feature weights of the fitted linear decision function. scikit-learn only
# exposes `coef_` for the linear kernel.
svc.coef_

In [None]:
# Predict class labels for the held-out test split with the linear SVC.
y_pred = svc.predict(X_test)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# Linear SVC: per-class precision/recall/F1, then the confusion matrix
# (rows = true class, columns = predicted class).
print(
    classification_report(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    sep="\n",
)

In [None]:
# Second candidate: SVC with the RBF (Gaussian) kernel, default C and gamma.
rbf = SVC(kernel='rbf')

In [None]:
# Train the RBF-kernel SVC on the same training split as the linear model.
rbf.fit(X_train,y_train)

In [None]:
# Predict class labels for the held-out test split with the RBF-kernel SVC.
y_pred1 = rbf.predict(X_test)

In [None]:
# RBF SVC: per-class precision/recall/F1, then the confusion matrix
# (rows = true class, columns = predicted class).
print(
    classification_report(y_test, y_pred1),
    confusion_matrix(y_test, y_pred1),
    sep="\n",
)

In [None]:
# Polynomial-kernel SVC: fit on the training split (fit() returns the
# estimator itself), predict the test split, then print the classification
# report followed by the confusion matrix.
polynomial = SVC(kernel='poly').fit(X_train, y_train)
y_pred2 = polynomial.predict(X_test)
print(
    classification_report(y_test, y_pred2),
    confusion_matrix(y_test, y_pred2),
    sep="\n",
)

In [None]:
# Sigmoid-kernel SVC: fit on the training split (fit() returns the estimator
# itself), predict the test split, then print the classification report
# followed by the confusion matrix.
sigmoid = SVC(kernel='sigmoid').fit(X_train, y_train)
y_pred3 = sigmoid.predict(X_test)
print(
    classification_report(y_test, y_pred3),
    confusion_matrix(y_test, y_pred3),
    sep="\n",
)

In [None]:
# Constant (bias) term of the fitted sigmoid-kernel decision function.
sigmoid.intercept_

### Hyperparameter Tuning With SVC

In [None]:
from sklearn.model_selection import GridSearchCV
 
# Search space for the grid search: 5 values of C x 5 values of gamma = 25
# candidate settings. Only the rbf kernel is searched (author's note: it gave
# the highest accuracy in the earlier kernel comparison).
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf'],
}

In [None]:
# Exhaustive 5-fold cross-validated search over `param_grid`.
# refit=True retrains the best combination on the whole training split so
# `grid` can be used directly for prediction; verbose=3 prints per-fold
# progress while fitting.
grid = GridSearchCV(
    SVC(),
    param_grid=param_grid,
    refit=True,
    cv=5,
    verbose=3,
)

In [None]:
# Run the grid search on the training split only; the test split stays
# untouched for the final evaluation.
grid.fit(X_train,y_train)

In [None]:
# Parameter combination that achieved the best cross-validation score.
grid.best_params_

In [None]:
# Evaluate the tuned model: predict the held-out test split with the refitted
# best estimator, then print the classification report followed by the
# confusion matrix.
y_pred4 = grid.predict(X_test)
print(
    classification_report(y_test, y_pred4),
    confusion_matrix(y_test, y_pred4),
    sep="\n",
)