In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [46]:
def perform_kernalPca(x_train,x_test,n_components=4,kernel='rbf'):
    """Project train and test features with Kernel PCA fitted on the training set.

    Parameters
    ----------
    x_train : array-like
        Training feature matrix; the projection is fitted on this data.
    x_test : array-like
        Test feature matrix; it is only transformed, never fitted on.
    n_components : int, default=4
        Number of components to keep. Exposed as a parameter so different
        values can be compared without editing the function body.
    kernel : str, default='rbf'
        Kernel name forwarded to ``KernelPCA``.

    Returns
    -------
    tuple
        ``(x_train_trans, x_test_trans)`` - the projected train and test data.
    """
    from sklearn.decomposition import KernelPCA
    # random_state is only consulted by the iterative eigensolvers
    # (arpack / randomized); pinning it keeps repeated runs identical.
    kpca = KernelPCA(n_components=n_components, kernel=kernel, random_state=0)
    x_train_trans = kpca.fit_transform(x_train)
    x_test_trans = kpca.transform(x_test)
    return x_train_trans, x_test_trans

def split_scaler(indep_X,dep_Y):
    """Split the data 75/25 into train/test sets and standardise the features.

    The split uses ``random_state=0``. The scaler is fitted on the training
    features only and then applied to both splits; the targets are returned
    unchanged.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)`` with both feature sets scaled.
    """
    features_fit, features_eval, y_train, y_test = train_test_split(
        indep_X, dep_Y, test_size=0.25, random_state=0
    )
    scaler = StandardScaler().fit(features_fit)
    scaled_fit = scaler.transform(features_fit)
    scaled_eval = scaler.transform(features_eval)
    return scaled_fit, scaled_eval, y_train, y_test

# prediction and evaluation metrics
def cm_prediction(classifier,x_test,y_true=None):
    """Predict on ``x_test`` and score the predictions against the true labels.

    Parameters
    ----------
    classifier : fitted estimator
        Any object exposing ``predict``.
    x_test : array-like
        Features to predict on.
    y_true : array-like, optional
        True labels for ``x_test``. When omitted, the notebook-level variable
        ``y_test`` is used, which is what this function always did before the
        parameter existed.

    Returns
    -------
    tuple
        ``(classifier, cm, clf_report, accuracy, x_test, y_true)`` where ``cm``
        is the confusion matrix, ``clf_report`` the text classification report
        and ``accuracy`` the accuracy score.

    Raises
    ------
    NameError
        If ``y_true`` is omitted and no notebook-level ``y_test`` exists.
    """
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    if y_true is None:
        # Backward-compatible fallback to the global defined by the split cell.
        # Prefer passing y_true explicitly so a stale global cannot be scored.
        y_true = y_test

    y_pred = classifier.predict(x_test)
    cm = confusion_matrix(y_true, y_pred)
    clf_report = classification_report(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    return classifier, cm, clf_report, accuracy, x_test, y_true

def logistic(x_train, y_train, x_test):
    """Fit a logistic-regression classifier and evaluate it on ``x_test``.

    The model is created with ``random_state=0``. The return value is the
    6-tuple produced by ``cm_prediction``:
    ``(classifier, cm, clf_report, accuracy, x_test, y_test)``.
    """
    from sklearn.linear_model import LogisticRegression
    model = LogisticRegression(random_state=0)
    model.fit(x_train, y_train)
    # cm_prediction already returns the full result tuple; hand it straight back.
    return cm_prediction(model, x_test)

def svm_linear(x_train, y_train, x_test):
    """Fit a linear-kernel support vector classifier and evaluate it on ``x_test``.

    Returns the 6-tuple produced by ``cm_prediction``:
    ``(classifier, cm, clf_report, accuracy, x_test, y_test)``.
    """
    from sklearn.svm import SVC
    svc_settings = dict(C=1.0, kernel='linear', degree=3, gamma='scale', random_state=0)
    model = SVC(**svc_settings)
    model.fit(x_train, y_train)
    return cm_prediction(model, x_test)

def svm_nonlinear(x_train, y_train, x_test):
    """Fit an RBF-kernel support vector classifier and evaluate it on ``x_test``.

    Returns the 6-tuple produced by ``cm_prediction``:
    ``(classifier, cm, clf_report, accuracy, x_test, y_test)``.
    """
    from sklearn.svm import SVC
    svc_settings = dict(C=1.0, kernel='rbf', degree=3, gamma='scale', random_state=0)
    model = SVC(**svc_settings)
    model.fit(x_train, y_train)
    return cm_prediction(model, x_test)

def naivebayes(x_train, y_train, x_test):
    """Fit a Gaussian naive Bayes classifier and evaluate it on ``x_test``.

    Returns the 6-tuple produced by ``cm_prediction``:
    ``(classifier, cm, clf_report, accuracy, x_test, y_test)``.
    """
    from sklearn.naive_bayes import GaussianNB
    model = GaussianNB()
    model.fit(x_train, y_train)
    return cm_prediction(model, x_test)

def knn(x_train, y_train, x_test):
    """Fit a k-nearest-neighbours classifier and evaluate it on ``x_test``.

    The model uses 10 neighbours, the Minkowski metric with ``p=4`` and
    distance-weighted voting. Returns the 6-tuple produced by
    ``cm_prediction``:
    ``(classifier, cm, clf_report, accuracy, x_test, y_test)``.
    """
    from sklearn.neighbors import KNeighborsClassifier
    knn_settings = dict(n_neighbors=10, p=4, metric='minkowski', weights='distance')
    model = KNeighborsClassifier(**knn_settings)
    model.fit(x_train, y_train)
    return cm_prediction(model, x_test)

def randomForest(x_train, y_train, x_test):
    """Fit a random-forest classifier and evaluate it on ``x_test``.

    The forest has 50 trees and uses ``max_features='sqrt'``. ``random_state``
    is fixed to 0, matching the other seeded models in this notebook, so the
    reported accuracy is the same on every run.

    Returns the 6-tuple produced by ``cm_prediction``:
    ``(classifier, cm, clf_report, accuracy, x_test, y_test)``.
    """
    from sklearn.ensemble import RandomForestClassifier
    # Without a seed the bootstrap samples and feature subsets differ per run,
    # which makes the accuracy table impossible to reproduce.
    classifier=RandomForestClassifier(n_estimators=50, max_features='sqrt', random_state=0)
    classifier.fit(x_train, y_train)
    classifier,cm,clf_report,accuracy,x_test,y_test = cm_prediction(classifier,x_test)
    return classifier,cm,clf_report,accuracy,x_test,y_test

def decisionTree(x_train, y_train, x_test):
    """Fit a decision-tree classifier and evaluate it on ``x_test``.

    The tree uses the entropy criterion, random split selection and a maximum
    depth of 6. ``random_state`` is fixed to 0, matching the other seeded
    models in this notebook, so the reported accuracy is the same on every run.

    Returns the 6-tuple produced by ``cm_prediction``:
    ``(classifier, cm, clf_report, accuracy, x_test, y_test)``.
    """
    from sklearn.tree import DecisionTreeClassifier
    # splitter='random' draws random split candidates, so an unseeded tree
    # gives a different model (and accuracy) on each run.
    classifier=DecisionTreeClassifier(criterion='entropy', splitter= 'random', max_depth= 6, random_state=0)
    classifier.fit(x_train, y_train)
    classifier,cm,clf_report,accuracy,x_test,y_test = cm_prediction(classifier,x_test)
    return classifier,cm,clf_report,accuracy,x_test,y_test

def kernalpca_table(acclog,accsvm_l,accsvm_nl,accknn,accnaive,accdes,accrf):
    """Build a one-row summary table of model accuracies.

    Each argument is a sequence of accuracies for one model; element ``i`` of
    every sequence fills row ``i`` of the table. The table has a single row
    labelled ``'KernalPca'``, so only element 0 is read.

    Note that the column order (``Naive`` before ``KNN``) differs from the
    parameter order (``accknn`` before ``accnaive``); values are matched by
    name, not by position.

    Returns
    -------
    pandas.DataFrame
        Index ``['KernalPca']`` and columns ``logistic, svm_linear,
        svm_nonlinear, Naive, KNN, DTree, RForest``.
    """
    scores_by_column = {
        'logistic': acclog,
        'svm_linear': accsvm_l,
        'svm_nonlinear': accsvm_nl,
        'Naive': accnaive,
        'KNN': accknn,
        'DTree': accdes,
        'RForest': accrf,
    }
    dataframe=pd.DataFrame(index=['KernalPca'],columns=list(scores_by_column))
    for number,idex in enumerate(dataframe.index):
        for column,scores in scores_by_column.items():
            # Single .loc write: chained ``dataframe[col][idx] = ...`` assigns
            # into a temporary and is a no-op under pandas Copy-on-Write.
            dataframe.loc[idex,column]=scores[number]
    # Return after the loop so every index row is filled before handing back.
    return dataframe


In [47]:
# Data Collection
# Read the ads CSV from the working directory; index_col=None keeps the
# default integer row index instead of using a file column as the index.
dataset=pd.read_csv('Social_Network_Ads.csv',index_col=None)
# NOTE: this binds a second name to the same DataFrame object; no copy is made.
df=dataset
df

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19.0,19000.0,0
1,15810944,Male,35.0,20000.0,0
2,15668575,Female,26.0,43000.0,0
3,15603246,Female,27.0,57000.0,0
4,15804002,Male,19.0,76000.0,0
...,...,...,...,...,...
395,15691863,Female,46.0,41000.0,1
396,15706071,Male,51.0,23000.0,1
397,15654296,Female,50.0,20000.0,1
398,15755018,Male,36.0,33000.0,0


In [48]:
# Data Preprocessing
# One-hot encode the categorical columns. drop_first=True drops the first
# level of each encoded column, so Gender becomes the single Gender_Male flag.
df=pd.get_dummies(df,drop_first=True)

In [49]:
# Inspect the frame after one-hot encoding.
df

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased,Gender_Male
0,15624510,19.0,19000.0,0,True
1,15810944,35.0,20000.0,0,True
2,15668575,26.0,43000.0,0,False
3,15603246,27.0,57000.0,0,False
4,15804002,19.0,76000.0,0,True
...,...,...,...,...,...
395,15691863,46.0,41000.0,1,False
396,15706071,51.0,23000.0,1,True
397,15654296,50.0,20000.0,1,False
398,15755018,36.0,33000.0,0,True


In [50]:
# input output split
# Features: every column except the target.
# NOTE(review): this keeps 'User ID' as a model feature; it looks like a row
# identifier rather than a predictor - confirm whether it should be dropped too.
indep_x=df.drop('Purchased',axis=1)
# Target: the 'Purchased' column.
dep_y=df['Purchased']

In [51]:
# Split into train/test sets and standardise the features (see split_scaler).
# cm_prediction reads the notebook-level y_test defined here when no labels
# are passed to it, so this cell must run before any model is evaluated.
x_train,x_test,y_train,y_test=split_scaler(indep_x,dep_y)

In [52]:
# Project the scaled features with Kernel PCA; the projection is fitted on the
# training split and then applied to the test split (see perform_kernalPca).
x1_train_trans,x2_test_trans =  perform_kernalPca(x_train,x_test)

In [53]:
# One accuracy list per model; kernalpca_table reads element 0 of each.
acclog=[]
accsvm_l=[]
accsvm_nl=[]
accknn=[]
accnaive=[]
accdes=[]
accrf=[]

# Train and score every model on the KPCA-projected data, in the same order as
# before. Only the first four returned values are kept: the helpers also return
# x_test/y_test, and unpacking those here used to overwrite the notebook-level
# x_test with the projected test set, which broke re-running the KPCA cell.
for fit_and_score,scores in (
    (logistic,acclog),
    (svm_linear,accsvm_l),
    (svm_nonlinear,accsvm_nl),
    (naivebayes,accnaive),
    (knn,accknn),
    (randomForest,accrf),
    (decisionTree,accdes),
):
    classifier,cm,clf_report,accuracy=fit_and_score(x1_train_trans,y_train,x2_test_trans)[:4]
    scores.append(accuracy)

In [55]:
# Collect the per-model accuracies into a one-row summary table.
result=kernalpca_table(acclog,accsvm_l,accsvm_nl,accknn,accnaive,accdes,accrf)

In [34]:
result
#2 - NOTE(review): author's run label; presumably the n_components value used for this saved output - TODO confirm

Unnamed: 0,logistic,svm_linear,svm_nonlinear,Naive,KNN,DTree,RForest
KernalPca,0.61,0.68,0.77,0.64,0.72,0.73,0.71


In [45]:
result
#3 - NOTE(review): author's run label; presumably the n_components value used for this saved output - TODO confirm

Unnamed: 0,logistic,svm_linear,svm_nonlinear,Naive,KNN,DTree,RForest
KernalPca,0.85,0.87,0.9,0.88,0.87,0.81,0.88


In [56]:
result
#4 - NOTE(review): author's run label; presumably the n_components value used for this saved output - TODO confirm

Unnamed: 0,logistic,svm_linear,svm_nonlinear,Naive,KNN,DTree,RForest
KernalPca,0.88,0.86,0.93,0.9,0.91,0.83,0.93
