In [2]:
import os

import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler,MinMaxScaler
from sklearn.model_selection import GridSearchCV, StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, log_loss
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression

In [3]:
# Load the Sonar dataset. The CSV location can be overridden through the
# SONAR_CSV environment variable so the notebook is not tied to a single
# machine; when the variable is unset, the original local path is used.
sonar_csv = os.environ.get(
    "SONAR_CSV",
    r"C:\Users\Administrator.DAI-PC2\Downloads\Shubham\Practical Machine Learning\ClassWork\Cases\Sonar\Sonar.csv",
)
df = pd.read_csv(sonar_csv)

# Features: every column except the first and the last.
# NOTE(review): the first column is skipped as well -- presumably it is an
# index/ID column rather than a feature; confirm against the CSV.
X = df.iloc[:, 1:-1]

# Target: the last column, label-encoded to integer class codes.
le = LabelEncoder()
y = le.fit_transform(df.iloc[:, -1])

In [4]:
# Hold out 30% of the rows as a test set. The split is stratified on y and
# seeded so that it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=24,
)

In [5]:
## Model 1: SVM with a linear kernel (features standardized)

In [6]:
# Pipeline: standardize the features, then fit a linear-kernel SVC.
# probability=True is set so the SVC exposes predict_proba.
scl_std = StandardScaler()
svm_l = SVC(kernel="linear", probability=True, random_state=24)
pipe_l = Pipeline(
    steps=[
        ("Scl", scl_std),
        ("SVM", svm_l),
    ]
)

In [7]:
## Model 2: SVM with an RBF kernel (features standardized)

In [8]:
# Pipeline: standardize the features, then fit an RBF-kernel SVC.
# A fresh StandardScaler instance is created for this pipeline.
scl_std = StandardScaler()
svm_r = SVC(kernel="rbf", probability=True, random_state=24)
pipe_r = Pipeline(
    steps=[
        ("Scl", scl_std),
        ("SVM", svm_r),
    ]
)

In [9]:
## Model 3: Logistic regression

In [10]:
# Logistic regression constructed with no arguments (library defaults).
lr = LogisticRegression()

In [11]:
## Model 4: Linear discriminant analysis

In [12]:
# Linear discriminant analysis constructed with no arguments (library defaults).
lda = LinearDiscriminantAnalysis()

In [13]:
## Model 5: Decision tree

In [14]:
# Decision tree with a fixed seed; all other settings are library defaults.
dtc = DecisionTreeClassifier(random_state=24)

In [15]:
### Hard voting

In [16]:
# Ensemble of the five base models. No `voting` argument is passed, so
# VotingClassifier's default mode ("hard", i.e. majority vote) is used.
voting_h = VotingClassifier(
    estimators=[
        ("SVML", pipe_l),
        ("SVMR", pipe_r),
        ("LR", lr),
        ("LDA", lda),
        ("TREE", dtc),
    ]
)

In [17]:
# Fit the hard-voting ensemble on the training split, then report its
# accuracy on the held-out test split.
voting_h.fit(X_train, y_train)
y_pred = voting_h.predict(X_test)
acc_hard = accuracy_score(y_test, y_pred)
print("accuracy = ", acc_hard)

accuracy =  0.7936507936507936


In [18]:
### Soft voting

In [21]:
# Same five base models as the hard-voting ensemble, but combined with
# voting="soft" so predicted probabilities are used instead of class votes.
voting_s = VotingClassifier(
    estimators=[
        ("SVML", pipe_l),
        ("SVMR", pipe_r),
        ("LR", lr),
        ("LDA", lda),
        ("TREE", dtc),
    ],
    voting="soft",
)

In [22]:
# Fit the soft-voting ensemble on the training split.
voting_s.fit(X_train, y_train)

# Accuracy of the hard class predictions on the test split.
y_pred = voting_s.predict(X_test)
acc_soft = accuracy_score(y_test, y_pred)
print("accuracy = ", acc_soft)

# Log loss is computed from column 1 of predict_proba, i.e. the probability
# assigned to the second class (label 1 after encoding).
y_pred_prob = voting_s.predict_proba(X_test)[:, 1]
ll_soft = log_loss(y_test, y_pred_prob)
print("log_loss = ", ll_soft)

accuracy =  0.7619047619047619
log_loss =  0.4600481665968838


In [23]:
##### Grid search over the soft-voting ensemble's hyperparameters

In [24]:
# Dump every parameter of the soft-voting ensemble; the nested keys
# (e.g. "SVML__SVM__C") are the names usable in a grid-search param_grid.
all_params = voting_s.get_params()
print(all_params)

{'estimators': [('SVML', Pipeline(steps=[('Scl', StandardScaler()),
                ('SVM',
                 SVC(kernel='linear', probability=True, random_state=24))])), ('SVMR', Pipeline(steps=[('Scl', StandardScaler()),
                ('SVM', SVC(probability=True, random_state=24))])), ('LR', LogisticRegression()), ('LDA', LinearDiscriminantAnalysis()), ('TREE', DecisionTreeClassifier(random_state=24))], 'flatten_transform': True, 'n_jobs': None, 'verbose': False, 'voting': 'soft', 'weights': None, 'SVML': Pipeline(steps=[('Scl', StandardScaler()),
                ('SVM',
                 SVC(kernel='linear', probability=True, random_state=24))]), 'SVMR': Pipeline(steps=[('Scl', StandardScaler()),
                ('SVM', SVC(probability=True, random_state=24))]), 'LR': LogisticRegression(), 'LDA': LinearDiscriminantAnalysis(), 'TREE': DecisionTreeClassifier(random_state=24), 'SVML__memory': None, 'SVML__steps': [('Scl', StandardScaler()), ('SVM', SVC(kernel='linear', probability=Tru

In [25]:
# 5-fold stratified cross-validation splitter; rows are shuffled with a
# fixed seed so the folds are reproducible.
kfold = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=24,
)

In [26]:
# Five evenly spaced candidates in [0.001, 3]; the same grid is used for
# every continuous hyperparameter below.
grid_vals = np.linspace(0.001, 3, 5)

# Keys follow the "<estimator name>__<pipeline step>__<parameter>" pattern
# of the soft-voting ensemble's nested parameters.
params = {
    "SVML__SVM__C": grid_vals,
    "SVMR__SVM__C": grid_vals,
    "LR__C": grid_vals,
    "SVMR__SVM__gamma": grid_vals,
    "TREE__max_depth": [None, 2, 3],
}

# Exhaustive search scored by negative log loss, using all available cores.
gcv = GridSearchCV(
    estimator=voting_s,
    param_grid=params,
    scoring="neg_log_loss",
    cv=kfold,
    n_jobs=-1,
)

# NOTE: the search is cross-validated on the full X, y, not only on the
# training split created earlier.
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'LR__C': 3.0, 'SVML__SVM__C': 0.75075, 'SVMR__SVM__C': 3.0, 'SVMR__SVM__gamma': 0.001, 'TREE__max_depth': None}
-0.46040789391720705
