In [1]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [2]:
from sklearn.datasets import load_digits


In [3]:
dig=load_digits()
dir(dig)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [4]:
digits=pd.DataFrame(dig.data,columns=dig.feature_names)

In [5]:
digits['target']=dig.target

In [6]:
digits['target_names']=digits.target.apply(lambda y:dig.target_names[y])

In [7]:
digits.head()

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7,target,target_names
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1,1
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2,2
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3,3
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4,4


In [8]:
##predictor variables:
pred=digits.drop(['target','target_names'],axis='columns')

In [9]:
response=digits.target

In [10]:
## splitting the data into train and test
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(pred,response,test_size=0.2)

In [11]:
x_train.shape

(1437, 64)

In [12]:
len(x_test)

360

In [29]:
model=SVC(kernel='rbf',C=10,gamma='auto')

In [30]:
model.fit(x_train,y_train)
model.score(x_test,y_test)

0.4

In [35]:
## we can run  for loop to determine the parameters\
import numpy as np
from sklearn.model_selection import cross_val_score
kernels=['rbf','linear']
Cp=[1,10,20]
average_scores={}
for k in kernels:
    for c in Cp:
        cross_scores=cross_val_score(SVC(kernel=k,C=c,gamma='auto'),x_train,y_train,cv=5)
        average_scores[k+'_'+str(c)]=np.mean(cross_scores)
average_scores

{'rbf_1': 0.33613046844754163,
 'rbf_10': 0.39668747580332947,
 'rbf_20': 0.39668747580332947,
 'linear_1': 0.9763404955478127,
 'linear_10': 0.9763404955478127,
 'linear_20': 0.9763404955478127}

In [37]:
## trying using sklearn API Gridsearch
from sklearn.model_selection import GridSearchCV
cf=GridSearchCV(SVC(gamma='auto'),{'kernel':['rbf','linear'],'C':[1,10,20]},cv=5,return_train_score=False)
cf.fit(x_train,y_train)
cf.cv_results_
df=pd.DataFrame(cf.cv_results_)

In [40]:
df[['param_C','param_kernel','mean_test_score']] ## linear kernel does pretty well as comapred to rbf

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.33613
1,1,linear,0.97634
2,10,rbf,0.396687
3,10,linear,0.97634
4,20,rbf,0.396687
5,20,linear,0.97634


In [44]:
## the above will increse the computational cost when we have huge datasets
## we can instead use RandomizedSearchCV
from sklearn.model_selection import RandomizedSearchCV
clf=RandomizedSearchCV(SVC(gamma='auto'),{'kernel':['rbf','linear'],'C':[1,10,20]},cv=5,return_train_score=False,n_iter=2)
clf.fit(x_train,y_train)
clf.cv_results_
df1=pd.DataFrame(clf.cv_results_)

In [46]:
df1[['param_C','params','mean_test_score']]

Unnamed: 0,param_C,params,mean_test_score
0,10,"{'kernel': 'rbf', 'C': 10}",0.396687
1,20,"{'kernel': 'linear', 'C': 20}",0.97634


In [60]:
cm={
    'svm':{
        'model':SVC(gamma='auto'),
        'params':{
        'kernel':['rbf','linear'],
        'C':[1,10,20]
        }
    },
    
    'linear_model':{
        'model':LogisticRegression(solver='liblinear',multi_class='auto'),
        'params':{
            'C':[5,10,15]
        }
    },
    
    'naive_bayes':{
        'model':GaussianNB(),
        'params':{
            'var_smoothing':[0.3,1,9]
        } 
    },
    
    'naive_bayes':{
        'model':MultinomialNB(),
        'params':{
            'alpha':[0.1,10,20]
        }
    },
    'tree':{
        'model':DecisionTreeClassifier(),
        'params':{
             'min_samples_split':[2,5,10],
             'min_samples_leaf':[2,8,19],
             'min_weight_fraction_leaf':[0.0,0,5,1]
        }
    },
    'ensemble':{
        'model':RandomForestClassifier(),
        'params':{
            'n_estimators':[10,50,90]
        }
    }
}

In [61]:
scores=[]
for mname,mp in cm.items():
    clf=GridSearchCV(mp['model'],mp['params'],cv=5,return_train_score=False)
    clf.fit(x_train,y_train)
    scores.append({
        'model':mname,
        'best_scores':clf.best_score_,
        'best_params':clf.best_params_
    })

Traceback (most recent call last):
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 284, in fit
    raise ValueError("min_weight_fraction_leaf must in [0, 0.5]")
ValueError: min_weight_fraction_leaf must in [0, 0.5]

Traceback (most recent call last):
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 284, in fit
    raise ValueError("min_weight_fraction_leaf must in 

Traceback (most recent call last):
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 284, in fit
    raise ValueError("min_weight_fraction_leaf must in [0, 0.5]")
ValueError: min_weight_fraction_leaf must in [0, 0.5]

Traceback (most recent call last):
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 284, in fit
    raise ValueError("min_weight_fraction_leaf must in 

Traceback (most recent call last):
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 284, in fit
    raise ValueError("min_weight_fraction_leaf must in [0, 0.5]")
ValueError: min_weight_fraction_leaf must in [0, 0.5]

Traceback (most recent call last):
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 284, in fit
    raise ValueError("min_weight_fraction_leaf must in 

Traceback (most recent call last):
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 284, in fit
    raise ValueError("min_weight_fraction_leaf must in [0, 0.5]")
ValueError: min_weight_fraction_leaf must in [0, 0.5]

Traceback (most recent call last):
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 284, in fit
    raise ValueError("min_weight_fraction_leaf must in 

Traceback (most recent call last):
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 284, in fit
    raise ValueError("min_weight_fraction_leaf must in [0, 0.5]")
ValueError: min_weight_fraction_leaf must in [0, 0.5]

Traceback (most recent call last):
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\ronal\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 284, in fit
    raise ValueError("min_weight_fraction_leaf must in 

In [63]:
scores

[{'model': 'svm',
  'best_scores': 0.9763404955478127,
  'best_params': {'C': 1, 'kernel': 'linear'}},
 {'model': 'linear_model',
  'best_scores': 0.9429466705381339,
  'best_params': {'C': 10}},
 {'model': 'naive_bayes',
  'best_scores': 0.9046626984126984,
  'best_params': {'alpha': 0.1}},
 {'model': 'tree',
  'best_scores': 0.8267324816105303,
  'best_params': {'min_samples_leaf': 2,
   'min_samples_split': 2,
   'min_weight_fraction_leaf': 0}},
 {'model': 'ensemble',
  'best_scores': 0.9728561749903213,
  'best_params': {'n_estimators': 90}}]

In [64]:
score=pd.DataFrame(scores)

In [65]:
score.head()

Unnamed: 0,model,best_scores,best_params
0,svm,0.97634,"{'C': 1, 'kernel': 'linear'}"
1,linear_model,0.942947,{'C': 10}
2,naive_bayes,0.904663,{'alpha': 0.1}
3,tree,0.826732,"{'min_samples_leaf': 2, 'min_samples_split': 2..."
4,ensemble,0.972856,{'n_estimators': 90}


NameError: name 'DecisionTreeRegressor' is not defined

In [4]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
RandomForestRegressor()