In [1]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
import pandas as pd
import numpy as np

dt=pd.read_csv("D:state-population.csv").head(500)
ht=OneHotEncoder(sparse_output=False)
od=OrdinalEncoder()
dt['year_code']=od.fit_transform(dt[['year']])
dt['state_code']=od.fit_transform(dt[['state/region']])
result=ht.fit_transform(dt[['ages']])
dt=dt.join(pd.DataFrame(result, columns=['total','under18']))
dt.fillna(dt['population'].median,inplace=True)

In [2]:
dt.head(10)

Unnamed: 0,state/region,ages,year,population,year_code,state_code,total,under18
0,AL,under18,2012,1117489.0,22.0,1.0,0.0,1.0
1,AL,total,2012,4817528.0,22.0,1.0,1.0,0.0
2,AL,under18,2010,1130966.0,20.0,1.0,0.0,1.0
3,AL,total,2010,4785570.0,20.0,1.0,1.0,0.0
4,AL,under18,2011,1125763.0,21.0,1.0,0.0,1.0
5,AL,total,2011,4801627.0,21.0,1.0,1.0,0.0
6,AL,total,2009,4757938.0,19.0,1.0,1.0,0.0
7,AL,under18,2009,1134192.0,19.0,1.0,0.0,1.0
8,AL,under18,2013,1111481.0,23.0,1.0,0.0,1.0
9,AL,total,2013,4833722.0,23.0,1.0,1.0,0.0


In [3]:
X=dt[['state_code','population','year_code']]
y=dt['under18']
x_train,x_test,y_train,y_test=train_test_split(X,y, test_size=0.3,random_state=10)

In [4]:
model=SVC(kernel='poly', degree=3, C=10)
model.fit(x_train,y_train)

In [5]:
model.predict(x_test)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 0., 1., 1., 1.,
       1., 1., 0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 0., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 0., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0.,
       1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1.])

In [6]:
model.score(x_train,y_train)

0.6171428571428571

In [7]:
model.score(x_test,y_test)

0.5466666666666666

In [8]:
from sklearn.model_selection import KFold, cross_val_score
kf=KFold(n_splits=6, shuffle=True, random_state=50)
score_svc=cross_val_score(model, X,y, cv=kf, scoring='accuracy')
np.mean(score_svc)

0.5839787722317843

In [17]:
from sklearn.model_selection import GridSearchCV
param={
    'kernel':['poly','rbf','sigmoid'],
    'degree':[2,3,4],
    'C':[1,10,15]
}
svm=SVC()
gs=GridSearchCV(svm, param,cv=6, scoring='accuracy')
gs.fit(X,y)
gs.best_params_

{'C': 1, 'degree': 2, 'kernel': 'rbf'}

In [18]:
pd.DataFrame(gs.cv_results_).head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_degree,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,split5_test_score,mean_test_score,std_test_score,rank_test_score
0,0.004251,0.001074611,0.001501,0.0004995269,1,2,poly,"{'C': 1, 'degree': 2, 'kernel': 'poly'}",0.5,0.5,0.566265,0.506024,0.493976,0.518072,0.514056,0.024511,18
1,0.004084,0.0001898979,0.002,2.973602e-07,1,2,rbf,"{'C': 1, 'degree': 2, 'kernel': 'rbf'}",0.785714,0.928571,0.614458,1.0,0.518072,0.084337,0.655192,0.304686,1
2,0.002998,7.207536e-07,0.001252,0.0003822188,1,2,sigmoid,"{'C': 1, 'degree': 2, 'kernel': 'sigmoid'}",0.535714,0.642857,0.638554,0.60241,0.518072,0.084337,0.503657,0.193387,25
3,0.003415,0.0004456667,0.001696,0.0004940574,1,3,poly,"{'C': 1, 'degree': 3, 'kernel': 'poly'}",0.5,0.5,0.566265,0.506024,0.493976,0.493976,0.51004,0.025479,19
4,0.003591,0.0004456014,0.001834,0.0003730494,1,3,rbf,"{'C': 1, 'degree': 3, 'kernel': 'rbf'}",0.785714,0.928571,0.614458,1.0,0.518072,0.084337,0.655192,0.304686,1


In [19]:
gs.best_score_

0.6551921973608721