In [1]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import LeaveOneOut

# Read the two tables: the 2004 file feeds the training split and the 2000
# file feeds the test split (see the assignments at the bottom).
df1 = pd.read_csv("obesity_election_2004.csv")
df2 = pd.read_csv("obesity_election_2000.csv")

# Both tables get exactly the same clean-up.
for frame in (df1, df2):
    frame.columns = ['states', 'per', 'result', 'short']
    # Strip a trailing '%' from the text in 'per' and rescale it to a fraction.
    frame['per'] = frame['per'].str.rstrip('%').astype('float') / 100.0
    # Encode the label column numerically: 'R' -> 0, 'D' -> 1.
    frame['result'] = frame['result'].map({'R': 0, 'D': 1})

# The single feature stays a one-column DataFrame; the target is a Series.
x_train, y_train = df1[['per']], df1['result']
x_test, y_test = df2[['per']], df2['result']


# Pick the number of neighbours for k-NN by leave-one-out cross-validation
# on the training table.
loo = LeaveOneOut()

if len(x_train) < 2:
    raise ValueError("Leave-one-out needs at least 2 training rows to search over k")

# Every leave-one-out fold is fitted on len(x_train) - 1 rows, and k-NN cannot
# query more neighbours than it was fitted on, so the upper bound of 49 is
# capped by the fold size instead of assuming the table is large enough.
krange = range(1, min(49, len(x_train) - 1) + 1)
best_k = 0
best_accuracy = 0

for k in krange:
    accuracies = []
    knn = KNeighborsClassifier(n_neighbors=k)

    for train_index, test_index in loo.split(x_train):
        x_train_fold, x_test_fold = x_train.iloc[train_index], x_train.iloc[test_index]
        y_train_fold, y_test_fold = y_train.iloc[train_index], y_train.iloc[test_index]

        knn.fit(x_train_fold, y_train_fold)
        y_pred_fold = knn.predict(x_test_fold)
        # Each fold holds out a single row, so this score is either 0.0 or 1.0.
        accuracy = accuracy_score(y_test_fold, y_pred_fold)
        accuracies.append(accuracy)

    mean_accuracy = np.mean(accuracies)

    # The first k is always accepted, so best_k is never left at the invalid
    # value 0 even if every k scores 0. After that only a strictly better
    # score replaces it, which keeps the smallest k among ties.
    if best_k == 0 or mean_accuracy > best_accuracy:
        best_accuracy = mean_accuracy
        best_k = k


print(f"best_k: {best_k}")
print(f"best_accuracy: {best_accuracy}")

# Refit on the whole training table with the selected k and score the result
# on the test table. A fresh estimator is built from best_k instead of
# patching the n_neighbors attribute of whichever classifier the search loop
# left behind, so this step does not depend on leftover loop state.
knn = KNeighborsClassifier(n_neighbors=best_k)

knn.fit(x_train, y_train)
y_pred = knn.predict(x_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


best_k: 8
best_accuracy: 0.7
Test Accuracy: 60.00%


In [2]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score


# Read both tables and give them the same four column names.
df1 = pd.read_csv("obesity_election_2004.csv")
df2 = pd.read_csv("obesity_election_2000.csv")
df1.columns = df2.columns = ['states', 'per', 'result', 'short']

# 'per': strip the trailing '%' and rescale to a fraction.
# 'result': encode 'R' as 0 and 'D' as 1.
df1 = df1.assign(
    per=df1['per'].str.rstrip('%').astype('float') / 100.0,
    result=df1['result'].map({'R': 0, 'D': 1}),
)
df2 = df2.assign(
    per=df2['per'].str.rstrip('%').astype('float') / 100.0,
    result=df2['result'].map({'R': 0, 'D': 1}),
)

# Train on the 2004 table, test on the 2000 table; one feature column only.
x_train, y_train = df1[['per']], df1['result']
x_test, y_test = df2[['per']], df2['result']


# Fit one support-vector classifier per kernel on the training split.
# Only the kernel is set explicitly; fit() returns the estimator itself,
# so construction and fitting are chained.
svm_linear = svm.SVC(kernel='linear').fit(x_train, y_train)
svm_rbf = svm.SVC(kernel='rbf').fit(x_train, y_train)
svm_poly = svm.SVC(kernel='poly').fit(x_train, y_train)

# Predict the test split with each fitted model, in the same kernel order.
y_pred_linear, y_pred_rbf, y_pred_poly = (
    model.predict(x_test) for model in (svm_linear, svm_rbf, svm_poly)
)

# Fraction of test rows each model labels correctly.
accuracy_linear, accuracy_rbf, accuracy_poly = (
    accuracy_score(y_test, predicted)
    for predicted in (y_pred_linear, y_pred_rbf, y_pred_poly)
)

print(f"Accuracy (Linear Kernel): {accuracy_linear * 100:.2f}%")
print(f"Accuracy (RBF Kernel): {accuracy_rbf * 100:.2f}%")
print(f"Accuracy (Polynomial Kernel): {accuracy_poly * 100:.2f}%")



Accuracy (Linear Kernel): 58.00%
Accuracy (RBF Kernel): 40.00%
Accuracy (Polynomial Kernel): 54.00%


In [4]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score


# --- 2004 table: becomes the training split ---
df1 = pd.read_csv("obesity_election_2004.csv")
df1.columns = ['states', 'per', 'result', 'short']
df1['per'] = df1['per'].str.rstrip('%').astype('float') / 100.0  # "..%" text -> fraction
df1['result'] = df1['result'].map({'R': 0, 'D': 1})              # 'R' -> 0, 'D' -> 1
x_train, y_train = df1[['per']], df1['result']

# --- 2000 table: becomes the test split, cleaned the same way ---
df2 = pd.read_csv("obesity_election_2000.csv")
df2.columns = ['states', 'per', 'result', 'short']
df2['per'] = df2['per'].str.rstrip('%').astype('float') / 100.0
df2['result'] = df2['result'].map({'R': 0, 'D': 1})
x_test, y_test = df2[['per']], df2['result']


from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid searched exhaustively: 4 C values x 4 gamma values x
# 3 kernels. NOTE(review): per the scikit-learn SVC docs, gamma only applies
# to the 'rbf', 'poly' and 'sigmoid' kernels, so the linear candidates are
# repeated once per gamma value; left as-is to keep the search unchanged.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    gamma=[1, 0.1, 0.01, 0.001],
    kernel=['linear', 'rbf', 'poly'],
)

# 5-fold cross-validated search over the grid, fitted on the training split.
svm_classifier = svm.SVC()
grid_search = GridSearchCV(svm_classifier, param_grid, cv=5).fit(x_train, y_train)

# Winning parameter combination and the estimator built from it.
best_estimator = grid_search.best_estimator_
best_params = grid_search.best_params_

# Score the selected estimator on the test split.
y_pred_best = best_estimator.predict(x_test)
accuracy_best = accuracy_score(y_test, y_pred_best)
print(f"Best Accuracy after GridSearchCV: {accuracy_best * 100:.2f}%")




Best Accuracy after GridSearchCV: 62.00%
