In [6]:
# Import libraries
import os

import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC


In [7]:
# Load the dataset.
# The CSV location can be overridden through the NPHA_CSV environment variable so
# the notebook is not tied to a single machine; when the variable is unset the
# original local path is used, so existing runs behave exactly as before.
DATA_PATH = os.environ.get(
    "NPHA_CSV", "C:/Users/Shuvoprime/Documents/NPHA-doctor-visits.csv"
)
data = pd.read_csv(DATA_PATH)

# Quick look at the dataset.
print(data.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
data.info()

# Separate the features (X) from the target variable (y).
TARGET_COLUMN = 'Trouble Sleeping'
X = data.drop(columns=TARGET_COLUMN)
y = data[TARGET_COLUMN]

# Hold out 30% of the rows as a test set; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified on y -- consider stratify=y if the
# target classes turn out to be imbalanced (not verifiable from this cell).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


   Number of Doctors Visited  Age  Phyiscal Health  Mental Health  \
0                          3    2                4              3   
1                          2    2                4              2   
2                          3    2                3              2   
3                          1    2                3              2   
4                          3    2                3              3   

   Dental Health  Employment  Stress Keeps Patient from Sleeping  \
0              3           3                                   0   
1              3           3                                   1   
2              3           3                                   0   
3              3           3                                   0   
4              3           3                                   1   

   Medication Keeps Patient from Sleeping  Pain Keeps Patient from Sleeping  \
0                                       0                                 0   
1                 

In [8]:
# Define the SVM pipeline.
# Pipeline, StandardScaler and SVC all come from the import cell at the top of the
# notebook, so nothing is re-imported here.
pipe = Pipeline([
    # Standardize every feature before it reaches the SVM.
    ('scaler', StandardScaler()),
    # Support Vector Machine classifier; kernel and hyper-parameters are supplied
    # later through the 'svm__*' entries of the grid-search parameter grids.
    ('svm', SVC(random_state=42)),
])


In [9]:
# Search space for the RBF-kernel SVM. The 'svm__' prefix routes each entry to
# the pipeline step named 'svm'.
param_grid_rbf = dict(
    svm__kernel=['rbf'],
    svm__C=[0.1, 1, 10, 100],
    svm__gamma=[0.001, 0.01, 0.1, 1],
)

# Exhaustive 5-fold cross-validated search over the grid, scored by accuracy
# and fitted on the training split only.
grid_rbf = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid_rbf,
    scoring='accuracy',
    cv=5,
)
grid_rbf.fit(X_train, y_train)

# Report the winning combination and its mean cross-validated accuracy.
print("Best parameters for RBF kernel:", grid_rbf.best_params_)
print("Best accuracy for RBF kernel:", grid_rbf.best_score_)




Best parameters for RBF kernel: {'svm__C': 1, 'svm__gamma': 0.01, 'svm__kernel': 'rbf'}
Best accuracy for RBF kernel: 0.5992323232323231


In [10]:
# Search space for the polynomial-kernel SVM. The 'svm__' prefix routes each
# entry to the pipeline step named 'svm'.
param_grid_poly = dict(
    svm__kernel=['poly'],
    svm__C=[0.1, 1, 10, 100],
    svm__degree=[2, 3, 4],
    svm__gamma=['scale', 'auto'],
)

# Exhaustive 5-fold cross-validated search over the grid, scored by accuracy
# and fitted on the training split only.
grid_poly = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid_poly,
    scoring='accuracy',
    cv=5,
)
grid_poly.fit(X_train, y_train)

# Report the winning combination and its mean cross-validated accuracy.
print("Best parameters for Polynomial kernel:", grid_poly.best_params_)
print("Best accuracy for Polynomial kernel:", grid_poly.best_score_)




Best parameters for Polynomial kernel: {'svm__C': 1, 'svm__degree': 2, 'svm__gamma': 'auto', 'svm__kernel': 'poly'}
Best accuracy for Polynomial kernel: 0.5551919191919192


In [11]:
# Pull the refitted best pipeline out of each grid search.
best_rbf_model = grid_rbf.best_estimator_
best_poly_model = grid_poly.best_estimator_

# Predict the held-out test rows with each tuned pipeline.
y_pred_rbf = best_rbf_model.predict(X_test)
y_pred_poly = best_poly_model.predict(X_test)

# Score the predictions against the true test labels.
rbf_test_accuracy = accuracy_score(y_test, y_pred_rbf)
poly_test_accuracy = accuracy_score(y_test, y_pred_poly)

# Report test accuracy, RBF first and polynomial second.
print("Test accuracy for RBF kernel:", rbf_test_accuracy)
print("Test accuracy for Polynomial kernel:", poly_test_accuracy)


Test accuracy for RBF kernel: 0.5395348837209303
Test accuracy for Polynomial kernel: 0.5116279069767442


In [12]:
import joblib

# Persist both tuned pipelines (not only the better one) to the current working
# directory, one pickle file per kernel.
joblib.dump(value=best_rbf_model, filename='best_rbf_model.pkl')    # RBF pipeline
joblib.dump(value=best_poly_model, filename='best_poly_model.pkl')  # polynomial pipeline


['best_poly_model.pkl']