In [2]:
import os
import subprocess

def install(package):
    """Install packages with pip into the environment of the running interpreter.

    Args:
        package: String of arguments appended to ``pip install``. It may hold
            several whitespace-separated tokens, including options, e.g.
            ``'--upgrade pip setuptools wheel'``.

    Returns:
        None. Installation stays best-effort: a failing pip run produces a
        ``RuntimeWarning`` rather than an exception, so the rest of the cell
        still executes.
    """
    # Function-scope imports: the cell itself only imports os and subprocess.
    import shlex
    import sys
    import warnings

    # `sys.executable -m pip` installs into the interpreter that is running this
    # code, and passing an argument list (no shell=True) keeps shell
    # metacharacters in `package` (e.g. the '>' in 'pkg>=1.0') from being
    # interpreted by a shell.
    command = [sys.executable, '-m', 'pip', 'install', *shlex.split(package)]
    result = subprocess.run(
        command,
        stdout=subprocess.DEVNULL,  # keep pip's progress output out of the notebook
        stderr=subprocess.PIPE,     # retained only to explain a failure
        text=True,
    )

    if result.returncode != 0:
        stderr_lines = (result.stderr or '').strip().splitlines()
        reason = stderr_lines[-1] if stderr_lines else 'no error output'
        warnings.warn(
            f"pip install {package} failed with exit code {result.returncode}: {reason}",
            RuntimeWarning,
            stacklevel=2,
        )

# Run the installs in a fixed order: the packaging-tool upgrade first,
# then each package listed below.
for requirement in (
    '--upgrade pip setuptools wheel',
    'cython',
    'pyyaml',
    'scipy',
    'scikit-learn',
):
    install(requirement)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.naive_bayes import GaussianNB

# Location of the input CSV. When the POVERTY_DATA_CSV environment variable is
# set it takes precedence, so the notebook can run on machines other than the
# one this absolute default path belongs to. expanduser() allows a value such
# as "~/data/file.csv" and leaves the default path unchanged.
file_path = os.path.expanduser(
    os.environ.get('POVERTY_DATA_CSV', '/Users/sherryzhang/Downloads/Final_Balanced_Data.csv')
)
data = pd.read_csv(file_path)

# Income cutoff for the binary target: rows with PINCP at or below it get 1, all others 0.
# NOTE(review): 13590 looks like an annual poverty guideline in dollars — confirm the source/year.
poverty_threshold = 13590

# Add the derived label and remove the raw income column it was computed from,
# so PINCP cannot be used as a feature.
data = data.assign(
    poverty_status=np.where(data['PINCP'] <= poverty_threshold, 1, 0)
).drop(columns=['PINCP'])

# Target is the derived label; every remaining column is a feature.
Y = data['poverty_status']
X = data.drop(columns=Y.name)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=18,
)

# Estimator to tune: Gaussian Naive Bayes constructed with its defaults.
naive_bayes_model = GaussianNB()

# Candidate var_smoothing values: 100 log-spaced points from 1e0 down to 1e-9.
param_grid = dict(var_smoothing=np.logspace(0, -9, num=100))

# 3-fold cross-validated search over the grid, scored on accuracy.
# n_jobs=-1 requests all available cores; verbose=1 prints the fit summary.
grid_search = GridSearchCV(
    estimator=naive_bayes_model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train, Y_train)

# Keep the estimator the search selected; it is the one evaluated on the test split.
best_naive_bayes_model = grid_search.best_estimator_

print("Best parameters found: ", grid_search.best_params_)

# Predict labels for the held-out rows with the selected model.
predictions = best_naive_bayes_model.predict(X_test)

# Apply each scalar metric to the same (truth, prediction) pair, in reporting order.
accuracy, precision, recall, f1 = (
    metric(Y_test, predictions)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(Y_test, predictions)

# One print call with a newline separator emits the same five report lines.
print(
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1 Score: {f1}",
    f"Confusion Matrix: \n{conf_matrix}",
    sep="\n",
)

Fitting 3 folds for each of 100 candidates, totalling 300 fits
Best parameters found:  {'var_smoothing': 5.336699231206302e-08}
Accuracy: 0.7589604874889043
Precision: 0.7583329325188783
Recall: 0.7583967675601573
F1 Score: 0.7583648486961959
Confusion Matrix: 
[[63477 20098]
 [20091 63066]]
