In [None]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from imblearn.over_sampling import SMOTE
from scipy.stats import uniform, randint
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_recall_curve
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
warnings.filterwarnings('ignore')

#Import CSV File
# The dataset location can be overridden with the FOOD_ACCESS_DATA_CSV environment
# variable so the notebook can run on machines other than the author's; when the
# variable is unset the original absolute path is used unchanged.
DATA_PATH = os.environ.get(
    'FOOD_ACCESS_DATA_CSV',
    r'C:\Users\kirta\OneDrive\Desktop\Projects\Food-Access-AI\Data\FoodAccessResearchAtlasData2019.xlsx - Food Access Research Atlas.csv',
)

# Load the CSV and keep only rows that have no missing value in ANY column.
# NOTE(review): dropna() without `subset=` also discards rows whose only missing
# values are in columns that are never used as features or target later on;
# consider restricting it to the modelling columns - confirm the intended behaviour.
df = pd.read_csv(DATA_PATH).dropna()

In [None]:
# Feature columns fed to the models; the target is the LILATracts_1And10 column.
feature_columns = [
    'Urban',
    'PovertyRate',
    'MedianFamilyIncome',
    'lapophalf',
    'lapophalfshare',
    'lalowihalf',
    'lalowihalfshare',
    'lakidshalfshare',
    'laseniorshalf',
    'laseniorshalfshare',
    'lawhitehalfshare',
    'lahunvhalfshare',
    'lasnaphalf',
    'lasnaphalfshare',
]
X = df[feature_columns]
y = df['LILATracts_1And10']

# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified on y - consider `stratify=y` if the
# target classes are imbalanced (confirm against the class counts).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)

In [None]:
# Baseline RandomForestClassifier with balanced class weights, aimed at better
# precision and recall.
# random_state is fixed so the fitted forest - and every metric printed below -
# is identical on each Restart & Run All.
clf = RandomForestClassifier(class_weight='balanced', random_state=42)
clf.fit(X_train, y_train)

#Predict outcome from test
y_pred = clf.predict(X_test)

print("Accuracy: \n", accuracy_score(y_test,y_pred))
print("Confusion Matrix: \n", confusion_matrix(y_test,y_pred))
print("Classification Report: \n", classification_report(y_test, y_pred))

# 5-fold cross-validation over the full X, y (this includes the rows held out
# as X_test above); the estimator's default scorer is used and the mean is printed.
scores = cross_val_score(clf, X, y, cv=5)
print(scores.mean())

In [None]:
# Use RandomizedSearchCV to find good hyperparameters for the RandomForestClassifier.
# 'auto' is intentionally absent from max_features: it was removed in
# scikit-learn 1.3 (for classifiers it was an alias of 'sqrt'), so candidates
# using it fail to fit, and with warnings globally silenced that failure is invisible.
param_dist = {
    'n_estimators': [10, 20, 30, 40],
    'max_depth': [None, 5, 10, 15, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2'],
    'bootstrap': [True, False],
    'class_weight': [None, 'balanced', 'balanced_subsample']
}

# A fresh, seeded estimator makes every candidate fit reproducible. class_weight
# is always set by the search space, so no other baseline setting needs carrying over.
# NOTE(review): scoring='accuracy' is kept as-is; if the goal is precision/recall
# on an imbalanced target, a scorer such as 'f1' may be more appropriate - confirm.
random_search = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_distributions=param_dist,
    n_iter=50,
    cv=5,
    scoring='accuracy',
    random_state=42,
)
random_search.fit(X_train,y_train)

print("Best Parameters: ", random_search.best_params_)
print("Best Score: ", random_search.best_score_)

In [None]:
# Modified RandomForestClassifier with hand-set hyperparameters, intended to
# boost precision and the f1 score.
# random_state is fixed so the probabilities used for threshold selection in the
# following cells do not change from run to run.
modified_clf = RandomForestClassifier(
    n_estimators=30,
    min_samples_split=10,
    min_samples_leaf=4,
    max_features="sqrt",
    max_depth=None,
    class_weight='balanced',
    bootstrap=True,
    random_state=42,
)
modified_clf.fit(X_train, y_train)

#Predict outcome from test
y_pred = modified_clf.predict(X_test)

print("Accuracy: \n", accuracy_score(y_test,y_pred))
print("Confusion Matrix: \n", confusion_matrix(y_test,y_pred))
print("Classification Report: \n", classification_report(y_test, y_pred))

# 5-fold cross-validation over the full X, y with the estimator's default scorer.
scores = cross_val_score(modified_clf, X, y, cv=5)
print(scores.mean())

In [None]:
#Finding the optimal threshold to get a strong balance of precision and recall
# Probability assigned to the second class (column 1 of predict_proba) for each test row.
probs = modified_clf.predict_proba(X_test)[:,1]

# Evenly spaced candidate cut-offs 0.05, 0.10, ..., 0.90. The previous
# np.arange(0.01, 0.9, 0.65) produced only two values (0.01 and 0.66), which is
# too coarse to locate a precision/recall balance. linspace is used instead of
# arange to avoid floating-point drift at the end point.
thresholds = np.linspace(0.05, 0.90, 18)

for threshold in thresholds:
    # Predict 1 when the probability reaches the cut-off, otherwise 0.
    preds = (probs >= threshold).astype(int)
    print(f"\n---Threshold: {threshold:.2f}---")
    print(classification_report(y_test,preds,digits=3))
    print(accuracy_score(y_test, preds))

In [None]:
# Precision and recall of the test-set probabilities at every candidate threshold.
precision, recall, thresholds = precision_recall_curve(y_test, probs)

# precision and recall each carry one more entry than thresholds, so the last
# element is dropped to line the curves up with the x-axis values.
fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(thresholds, precision[:-1], label="Precision")
ax.plot(thresholds, recall[:-1], label="Recall")
ax.set_xlabel("Threshold")
ax.set_ylabel("Score")
ax.set_title("Precision vs Recall at Different Thresholds")
ax.legend()
ax.grid()
plt.show()

In [None]:
# Final decision cut-off applied to the predicted probabilities.
# NOTE(review): 0.37 is presumably read off the precision/recall analysis above - confirm.
DECISION_THRESHOLD = 0.37
final_preds = (probs >= DECISION_THRESHOLD).astype(int)

# Report, in order: per-class metrics, overall accuracy, confusion matrix.
for summary in (
    classification_report(y_test, final_preds, digits=3),
    accuracy_score(y_test, final_preds),
    confusion_matrix(y_test, final_preds),
):
    print(summary)