**Assignment 3**

*CS 4319*

*Seth Tourish*

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier 
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.metrics import accuracy_score
import numpy as np
import statistics

#Data Loading and Preprocessing
# The raw file is compared against the '?' placeholder below; when that
# placeholder is present pandas reads horsepower as strings rather than numbers.
auto_df = pd.read_csv('Auto.csv')

# Build the modelling frame without touching the raw one:
#   1. keep only rows whose horsepower is not the '?' placeholder,
#   2. drop the car name column, which is not used as a feature,
#   3. convert horsepower to a numeric dtype explicitly, so downstream steps
#      do not rely on implicit string -> float coercion.
auto_df_ = (
    auto_df.loc[auto_df['horsepower'] != '?']
    .drop(columns=['name'])
    .assign(horsepower=lambda frame: pd.to_numeric(frame['horsepower']))
)

# Inline sanity check (replaces two bare `(... == '?').sum()` expressions that
# were never displayed from the middle of the cell).
assert not (auto_df_ == '?').any().any(), "unexpected '?' placeholder left in the data"

# Binarize the target: 1 when mpg is at or above the median, 0 otherwise.
# A single vectorized comparison selects the column by name (not by position 0)
# and avoids feeding a label-indexed boolean Series to .iloc.
mpg_median = statistics.median(auto_df_['mpg'].to_list())
auto_df_['mpg'] = (auto_df_['mpg'] >= mpg_median).astype(int)

#Prepare features and target
# The binarized mpg column is the label; every remaining column is a predictor.
y = auto_df_['mpg']
X = auto_df_.drop(columns='mpg')

#Split into training and testing sets
# 20% of the rows are held out for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#Scale the features
# The scaler's statistics are learned from the training rows only and then
# applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

def _tune_and_score(estimator, param_grid, X_fit, y_fit, X_eval, y_eval):
    """Grid-search an estimator and score the winning model on held-out data.

    Runs a 5-fold cross-validated grid search (scored by accuracy) over
    ``param_grid`` using ``X_fit`` / ``y_fit``, then predicts ``X_eval`` with
    the best estimator found and compares those predictions to ``y_eval``.

    Parameters
    ----------
    estimator : scikit-learn classifier instance to tune.
    param_grid : dict mapping parameter names to the values to try.
    X_fit, y_fit : features and labels used for the cross-validated search.
    X_eval, y_eval : features and labels used only for the final accuracy.

    Returns
    -------
    tuple
        ``(grid_search, best_estimator, predictions, accuracy)`` where
        ``grid_search`` is the fitted ``GridSearchCV`` object.
    """
    search = GridSearchCV(estimator, param_grid, cv=5, scoring='accuracy')
    search.fit(X_fit, y_fit)

    winner = search.best_estimator_
    predictions = winner.predict(X_eval)
    return search, winner, predictions, accuracy_score(y_eval, predictions)


#Neural Network Model
mlp = MLPClassifier(random_state=42, max_iter=1000)
param_grid_mlp = {
    'hidden_layer_sizes': [(50, 50), (100, 50), (50, 100), (100, 100)]
}
grid_search_mlp, best_mlp, y_pred_mlp, accuracy_mlp = _tune_and_score(
    mlp, param_grid_mlp, X_train_scaled, y_train, X_test_scaled, y_test
)

#Logistic Regression Model
logistic_reg = LogisticRegression(random_state=42, max_iter=10000)
param_grid_logistic = {
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
}
grid_search_logistic, best_logistic, y_pred_logistic, accuracy_logistic = _tune_and_score(
    logistic_reg, param_grid_logistic, X_train_scaled, y_train, X_test_scaled, y_test
)

#Coefficients
# One fitted weight per feature column of X; the model was fit on the
# standardized features, so the weights are on that scale.
print("\nLogistic Regression Coefficients Interpretation:")
for feature, coef in zip(X.columns, best_logistic.coef_[0]):
    print(f"{feature}: {coef:.4f}")

#K-Nearest Neighbors Model
knn = KNeighborsClassifier()
param_grid_knn = {
    'n_neighbors': range(1, 21)
}
grid_search_knn, best_knn, y_pred_knn, accuracy_knn = _tune_and_score(
    knn, param_grid_knn, X_train_scaled, y_train, X_test_scaled, y_test
)

#Accuracy Comparison
# Test-set accuracy of each tuned model, formatted to four decimals.
accuracy_report = [
    "\nComparison of Accuracies:",
    f"MLP Accuracy: {accuracy_mlp:.4f}",
    f"Logistic Regression Accuracy: {accuracy_logistic:.4f}",
    f"KNN Accuracy: {accuracy_knn:.4f}",
]
print("\n".join(accuracy_report))

# The hyperparameter value each grid search selected.
best_params_report = [
    "\nBest Tuned Parameters:",
    f"MLP Hidden Layer Sizes: {grid_search_mlp.best_params_['hidden_layer_sizes']}",
    f"Logistic Regression Solver: {grid_search_logistic.best_params_['solver']}",
    f"KNN Number of Neighbors: {grid_search_knn.best_params_['n_neighbors']}",
]
print("\n".join(best_params_report))


Logistic Regression Coefficients Interpretation:
cylinders: -0.3671
displacement: -0.1162
horsepower: -1.3747
weight: -2.3365
acceleration: -0.0249
year: 1.3941
origin: 0.5221

Comparison of Accuracies:
MLP Accuracy: 0.8734
Logistic Regression Accuracy: 0.8481
KNN Accuracy: 0.9114

Best Tuned Parameters:
MLP Hidden Layer Sizes: (100, 50)
Logistic Regression Solver: liblinear
KNN Number of Neighbors: 7
