In [2]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from k_nearest_neighbors import KNNClassifier
from decision_tree import DecisionTree

# ---------------------------------------------------------------------------
# End-to-end experiment: load ./dataset/winequalityN.csv, predict the "type"
# column from every other column, and report train/validation accuracy for
# the project's DecisionTree and KNNClassifier implementations.
# ---------------------------------------------------------------------------


def _predict_and_score(model, features, targets):
    """Run ``model.predict`` on *features* and score it against *targets*.

    Returns a ``(predictions, accuracy)`` pair, where accuracy is computed
    with ``sklearn.metrics.accuracy_score``.
    """
    predictions = model.predict(features)
    return predictions, accuracy_score(targets, predictions)


# Step 1: Load the dataset
df = pd.read_csv("./dataset/winequalityN.csv")

# Step 2: Preprocess the data
# Map the string values of the "type" column to integer class ids.
le = preprocessing.LabelEncoder()
df["type"] = le.fit(df["type"]).transform(df["type"])

# Missing values are replaced with 0 (done after the label encoding).
df = df.fillna(0)

# The label (y) is the "type" column; every other column is a feature (X).
label_collumn = "type"
feature_mask = df.columns != label_collumn
y = df[label_collumn].to_numpy()
X = df.loc[:, feature_mask].to_numpy()

# Step 3: Hold out 10% of the rows for validation, stratified on the label;
# the fixed random_state keeps the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
    stratify=y,
)

# Step 4: Train and evaluate Decision Tree
# NOTE(review): mode="ig" presumably selects information gain - confirm
# against decision_tree.DecisionTree.
tree = DecisionTree(max_depth=3, mode="ig")
tree.fit(X_train, y_train)

# Accuracy on the data the tree was fitted on
y_train_pred_tree, train_accuracy_tree = _predict_and_score(tree, X_train, y_train)
print(f"Decision Tree Training Accuracy: {train_accuracy_tree:.2%}")

# Accuracy on the held-out validation rows
y_val_pred_tree, val_accuracy_tree = _predict_and_score(tree, X_val, y_val)
print(f"Decision Tree Validation Accuracy: {val_accuracy_tree:.2%}")

# Step 5: Train and evaluate KNN Classifier
knn = KNNClassifier(k=3)
knn.fit(X_train, y_train)

# Accuracy on the data the classifier was fitted on
y_train_pred_knn, train_accuracy_knn = _predict_and_score(knn, X_train, y_train)
print(f"KNN Training Accuracy: {train_accuracy_knn:.2%}")

# Accuracy on the held-out validation rows
y_val_pred_knn, val_accuracy_knn = _predict_and_score(knn, X_val, y_val)
print(f"KNN Validation Accuracy: {val_accuracy_knn:.2%}")


Decision Tree Training Accuracy: 97.79%
Decision Tree Validation Accuracy: 97.85%
KNN Training Accuracy: 97.16%
KNN Validation Accuracy: 95.69%
