In [4]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score


# Target size handed to tf.image.resize in preprocess_image; with the 3 decoded
# channels each image flattens to 64 * 64 * 3 = 12288 features.
IMG_SIZE = (64, 64)


In [5]:
# Read the three split manifests; each frame is indexed by "image_id" below,
# so every CSV has to provide that column.
train_df = pd.read_csv("../data/train.csv")
val_df = pd.read_csv("../data/validation.csv")
test_df = pd.read_csv("../data/test.csv")
# Set the test frame's "label" column to 0 for every row.
test_df["label"] = 0

# Attach the PNG location for every image id, one split at a time.
for split_df, split_dir in (
    (train_df, "../data/train"),
    (val_df, "../data/validation"),
    (test_df, "../data/test"),
):
    # `folder` is bound as a default so each lambda keeps its own directory.
    split_df["image_path"] = split_df["image_id"].apply(
        lambda image_id, folder=split_dir: os.path.join(folder, f"{image_id}.png")
    )


In [6]:
def preprocess_image(path):
    """Load one PNG and turn it into a flat, scaled feature vector.

    The file at ``path`` is read, decoded as a 3-channel PNG, resized to
    ``IMG_SIZE``, collapsed to one dimension and divided by 255.0.

    Args:
        path: Location of the PNG file to read.

    Returns:
        A 1-D NumPy array holding the resized image's values divided by 255.0.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_png(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, IMG_SIZE)
    pixels = resized.numpy().reshape(-1)
    return pixels / 255.0  # normalize

# Labels are copied out of the train and validation frames.
y_train = np.array(train_df["label"])
y_val = np.array(val_df["label"])

# Every image path is run through preprocess_image; the per-image vectors are
# then combined into one array per split.
X_train = np.array(list(map(preprocess_image, train_df["image_path"])))
X_val = np.array(list(map(preprocess_image, val_df["image_path"])))
X_test = np.array(list(map(preprocess_image, test_df["image_path"])))


In [7]:
# Build a 3-nearest-neighbour classifier and fit it on the training features;
# fit() hands back the estimator itself, so the calls can be chained.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Validation accuracy
val_preds = knn.predict(X_val)
acc = accuracy_score(y_true=y_val, y_pred=val_preds)
print(f"Validation accuracy: {acc:.4f}")


Validation accuracy: 0.2976


In [8]:
# Predict a label for every test image with the fitted classifier.
test_preds = knn.predict(X_test)

# Take the ids straight from test_df["image_id"] rather than parsing them back
# out of image_path: basename/splitext would drop everything before a path
# separator inside an id, so the submitted ids could differ from the ones that
# were read from test.csv.
submission = pd.DataFrame({
    "image_id": test_df["image_id"],
    "label": test_preds,
})

# index=False keeps the DataFrame index out of the written CSV.
submission.to_csv("submission_knn.csv", index=False)
