# In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
import json

# Load the raw training and test tables from disk.
train_source = pd.read_csv("./data/train_cleaned.csv")
test_source = pd.read_csv("./data/testing_set.csv")

# Work on copies so the *_source frames keep their original columns
# (including the index and Time columns that are dropped below).
train_data = train_source.copy()
test_data = test_source.copy()

# Expand the Time column into three numeric features: day, month and year.
# The column is parsed once per frame and the result reused for all three
# parts instead of re-running pd.to_datetime for each of them.
for frame in (train_data, test_data):
    parsed_time = pd.to_datetime(frame['Time'])
    frame['day'] = parsed_time.dt.day
    frame['month'] = parsed_time.dt.month
    frame['year'] = parsed_time.dt.year

# Drop the row-identifier column (train_idx / test_idx) and the raw Time
# column, which is now represented by day / month / year.
train_data = train_data.drop(['train_idx', "Time"], axis=1)
test_data = test_data.drop(['test_idx', "Time"], axis=1)

# Split the training table into features (X) and the target column (y).
X = train_data.drop("label", axis=1)
y = train_data["label"]

# Positional 80/20 split: the first 80% of the rows are used for fitting and
# the remaining 20% (X_test / y_test) are held out.
# NOTE(review): this is only a time-based split if the rows are already in
# chronological order -- confirm against how train_cleaned.csv was produced.
train_size = int(0.8 * len(train_data))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Standardise the features. The scaler is fitted on the training slice only
# and then applied to the separate test table. Despite its name,
# X_test_scaled holds the scaled rows of test_data, not of the X_test slice.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
# Select the test columns in the training feature order so the scaler sees the
# same feature layout it was fitted on; a feature missing from the test table
# raises a KeyError here instead of being silently misaligned.
X_test_scaled = scaler.transform(test_data[X_train.columns])

# Build a k-nearest-neighbours classifier with k neighbours and fit it on
# the scaled training features.
k = 5
knn = KNeighborsClassifier(n_neighbors=k).fit(X_train_scaled, y_train)

# Predict a label for every row of the scaled test set.
predictions = knn.predict(X_test_scaled)

# Map each test_idx (as a string key) to its predicted label (as a plain int).
predictions_dict = dict(
    zip(map(str, test_source['test_idx']), map(int, predictions))
)

# Wrap the mapping under the "target" key and write it out as JSON.
output = {"target": predictions_dict}
with open('./data/predsKNN.json', 'w') as json_file:
    json.dump(output, json_file)