# Preparations

## Import Necessary Modules

In [None]:
import sys
import numpy as np
sys.path.append("../lab1")

from mltk.loader import load_data
from mltk.preprocessing import DataPipeline
from mltk.cross_validation import k_fold_split
from knn import KNN

## Load and Preprocess Data for Classification Tasks

In [None]:
# Columns of the breast-cancer data that are handed to the "standardize" step.
feature_columns_classification = [
    "clump_thickness",
    "uni_cell_size",
    "uni_cell_shape",
    "marginal_adhesion",
    "single_epithelial_cell_size",
    "bare_nuclei",
    "bland_chromatin",
    "normal_nucleoli",
    "mitoses",
]

# Read the raw CSV and discard the sample_code_number column right away.
df_classification = load_data(
    "../assets/breast_cancer_wisconsin_original/breast-cancer-wisconsin.csv"
).drop(columns=["sample_code_number"])

# Step descriptions consumed by DataPipeline: a missing-value step followed
# by standardization of the feature columns listed above.
steps_classification = [
    {"operation": "fill_missing_values"},
    {"operation": "standardize", "params": {"column": feature_columns_classification}},
]
pipeline_classification = DataPipeline(steps_classification)

# Run the pipeline, then expose the label column under the name "target",
# which is the column name the later demo cells read.
df_classification = pipeline_classification.run(df_classification)
df_classification = df_classification.rename(columns={"class": "target"})


## Load and Preprocess Data for Regression Tasks

In [None]:
# Forest-fire columns that are handed to the "standardize" step.
numeric_columns_regression = [
    "X", "Y", "FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "rain",
]

# Build the step list in the same order as before: missing values,
# one one-hot step per categorical column ("month", then "day"), and
# finally standardization of the numeric columns.
steps_regression = [{"operation": "fill_missing_values"}]
steps_regression += [
    {"operation": "one_hot_encode", "params": {"column": categorical_column}}
    for categorical_column in ("month", "day")
]
steps_regression.append(
    {"operation": "standardize", "params": {"column": numeric_columns_regression}}
)

pipeline_regression = DataPipeline(steps_regression)

# Load the raw CSV, run it through the pipeline, and expose the "area"
# column under the name "target" used by the later demo cells.
df_regression = load_data("../assets/forest_fires/forestfires.csv")
df_regression = pipeline_regression.run(df_regression)
df_regression = df_regression.rename(columns={"area": "target"})


# Demo

## Splitting Data into Five Folds

In [None]:
# Split the classification data with k_fold_split (n_splits=5) and keep the
# result as a list so the folds can be iterated more than once.
folds_classification = list(k_fold_split(df_classification, n_splits=5))

# Each fold is unpacked as a (train, dev) pair; report both sizes with
# 1-based fold numbering.
for fold_number, (train, dev) in enumerate(folds_classification, start=1):
    print(f"Classification Data - Size of Fold {fold_number}: Train: {len(train)}, dev: {len(dev)}")


## Demonstrate Calculation of Distance Function

In [None]:
# Model used for the distance demo (and reused by the kernel demo cell).
knn_classifier = KNN(k=3)

# The first two rows of the regression data form the sample pair.
instance1, instance2 = df_regression.iloc[0], df_regression.iloc[1]

# Fit on the regression features/targets before asking for a distance.
regression_features = df_regression.drop(columns="target")
knn_classifier.fit(regression_features, df_regression["target"])

# vdm_distance is given the full frame, both rows and the target column name.
distance = knn_classifier.vdm_distance(df_regression, instance1, instance2, 'target')

print(f"Distance between instance 1 and instance 2: {distance}")


## Demonstrate Calculation of Kernel Function

In [None]:
# Kernel parameter and the two sample vectors for the demonstration.
gamma_value = 0.5
x, xq = np.array([1, 2, 3]), np.array([4, 5, 6])

# Evaluate the model's RBF kernel on the pair; knn_classifier is the model
# created in the distance demo cell.
kernel_value = knn_classifier.rbf_kernel(x, xq, gamma_value)

print(f"Kernel value between x and xq with gamma {gamma_value}: {kernel_value}")


## Demonstrate Classification using k-NN

In [None]:
# Every row except the first is used for fitting; the first row is held
# out as the query instance.
training_rows_classification = df_classification.iloc[1:]

knn_classifier = KNN(k=5, task="classification")
knn_classifier.fit(
    training_rows_classification.drop(columns="target"),
    training_rows_classification["target"],
)

# Held-out query instance, kept as a one-row frame.
# NOTE(review): this frame still carries its "target" column when passed to
# predict -- confirm that KNN.predict ignores or drops it.
sample_instance_classification = df_classification.iloc[0:1]

predicted_class = knn_classifier.predict(sample_instance_classification)
actual_class = df_classification.iloc[0:1]['target'].values[0]

print(f"Predicted Class for the instance: {predicted_class[0]}")
print(f"Actual Class for the instance: {actual_class}")


## Demonstrate Regression using k-NN

In [None]:
# Hold out the first row as the query instance and fit on the remaining
# rows only. Fitting on the full frame would put the query instance itself
# into the training set, so the prediction would be computed from a
# neighbour identical to the query. This mirrors the classification demo,
# which also fits on iloc[1:].
train_regression = df_regression.iloc[1:]

# Initialize the KNN object
knn_regressor = KNN(k=5, task="regression", gamma=3)
knn_regressor.fit(train_regression.drop("target", axis=1), train_regression["target"])

# Sample instance for regression (the held-out first row, as a one-row frame).
# NOTE(review): this frame still carries its "target" column when passed to
# predict -- confirm that KNN.predict ignores or drops it.
sample_instance_regression = df_regression.iloc[0:1]

predicted_value = knn_regressor.predict(sample_instance_regression)
print(f"Predicted Value for the instance: {predicted_value[0]}")
print(f"Actual Value for the instance: {df_regression.iloc[0:1]['target'].values[0]}")


## Demonstrate Edited k-NN

In [None]:
# Run the edited k-NN routine on the classification frame using the model
# from the classification demo cell.
# NOTE(review): the second argument (3) is presumably a neighbour count --
# confirm against KNN.edited_k_nearest_neighbors.
edited_data = knn_classifier.edited_k_nearest_neighbors(df_classification, 3)

# Both sizes are read after the call, matching the original evaluation order.
rows_before_editing, rows_after_editing = len(df_classification), len(edited_data)

print(f"Number of rows before editing: {rows_before_editing}")
print(f"Number of rows after editing: {rows_after_editing}")


## Demonstrate Condensed k-NN

In [None]:
# Run the condensed k-NN routine on the classification frame using the model
# from the classification demo cell.
# NOTE(review): the second argument (3) is presumably a neighbour count --
# confirm against KNN.condensed_k_nearest_neighbors.
condensed_data = knn_classifier.condensed_k_nearest_neighbors(df_classification, 3)

# Both sizes are read after the call, matching the original evaluation order.
rows_before_condensation, rows_after_condensation = len(df_classification), len(condensed_data)

print(f"Number of rows before condensation: {rows_before_condensation}")
print(f"Number of rows after condensation: {rows_after_condensation}")
