## Displaying Features in the Merged Dataset (`reduced_North_data.csv`)

In [None]:
import pandas as pd

# Read the reduced dataset stored under the Merged data directory.
df = pd.read_csv('../data/Merged/reduced_North_data.csv')

# Show every column name as a plain Python list.
# NOTE(review): the label printed below says "merge.csv" although the file
# read above is reduced_North_data.csv -- confirm which name is intended.
print("Features/Columns in merge.csv:")
print(list(df.columns))


Features/Columns in merge.csv:
['elevation', 'spring_prec', 'winter_prec', 'autumn_prec', 'GRIDCODE', 'summer_prec', 'winter_tmin', 'ORG_CARBON', 'CEC_CLAY', 'GYPSUM', 'SAND', 'SILT', 'TOTAL_N', 'PH_WATER', 'BULK', 'BSAT', 'TCARBON_EQ', 'COARSE', 'CEC_SOIL', 'CN_RATIO', 'TEX_9', 'TEX_11', 'TEX_5', 'ALUM_SAT', 'TEX_12', 'fire']


In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split

# NOTE(review): this cell reads reduced_data.csv, whereas the first cell reads
# reduced_North_data.csv -- confirm that the difference is intentional.
df = pd.read_csv('../data/Merged/reduced_data.csv')

# 'fire' is the target; every remaining column is used as a feature.
y = df['fire']
X = df.drop(columns='fire')

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report how many rows ended up in each partition.
print(f"Training samples: {X_train.shape[0]}, Test samples: {X_test.shape[0]}")


Training samples: 400960, Test samples: 100241


## Building K-Nearest Neighbors (KNN) From Scratch

In [13]:
import numpy as np
from collections import Counter

def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between ``x1`` and ``x2``.

    The operands are subtracted element-wise, the differences are squared
    and summed over all elements, and the square root of that sum is returned.
    """
    delta = x1 - x2
    squared_total = np.sum(delta * delta)
    return np.sqrt(squared_total)

class MyKNNClassifier:
    """Brute-force k-nearest-neighbours classifier (Euclidean distance, majority vote).

    ``fit`` only stores the training data. ``predict`` finds, for every query
    row, the ``k`` closest stored rows and returns the most frequent label
    among them. When several labels are equally frequent, the label that
    occurs first among the neighbours ordered nearest-first is returned.

    Distances are computed with vectorised NumPy operations on blocks of query
    rows instead of one Python call per (query, training row) pair, which is
    what makes prediction practical on large training sets.
    """

    # Upper bound on the number of entries in one (queries x training rows)
    # distance block; keeps peak memory bounded regardless of training size.
    _MAX_BLOCK_ENTRIES = 2 ** 23

    def __init__(self, k=5):
        """Create a classifier that votes among the ``k`` nearest neighbours."""
        self.k = k

    def fit(self, X, y):
        """Store the training features ``X`` and their labels ``y`` as arrays."""
        self.X_train = np.array(X)
        self.y_train = np.array(y)

    @staticmethod
    def _as_feature_matrix(data):
        """Return ``data`` as a 2-D float array (rows = samples).

        A 1-D input is treated as samples of a single feature.

        Raises:
            ValueError: if the input is neither 1-D nor 2-D.
        """
        matrix = np.asarray(data, dtype=float)
        if matrix.ndim == 1:
            matrix = matrix.reshape(-1, 1)
        if matrix.ndim != 2:
            raise ValueError(
                f"expected 1-D or 2-D feature data, got {matrix.ndim} dimensions"
            )
        return matrix

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Args:
            X: array-like of query samples with the same number of features
                as the data passed to ``fit``.

        Returns:
            NumPy array with one predicted label per query row (an empty
            array when ``X`` has no rows).

        Raises:
            ValueError: if ``k`` is smaller than 1, if no training rows were
                stored, or if the feature counts of ``X`` and the training
                data differ.
        """
        queries = self._as_feature_matrix(X)
        if queries.shape[0] == 0:
            return np.array([])

        if self.k < 1:
            raise ValueError(f"k must be at least 1, got {self.k}")

        train = self._as_feature_matrix(self.X_train)
        n_train = train.shape[0]
        if n_train == 0:
            raise ValueError("the classifier was fitted with no training samples")
        if queries.shape[1] != train.shape[1]:
            raise ValueError(
                f"X has {queries.shape[1]} features, "
                f"but the training data has {train.shape[1]}"
            )

        # Asking for more neighbours than exist means voting among all of them.
        k = min(self.k, n_train)

        # Squared norms of the training rows are reused by every block.
        train_sq = np.einsum("ij,ij->i", train, train)
        block_rows = max(1, self._MAX_BLOCK_ENTRIES // n_train)

        y_pred = []
        for start in range(0, queries.shape[0], block_rows):
            block = queries[start:start + block_rows]

            # Squared distances via ||q - t||^2 = ||q||^2 - 2 q.t + ||t||^2.
            # The square root is skipped: it does not change the ranking.
            sq_dists = block @ train.T
            sq_dists *= -2.0
            sq_dists += np.einsum("ij,ij->i", block, block)[:, None]
            sq_dists += train_sq
            # Rounding can leave tiny negative values; clamp them to zero.
            np.maximum(sq_dists, 0.0, out=sq_dists)

            if k < n_train:
                # Select the k smallest per row without sorting the whole row.
                nearest = np.argpartition(sq_dists, k - 1, axis=1)[:, :k]
            else:
                nearest = np.tile(np.arange(n_train), (block.shape[0], 1))

            # Order the selected neighbours nearest-first so that vote ties
            # resolve in favour of the closest neighbour's label.
            nearest_dists = np.take_along_axis(sq_dists, nearest, axis=1)
            order = np.argsort(nearest_dists, axis=1, kind="stable")
            nearest = np.take_along_axis(nearest, order, axis=1)

            for neighbour_idx in nearest:
                votes = Counter(self.y_train[neighbour_idx])
                y_pred.append(votes.most_common(1)[0][0])

        return np.array(y_pred)


In [14]:
# Build the from-scratch classifier with 5 neighbours and hand it the training split.
knn = MyKNNClassifier(k=5)
knn.fit(X=X_train, y=y_train)

# Prediction is brute force: every test row is compared with every stored
# training row, so this is by far the slowest step of the notebook.
knn_preds = knn.predict(X=X_test)


KeyboardInterrupt: 