In [1]:
import os
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import joblib


## Paths

In [2]:

# Directories used by this notebook, relative to the notebook's working directory:
#   FEATURES_PATH - where the feature/label arrays are read from and the splits are written to
#   MODELS_PATH   - where the fitted scaler is persisted
FEATURES_PATH = "../features"
MODELS_PATH = "../models"

# Create both directories up front (no-op when they already exist) so the
# save cell at the end of the notebook cannot fail on a missing folder.
for _dir in (MODELS_PATH, FEATURES_PATH):
    os.makedirs(_dir, exist_ok=True)


## Load features

In [3]:

# Locate the precomputed feature matrix and label vector on disk.
# (Judging by the file name, X_hog.npy presumably holds HOG descriptors - confirm
# against the notebook that produced it.)
features_file = os.path.join(FEATURES_PATH, "X_hog.npy")
labels_file = os.path.join(FEATURES_PATH, "y.npy")

X = np.load(features_file)
y = np.load(labels_file)

# Report the shapes so a mismatch between samples and labels is visible at a glance.
for caption, loaded in (("Original X shape:", X), ("Original y shape:", y)):
    print(caption, loaded.shape)

Original X shape: (11190, 8100)
Original y shape: (11190,)


## Train-Test Split

In [4]:
# Hold out 20% of the samples as a test set.
# - random_state pins the shuffle so the split is reproducible across runs.
# - stratify=y asks scikit-learn to preserve the label distribution in both parts.
split_options = dict(test_size=0.2, random_state=42, stratify=y)

X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)


Train shape: (8952, 8100)
Test shape: (2238, 8100)


## Feature Scaling


In [5]:
# Learn the per-feature mean and variance from the training split ONLY, so that
# no statistics of the test split leak into preprocessing.
scaler = StandardScaler()
scaler.fit(X_train)

# Apply the same (train-derived) transformation to both splits.
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Save train/test splits, scaled features, and scaler

In [6]:
# Every array this notebook persists, keyed by its file name inside FEATURES_PATH.
# The raw (unscaled) splits and labels are listed first, then the scaled features.
arrays_to_save = {
    "X_train.npy": X_train,
    "X_test.npy": X_test,
    "y_train.npy": y_train,
    "y_test.npy": y_test,
    "X_train_scaled.npy": X_train_scaled,
    "X_test_scaled.npy": X_test_scaled,
}

for file_name, array in arrays_to_save.items():
    np.save(os.path.join(FEATURES_PATH, file_name), array)

# Persist the fitted scaler so the identical transformation can be re-applied later.
scaler_file = os.path.join(MODELS_PATH, "scaler.pkl")
joblib.dump(scaler, scaler_file)

print("Train-test split and scaling completed correctly ✔️")

Train-test split and scaling completed correctly ✔️
