In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
import joblib

def _load_labelled_csvs(data_folder):
    """Read every ``.csv`` file in *data_folder* into feature rows and labels.

    Each file contributes all of its rows; every row is labelled with the
    file name minus its trailing ``.csv`` extension.

    Args:
        data_folder: Path of an existing directory to scan (not recursive).

    Returns:
        A ``(features, labels)`` tuple: a list with one 2-D array per usable
        file, and a flat list holding one label per row. Both are empty when
        no usable file is found.
    """
    suffix = ".csv"
    features, labels = [], []

    # os.listdir() yields entries in arbitrary order; sorting keeps the
    # stacked row order stable between runs and machines, which the fixed
    # random_state of the classifier needs in order to be meaningful.
    for filename in sorted(os.listdir(data_folder)):
        if not filename.endswith(suffix):
            continue

        # Strip only the trailing extension. str.replace() would also remove
        # any ".csv" that happens to occur earlier in the file name.
        label = filename[: -len(suffix)]
        filepath = os.path.join(data_folder, filename)

        try:
            df = pd.read_csv(filepath)
        except pd.errors.EmptyDataError:
            # A zero-byte file has no header line for pandas to parse.
            df = None

        if df is None or df.empty:
            print(f"Warning: {filename} is empty, skipping")
            continue

        features.append(df.values)
        labels.extend([label] * len(df))

    return features, labels


def train_model(data_folder, model_name):
    """Train a random-forest classifier on the CSV files of *data_folder*.

    Every ``<label>.csv`` file supplies the samples of one class. The stacked
    rows are standardised with a ``StandardScaler`` and used to fit a
    ``RandomForestClassifier``. The fitted model is written to
    ``<model_name>.pkl`` and the scaler to ``<model_name>_scaler.pkl``.

    Training is skipped (with a message, nothing is written) when the folder
    does not exist or contains no non-empty CSV file.

    Args:
        data_folder: Directory holding one CSV file per class.
        model_name: Path prefix of the two ``.pkl`` files that are written.

    Returns:
        None.
    """
    # A missing folder is treated like an empty one so that one absent
    # dataset does not abort the training of the others.
    if os.path.isdir(data_folder):
        X, y = _load_labelled_csvs(data_folder)
    else:
        X, y = [], []

    if not X:
        print(f"No data found in {data_folder}, skipping training for {model_name}")
        return

    X = np.vstack(X)
    y = np.array(y)

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    model = RandomForestClassifier(n_estimators=150, random_state=42)
    model.fit(X_scaled, y)

    # The scaler is saved next to the model: inference has to apply the same
    # transformation that was fitted here.
    joblib.dump(model, f"{model_name}.pkl")
    joblib.dump(scaler, f"{model_name}_scaler.pkl")

    print(f" {model_name} trained and saved with {len(y)} samples")

def main():
    """Train and save the face model, then the hand model."""
    # (dataset folder, output model name) pairs, processed in this order.
    training_jobs = (
        ("dataset/face", "face_model"),
        ("dataset/hand", "hand_model"),
    )
    for folder, name in training_jobs:
        train_model(folder, name)

# Start training only when this code is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()


 face_model trained and saved with 5190 samples
 hand_model trained and saved with 6520 samples
