In [None]:
'''data loading'''
import os
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from tqdm import tqdm

# ------------------------------
# Map raw age labels to classes
# ------------------------------
def age_to_class(age):
    """Map a raw age label to a decade-bucket class index.

    Buckets: <=19 -> 0, 20-29 -> 1, 30-39 -> 2, 40-49 -> 3,
    50-59 -> 4, 60-69 -> 5, and 70 or above -> 6.

    Args:
        age: Anything ``int()`` accepts (e.g. a folder name such as "34").

    Returns:
        The class index (0-6), or ``None`` when ``age`` cannot be parsed
        as an integer; in that case a warning is printed.
    """
    try:
        years = int(age)
    except ValueError:
        print(f"⚠️ Invalid label '{age}' – skipping")
        return None

    # Inclusive upper bound of each bucket; a bound's position is its class id.
    upper_bounds = (19, 29, 39, 49, 59, 69)
    for class_id, bound in enumerate(upper_bounds):
        if years <= bound:
            return class_id

    # Older than every listed bound: the open-ended last class.
    return len(upper_bounds)

# ------------------------------
# Load data from a folder
# ------------------------------
def _fit_to_length(flat_data, fixed_len):
    """Truncate or right-pad (np.pad default: zeros) a 1-D array to fixed_len."""
    if len(flat_data) >= fixed_len:
        return flat_data[:fixed_len]
    return np.pad(flat_data, (0, fixed_len - len(flat_data)))


def load_data_from_folder(folder_path, fixed_len=None):
    """Load flattened CSV samples from ``folder_path/<label>/*.csv``.

    Each sub-folder name is passed to ``age_to_class`` to obtain the class
    of every CSV inside it. Sub-folders whose name is not a valid label are
    skipped entirely (one warning per folder, printed by ``age_to_class``),
    so their files neither get read nor influence the inferred length.

    Args:
        folder_path: Directory containing one sub-directory per label.
        fixed_len: Length every flattened sample is truncated/padded to.
            When ``None`` it is set to the shortest loaded sample's length.

    Returns:
        Tuple ``(X, y, fixed_len)``: ``X`` is the array of samples, ``y``
        the array of class indices, and ``fixed_len`` the length used.

    Raises:
        ValueError: If no sample could be loaded from ``folder_path``.
    """
    samples, labels = [], []

    # sorted(): os.listdir returns entries in arbitrary order; a fixed order
    # keeps the sample order (and any seeded split of it) reproducible.
    for label in sorted(os.listdir(folder_path)):
        label_path = os.path.join(folder_path, label)
        if not os.path.isdir(label_path):
            continue

        # Resolve the class once per folder, not once per file.
        label_class = age_to_class(label)
        if label_class is None:
            continue

        csv_files = sorted(
            name for name in os.listdir(label_path) if name.endswith(".csv")
        )
        for file in tqdm(csv_files, desc=f"Loading {label}"):
            file_path = os.path.join(label_path, file)
            try:
                # NOTE(review): read_csv is called with defaults, so the first
                # row is consumed as a header — confirm the files have one.
                flat_data = pd.read_csv(file_path).values.flatten()
            except Exception as e:
                # Best effort: report the unreadable file and keep going.
                print(f"⚠️ Error loading {file_path}: {e}")
                continue
            samples.append(flat_data)
            labels.append(label_class)

    if not samples:
        raise ValueError(f"No valid data loaded from {folder_path}")

    # Each file is read only once; the common length is inferred from the
    # samples already in memory instead of a second pass over the disk.
    if fixed_len is None:
        fixed_len = min(len(sample) for sample in samples)

    X = [_fit_to_length(sample, fixed_len) for sample in samples]
    return np.array(X), np.array(labels), fixed_len

# ------------------------------
# Load and split data
# ------------------------------
# Raw string: in a normal literal "\b" and "\t" are escape sequences
# (backspace and tab), which silently corrupted this Windows path.
data_folder = r"E:\brainageprediction\split\train"
X, y, fixed_len = load_data_from_folder(data_folder)

# Hold out 25% for testing; stratify on y so the split follows the class
# distribution of the labels.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)


ModuleNotFoundError: No module named 'catboost'

In [None]:
'''lightgbm'''
from tqdm.auto import tqdm
from sklearn.metrics import classification_report

# Train a LightGBM classifier on the training split and report per-class
# metrics on the held-out split.
print("\n🔷 Training LightGBM Classifier...")
tqdm.write("Fitting LightGBM...")

lgb_model = LGBMClassifier(
    # The age labels span more than two classes. LightGBM only honours
    # `is_unbalance` for binary / multiclassova objectives, so it had no
    # effect here; `class_weight="balanced"` is the multiclass mechanism.
    class_weight="balanced",
    n_estimators=100,
    learning_rate=0.05,
    random_state=42,
    n_jobs=-1
)

lgb_model.fit(X_train, y_train)
lgb_pred = lgb_model.predict(X_test)

print("\n📋 LightGBM Classification Report:")
print(classification_report(y_test, lgb_pred))


In [None]:
'''catboost'''
# Train a CatBoost classifier on the training split and report per-class
# metrics on the held-out split.
print("\n🔷 Training CatBoost Classifier...")
tqdm.write("Fitting CatBoost...")

# Hyper-parameters gathered in one place, then unpacked into the constructor.
cat_params = {
    "iterations": 100,
    "learning_rate": 0.05,
    "depth": 6,
    "random_seed": 42,
    "verbose": 0,  # Suppress CatBoost's own progress
}
cat_model = CatBoostClassifier(**cat_params)

cat_model.fit(X_train, y_train)
cat_pred = cat_model.predict(X_test)

print("\n📋 CatBoost Classification Report:")
print(classification_report(y_test, cat_pred))
