In [None]:
from google.colab import files
uploaded = files.upload()

# Colab does not overwrite an existing file on upload: a repeated upload is
# saved under a new name such as "Train_Data (2).csv", while later cells keep
# reading "Train_Data.csv" and would silently pick up the older copy.
# Writing the freshly uploaded bytes back under the name they were uploaded
# with keeps the canonical file names in sync with the latest upload.
# NOTE(review): assumes the returned mapping is {file name: bytes content} —
# confirm against the installed google.colab version.
for uploaded_name, uploaded_bytes in uploaded.items():
    with open(uploaded_name, 'wb') as out_file:
        out_file.write(uploaded_bytes)

Saving Train_Data.csv to Train_Data (2).csv
Saving Test_Data.csv to Test_Data (2).csv


In [32]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report


# Read the labelled training set and the unlabelled test set from the
# current working directory, in that order.
train, test = (
    pd.read_csv(csv_name) for csv_name in ('Train_Data.csv', 'Test_Data.csv')
)

# Quick sanity check on the (rows, columns) of both frames.
print(f"Train shape: {train.shape}", f"Test shape: {test.shape}", sep="\n")


# Rows without a target label cannot be used for supervised training.
# `.copy()` makes the result an independent frame, so the column assignments
# below never act on a view of the raw data.
train = train.dropna(subset=['age_group']).copy()


# Every column except the target is used as a model input.
# NOTE(review): this keeps any identifier-like column as a feature as well —
# confirm that is intended.
features = [col for col in train.columns if col != 'age_group']


# Fill missing values with statistics taken from the training data only and
# apply the same fill value to the test set, so both frames are imputed
# consistently. Numeric columns use the median; everything else uses the most
# frequent value. `is_numeric_dtype` is used instead of comparing against
# 'object' so categorical / string dtypes are not mistaken for numeric ones.
# NOTE: the statistics are computed before the train/validation split further
# down, so validation rows contribute to them.
for col in features:
    if pd.api.types.is_numeric_dtype(train[col]):
        fill_val = train[col].median()
    else:
        fill_val = train[col].mode()[0]
    train[col] = train[col].fillna(fill_val)
    if col in test.columns:
        test[col] = test[col].fillna(fill_val)


# Encode the target as 0/1. `.map` turns any value missing from the mapping
# into NaN, which would only surface later as an obscure failure during
# splitting or fitting, so unexpected labels are rejected up front.
label_map = {'Adult': 0, 'Senior': 1}
unexpected_labels = set(train['age_group'].unique()) - set(label_map)
if unexpected_labels:
    raise ValueError(
        "Unexpected age_group labels: "
        + ", ".join(sorted(str(lbl) for lbl in unexpected_labels))
    )
train['age_group'] = train['age_group'].map(label_map)


# Hold out 20% of the labelled data for validation. `stratify=y` keeps the
# class proportions the same in both parts, and the fixed seed makes the
# split repeatable.
X = train[features]
y = train['age_group']
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)


def _fit_and_score(model, label):
    """Fit `model` on the training split and report its validation accuracy.

    Args:
        model: an unfitted estimator exposing `fit` and `predict`.
        label: human-readable model name used in the printed line.

    Returns:
        The model's predictions for `X_val`.
    """
    model.fit(X_train, y_train)
    val_pred = model.predict(X_val)
    print(f"{label} Accuracy:", accuracy_score(y_val, val_pred))
    return val_pred


# NOTE(review): accuracy is the only per-model metric printed here; with an
# imbalanced target it can look good while the minority class is rarely
# predicted — consider also inspecting per-class recall.
rf = RandomForestClassifier(random_state=42)
rf_pred = _fit_and_score(rf, "Random Forest")


lgbm = LGBMClassifier(random_state=42)
lgbm_pred = _fit_and_score(lgbm, "LightGBM")


# `use_label_encoder` was dropped: the installed XGBoost does not recognise it
# and only emits a "Parameters: { "use_label_encoder" } are not used" warning.
xgb = XGBClassifier(random_state=42, eval_metric='logloss')
xgb_pred = _fit_and_score(xgb, "XGBoost")


# Hard majority vote: each model contributes a 0/1 prediction, and a sample is
# labelled 1 when at least two of the three models agree on it.
vote_totals = rf_pred + lgbm_pred + xgb_pred
ensemble_pred = (vote_totals >= 2).astype(int)

ensemble_accuracy = accuracy_score(y_val, ensemble_pred)
print("Ensemble Accuracy:", ensemble_accuracy)

# Per-class precision / recall / F1 for the voted predictions.
ensemble_report = classification_report(
    y_val, ensemble_pred, target_names=['Adult', 'Senior']
)
print(ensemble_report)


# Refit every model on the full labelled set (training + validation rows)
# before producing the final predictions.
for final_model in (rf, lgbm, xgb):
    final_model.fit(X, y)


# Predict the test set with each model, using the same feature columns the
# models were trained on.
test_inputs = test[features]
rf_test, lgbm_test, xgb_test = (
    final_model.predict(test_inputs) for final_model in (rf, lgbm, xgb)
)

# Same 2-of-3 majority vote as used on the validation split.
test_vote_totals = rf_test + lgbm_test + xgb_test
ensemble_test = (test_vote_totals >= 2).astype(int)


# Build the submission frame from the voted test predictions.
# The labels are written in their encoded form (0 = Adult, 1 = Senior); map
# them back with {0: 'Adult', 1: 'Senior'} if the target format requires the
# original strings.
submission = pd.DataFrame({
    'age_group': ensemble_test
})

# Use one path for both writing and downloading. Previously the file was
# written to '/content/submission.csv' but downloaded via the relative name
# 'submission.csv', which only resolves to the same file when the working
# directory happens to be /content.
submission_path = '/content/submission.csv'
submission.to_csv(submission_path, index=False)
print("✅ Submission file saved as submission.csv")
files.download(submission_path)

Train shape: (1966, 9)
Test shape: (312, 8)
Random Forest Accuracy: 0.8235294117647058
[LightGBM] [Info] Number of positive: 251, number of negative: 1310
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000107 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1000
[LightGBM] [Info] Number of data points in the train set: 1561, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.160794 -> initscore=-1.652329
[LightGBM] [Info] Start training from score -1.652329
LightGBM Accuracy: 0.7979539641943734
XGBoost Accuracy: 0.7877237851662404
Ensemble Accuracy: 0.80306905370844
              precision    recall  f1-score   support

       Adult       0.84      0.94      0.89       328
      Senior       0.23      0.10      0.13        63

    accuracy                           0.80       391
   macro avg       0.54      0.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 314, number of negative: 1638
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000276 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1019
[LightGBM] [Info] Number of data points in the train set: 1952, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.160861 -> initscore=-1.651838
[LightGBM] [Info] Start training from score -1.651838
✅ Submission file saved as submission.csv


Parameters: { "use_label_encoder" } are not used.



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>