In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.impute import SimpleImputer

In [None]:
# Read the training and test splits from CSV files in the working directory.
train, test = (pd.read_csv(csv_path) for csv_path in ("train.csv", "test.csv"))

In [None]:
# Preprocessing: set aside the test IDs, encode the target as 0/1, and
# median-impute missing feature values.
ID_COL = "SEQN"
TARGET_COL = "age_group"
LABEL_MAP = {"Adult": 0, "Senior": 1}

# Save the test identifiers before the ID column is dropped. The membership
# check and errors="ignore" keep this cell safe to re-run once the column
# has already been removed.
if ID_COL in test.columns:
    test_ids = test[ID_COL]
train = train.drop(columns=ID_COL, errors="ignore")
test = test.drop(columns=ID_COL, errors="ignore")

# Encode the target. Skip when it is already numeric (cell re-run): pushing
# 0/1 through LABEL_MAP again would turn every label into NaN.
if not pd.api.types.is_numeric_dtype(train[TARGET_COL]):
    train[TARGET_COL] = train[TARGET_COL].map(LABEL_MAP)

# Labels that were missing or not present in LABEL_MAP are NaN at this point.
# A classifier cannot be fitted on NaN targets, so drop those rows explicitly
# and report how many were removed instead of failing later at fit time.
n_unlabelled = int(train[TARGET_COL].isna().sum())
if n_unlabelled:
    print(f"Dropping {n_unlabelled} training rows with a missing or unrecognised {TARGET_COL!r} label")
    train = train.dropna(subset=[TARGET_COL]).copy()
train[TARGET_COL] = train[TARGET_COL].astype("int64")

X = train.drop(columns=TARGET_COL)
y = train[TARGET_COL]

# NOTE(review): the medians are learned from every training row, including the
# rows that are later held out for validation, so the validation score is
# slightly optimistic. Fit the imputer after the split if that matters.
imputer = SimpleImputer(strategy="median")
X_imputed = imputer.fit_transform(X)
# Select the test columns in the training feature order so the imputer sees
# the same layout it was fitted on; a missing column raises a KeyError here.
X_test_imputed = imputer.transform(test[X.columns])

In [None]:
# Hold out 20% of the (already imputed) training rows for validation;
# random_state fixes the shuffle so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X_imputed, y, test_size=0.2, random_state=42
)

# Fit a random forest with a fixed seed (all other settings left at their
# defaults) and predict the held-out rows.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_val)

print(classification_report(y_val, y_pred))

# confusion_matrix puts true classes on the rows and predicted classes on the
# columns. labels=[0, 1] pins the class order (and a 2x2 shape) so the tick
# names below always line up with the 0 = Adult / 1 = Senior target encoding.
class_names = ["Adult", "Senior"]
cm = confusion_matrix(y_val, y_pred, labels=[0, 1])
ax = sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
)
ax.set(
    xlabel="Predicted label",
    ylabel="True label",
    title="Validation confusion matrix",
)
plt.show()

In [None]:
# Horizontal bar chart of the fitted forest's importance score per feature
# column (one bar per column of X, in column order).
feat_names, importances = X.columns, clf.feature_importances_
sns.barplot(y=feat_names, x=importances)
plt.show()

In [None]:
# Predict a label for every imputed test row and save the predictions as a
# single-column CSV without the index.
# NOTE(review): the test IDs saved earlier in `test_ids` are not written to
# the file; confirm that the expected submission format has no ID column.
preds = clf.predict(X_test_imputed)
submission = pd.Series(preds, name='age_group').to_frame()
submission.to_csv("submission.csv", index=False)