In [None]:
import pandas as pd
import seaborn as sns
import sklearn
from dmgpred.cleaning import clean_single
from dmgpred.featurize import featurize_single
from joblib import load

# Ask scikit-learn transformers to emit pandas DataFrames from transform().
sklearn.set_config(transform_output="pandas")

# Apply seaborn's default theme to every figure drawn in this notebook.
sns.set_theme()

In [None]:
# Base directories. Both end with a trailing slash, so file names are appended
# directly (no extra "/"), which keeps every derived path free of "//".
DATA_PATH = "../data/"
OUTPUT_PATH = "../output/"

# Input CSV files located under DATA_PATH.
TEST_VALUES_PATH = f"{DATA_PATH}test_values.csv"
TRAIN_VALUES_PATH = f"{DATA_PATH}train_values.csv"
TRAIN_LABELS_PATH = f"{DATA_PATH}train_labels.csv"

# Prediction file located under OUTPUT_PATH.
SUBMISSION_PATH = f"{OUTPUT_PATH}Mandalorians_prediction.csv"

# Column used as the DataFrame index when the CSV files are read.
INDEX_COL = "building_id"

In [None]:
# Load the persisted pipeline from disk. Only load artifacts you trust:
# deserializing a model file can execute arbitrary code.
model = load(f"{OUTPUT_PATH}trained_model.pkl")

# The final estimator is registered in the pipeline under the name "clf".
classifier = model.named_steps["clf"]
feature_names = classifier.feature_names_in_

# Label each importance with its feature name and sort ascending before
# drawing the horizontal bar chart.
importances = (
    pd.Series(classifier.feature_importances_, index=feature_names)
    .sort_values(ascending=True)
)
importances.plot.barh(figsize=(10, 10))

In [None]:
# Read the raw training features, indexed by INDEX_COL.
X_train = pd.read_csv(TRAIN_VALUES_PATH, index_col=INDEX_COL)

# Read the labels and keep only the "damage_grade" column as a standalone Series.
y_train = pd.read_csv(TRAIN_LABELS_PATH, index_col=INDEX_COL)["damage_grade"].copy()

# Run the project's cleaning step, then its featurization step, on the features.
X_train = X_train.pipe(clean_single).pipe(featurize_single)

In [None]:
# Show the loaded pipeline's steps keyed by name (e.g. "preprocessor", "clf").
model.named_steps

In [None]:
# Push the training features through the loaded pipeline's preprocessing
# steps, in order. Only transform() is called here, so the steps are used
# exactly as they were stored in the model file (presumably already fitted).
prep = model.named_steps["preprocessor"]
X_train_processed = X_train
for step_name in ("normalizer", "encoder"):
    X_train_processed = prep[step_name].transform(X_train_processed)

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier

# Fixed seed so the hold-out split and the boosted model give the same
# numbers on every Restart & Run All.
RANDOM_STATE = 42

# Encode the target as consecutive integer ids before fitting the classifier.
# NOTE: this overwrites `y_train`; re-running the cell fits the encoder on the
# already-encoded ids, so the original label names are only available on the
# first run after the data-loading cell.
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)

# Stratified hold-out split of the preprocessed training data.
X_train_split, X_test_split, y_train_split, y_test_split = train_test_split(
    X_train_processed, y_train, stratify=y_train, random_state=RANDOM_STATE
)
xgb = XGBClassifier(random_state=RANDOM_STATE)
print(X_test_split.shape)

xgb.fit(X_train_split, y_train_split)
y_pred = xgb.predict(X_test_split)

# Pin the matrix rows/columns to the classifier's class order and label the
# axes with the original target values instead of the encoded ids.
cm = confusion_matrix(y_test_split, y_pred, labels=xgb.classes_)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=label_encoder.inverse_transform(xgb.classes_),
)
disp.plot()
# Turn grid lines off for this figure (presumably those added by the seaborn
# theme) so they do not overlay the matrix cells.
plt.grid(False)
plt.show()

In [None]:
from sklearn.svm import SVC

# Fit a support-vector classifier with default settings on the same training
# split used for the XGBoost model. fit() returns the estimator itself, so the
# fitted model is bound in one step and then shown as the cell output.
svc = SVC().fit(X_train_split, y_train_split)
svc

In [None]:
# Predict the hold-out split with the SVC.
# NOTE: this reuses the name `y_pred`, replacing the XGBoost predictions made
# earlier; any later use of `y_pred` refers to the SVC once this cell has run.
y_pred = svc.predict(X_test_split)

In [None]:
from sklearn.metrics import f1_score, matthews_corrcoef

# Score whatever predictions are currently stored in `y_pred` against the
# hold-out labels: Matthews correlation coefficient first, then micro-F1.
mcc = matthews_corrcoef(y_test_split, y_pred)
print(mcc)

micro_f1 = f1_score(y_test_split, y_pred, average="micro")
print(micro_f1)