In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE

In [9]:
df = pd.read_csv("3_classes_processed_data.csv")

In [10]:
# Encode categorical variables
label_encoders = {}
for column in ['ductility', 'roof', 'relative_position']:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])

# Encode the target variable --> needed for XGBoost
label_encoder_target = LabelEncoder()
df['structural_system'] = label_encoder_target.fit_transform(df['structural_system'])


In [11]:
# Define features and target
X = df.drop(columns=['structural_system'])
y = df['structural_system']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply SMOTE to handle class imbalance
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

**Only weights:**

In [12]:
# Initialize and train the XGBoost classifier
xgb_classifier = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
xgb_classifier.fit(X_train, y_train)

# Make predictions
y_pred = xgb_classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Accuracy: 0.8115942028985508
Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.62      0.70        63
           1       0.82      0.95      0.88       238
           2       0.71      0.34      0.46        44

    accuracy                           0.81       345
   macro avg       0.78      0.64      0.68       345
weighted avg       0.80      0.81      0.79       345



**SMOte + Weights:**

In [13]:
# Initialize and train the XGBoost classifier
xgb_classifier = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
xgb_classifier.fit(X_train_res, y_train_res)

# Make predictions
y_pred = xgb_classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Accuracy: 0.782608695652174
Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.62      0.66        63
           1       0.82      0.90      0.86       238
           2       0.57      0.39      0.46        44

    accuracy                           0.78       345
   macro avg       0.70      0.63      0.66       345
weighted avg       0.77      0.78      0.77       345

