In [1]:
# Cell 1: suppress warnings & import libraries
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score, recall_score,
    classification_report, multilabel_confusion_matrix
)
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.multiclass import OneVsRestClassifier

# oversampling
from imblearn.over_sampling import RandomOverSampler, SMOTE

ModuleNotFoundError: No module named 'pandas'

In [None]:
# Cell 2: read the pre-extracted feature tables
# Both CSV files are looked up relative to the notebook's working directory.
train_path = 'train_features_no_dmg_resnet.csv'
val_path = 'val_features_no_dmg_resnet.csv'
train_df, val_df = (pd.read_csv(csv_path) for csv_path in (train_path, val_path))

print(f"Training set: {train_df.shape}, Validation set: {val_df.shape}")

In [None]:
# Cell 3: separate the target column from the features and integer-encode it
label_col = 'damage_type'   # ← replace with your actual target column name

# Raw (un-encoded) targets, one array per split.
y_train = train_df[label_col].to_numpy()
y_val = val_df[label_col].to_numpy()

# Everything except the target column is treated as a feature.
X_train = train_df.drop(label_col, axis=1).to_numpy()
X_val = val_df.drop(label_col, axis=1).to_numpy()

# The encoder learns the class vocabulary from the training targets only;
# the validation targets are mapped with that same vocabulary.
le = preprocessing.LabelEncoder().fit(y_train)
y_train = le.transform(y_train)
y_val = le.transform(y_val)

print(f"Classes: {le.classes_}")

In [None]:
# Per-class frequency of the training targets, most frequent first.
# At this point y_train is the 1-D integer array produced by the LabelEncoder,
# so it has no DataFrame-style `.sum().sort_values()`; wrap it in a Series and
# use value_counts() (which sorts in descending order by default).
# inverse_transform restores the original class names for readable output.
label_counts = pd.Series(le.inverse_transform(y_train)).value_counts()
print(label_counts)

In [None]:
import matplotlib.pyplot as plt

# Bar chart of the label frequencies, annotated through the Axes object
# returned by the pandas plotting call.
ax = label_counts.plot(kind='bar')
ax.set_title("Label Distribution in y_train")
ax.set_ylabel("Frequency")
ax.set_xlabel("Labels")
plt.show()

In [None]:
# Cell 4: rebalance the training set by oversampling
# Option A: RandomOverSampler with a fixed seed
ros = RandomOverSampler(random_state=42)
X_res, y_res = ros.fit_resample(X_train, y_train)

# Option B (alternative): SMOTE for multiclass
# sm = SMOTE(random_state=42)
# X_res, y_res = sm.fit_resample(X_train, y_train)

# Per-class sample counts before and after resampling.
for caption, encoded_labels in (
    ("Before resampling:", y_train),
    ("After  resampling:", y_res),
):
    print(caption, np.bincount(encoded_labels))

In [None]:
# Cell 5: one-vs-rest Logistic Regression fitted on the resampled training set
lr_base = LogisticRegression(max_iter=1000, random_state=42)
lr = OneVsRestClassifier(lr_base)
lr.fit(X_res, y_res)
y_pred_lr = lr.predict(X_val)

# Validation metrics, reported in the same order as they are computed.
print("Logistic Regression Metrics")
lr_confusion = multilabel_confusion_matrix(y_val, y_pred_lr)
print("Confusion matrices:\n", lr_confusion)
lr_accuracy = accuracy_score(y_val, y_pred_lr)
print("Accuracy:", lr_accuracy)
lr_f1 = f1_score(y_val, y_pred_lr, average='micro')
print("F1 (micro):", lr_f1)
lr_precision = precision_score(y_val, y_pred_lr, average='micro')
print("Precision (micro):", lr_precision)
lr_recall = recall_score(y_val, y_pred_lr, average='micro', zero_division=0)
print("Recall (micro):", lr_recall)
lr_report = classification_report(y_val, y_pred_lr, target_names=le.classes_)
print("\nClassification Report:\n", lr_report)

In [None]:
# Cell 6: one-vs-rest MLP fitted on the resampled training set
mlp_base = MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=42)
mlp = OneVsRestClassifier(mlp_base)
mlp.fit(X_res, y_res)
y_pred_mlp = mlp.predict(X_val)

# Validation metrics for the MLP.
print("MLP Classifier Metrics")
mlp_accuracy = accuracy_score(y_val, y_pred_mlp)
print("Accuracy:", mlp_accuracy)
mlp_f1 = f1_score(y_val, y_pred_mlp, average='micro')
print("F1 (micro):", mlp_f1)
mlp_report = classification_report(y_val, y_pred_mlp, target_names=le.classes_)
print("Classification Report:\n", mlp_report)

In [None]:
#Logistic Regression

import pandas as pd
import numpy as np
from sklearn import metrics 
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer, precision_score, accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score

from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import classification_report

# One-vs-rest Logistic Regression (L2 penalty, newton-cg solver) trained on
# the current X_train / y_train and scored on the hold-out validation split.
logreg_base = LogisticRegression(penalty='l2', solver='newton-cg', max_iter=1500)
clf = OneVsRestClassifier(logreg_base)
clf = clf.fit(X_train, y_train)
y_pred = clf.predict(X_val)

# One 2x2 confusion matrix per label.
confM = multilabel_confusion_matrix(y_val, y_pred)
print(confM)

# Micro-averaged summary scores.
micro = {'average': 'micro'}
acc = accuracy_score(y_val, y_pred)
F1 = f1_score(y_val, y_pred, **micro)
precicion = precision_score(y_val, y_pred, **micro)
recall = recall_score(y_val, y_pred, zero_division=0, **micro)
print(
    'By hold-out evaluation: acc = ', acc,
    ',F1 = ', F1,
    'precicion =  ', precicion,
    'recall = ', recall,
)
print("\nClassification Report:\n", classification_report(y_val, y_pred))



In [None]:
# === Imports ===
import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score, recall_score,
    multilabel_confusion_matrix, classification_report
)

# === Data Splitting ===
# Positional split of each frame into feature columns and label columns.
# The original slices (`:2047` for X, `2048:` for y) left column index 2047 in
# neither X nor y. Splitting at a single boundary keeps every column.
# NOTE(review): assumes the first 2048 columns are features and all remaining
# columns are label indicators — TODO confirm against the CSV schema.
N_FEATURES = 2048

X_train = train_df.iloc[:, :N_FEATURES]
y_train = train_df.iloc[:, N_FEATURES:]
X_val = val_df.iloc[:, :N_FEATURES]
y_val = val_df.iloc[:, N_FEATURES:]

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_val shape:", X_val.shape)
print("y_val shape:", y_val.shape)

# === Grid Search Setup ===
# random_state is fixed because the 'sag' and 'saga' solvers are stochastic.
base_clf = LogisticRegression(solver='saga', max_iter=5000, random_state=42)
ovr = OneVsRestClassifier(base_clf)

# Two sub-grids so that only valid solver/penalty pairs are fitted:
# 'newton-cg', 'lbfgs' and 'sag' support the L2 penalty only, while 'saga'
# supports both L1 and L2. Combining L1 with the first three solvers makes
# every such fit fail and score NaN, which wastes half the search.
param_grid = [
    {
        'estimator__solver': ['newton-cg', 'lbfgs', 'sag'],
        'estimator__penalty': ['l2'],
        'estimator__class_weight': [None, 'balanced'],
    },
    {
        'estimator__solver': ['saga'],
        'estimator__penalty': ['l1', 'l2'],
        'estimator__class_weight': [None, 'balanced'],
    },
]

grid_search = GridSearchCV(
    estimator=ovr,
    param_grid=param_grid,
    scoring='f1_micro',
    cv=3,
    verbose=1,
    n_jobs=-1
)

# === Model Training ===
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_

print("Best Parameters:", grid_search.best_params_)

# === Evaluation ===
# Score the refitted best estimator on the hold-out validation split.
y_pred = best_model.predict(X_val)

confM = multilabel_confusion_matrix(y_val, y_pred)
print(confM)

acc = accuracy_score(y_val, y_pred)
F1 = f1_score(y_val, y_pred, average='micro')
precicion = precision_score(y_val, y_pred, average='micro')
recall = recall_score(y_val, y_pred, average='micro', zero_division=0)

print('By hold-out evaluation: acc = ', acc, ',F1 = ', F1, 
      'precicion = ', precicion, 'recall = ', recall)

print("\nClassification Report:\n", classification_report(y_val, y_pred))


In [None]:
#Random Forest

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier


# Random forest: 100 trees, each fitted on a bootstrap sample drawn with
# max_samples=0.8, seeded for repeatability.
rf_params = {'n_estimators': 100, 'max_samples': 0.8, 'random_state': 1}
bag = RandomForestClassifier(**rf_params)

clf = bag.fit(X_train, y_train)
y_pred = clf.predict(X_val)

# One 2x2 confusion matrix per label.
confM = multilabel_confusion_matrix(y_val, y_pred)
print(confM)

# Micro-averaged summary scores on the hold-out split.
micro = {'average': 'micro'}
acc = accuracy_score(y_val, y_pred)
F1 = f1_score(y_val, y_pred, **micro)
precicion = precision_score(y_val, y_pred, **micro)
recall = recall_score(y_val, y_pred, zero_division=0, **micro)
print(
    'By hold-out evaluation: acc = ', acc,
    ',F1 = ', F1,
    'precicion =  ', precicion,
    'recall = ', recall,
)
print("\nClassification Report:\n", classification_report(y_val, y_pred))


In [None]:
#MLPCLASSIFIER

from sklearn.neural_network import MLPClassifier

# One-vs-rest MLP with two hidden layers (100 and 50 units).
mlp_params = {
    'hidden_layer_sizes': (100, 50),
    'activation': 'relu',
    'solver': 'adam',
    'alpha': 0.0001,
    'batch_size': 'auto',
    'learning_rate': 'adaptive',
    'max_iter': 200,
    'random_state': 1,
}
mlp = OneVsRestClassifier(MLPClassifier(**mlp_params))
clf = mlp.fit(X_train, y_train)
y_pred = clf.predict(X_val)

# One 2x2 confusion matrix per label.
confM = multilabel_confusion_matrix(y_val, y_pred)
print(confM)

# Micro-averaged summary scores on the hold-out split.
micro = {'average': 'micro'}
acc = accuracy_score(y_val, y_pred)
F1 = f1_score(y_val, y_pred, **micro)
precicion = precision_score(y_val, y_pred, **micro)
recall = recall_score(y_val, y_pred, zero_division=0, **micro)
print(
    'By hold-out evaluation: acc = ', acc,
    ',F1 = ', F1,
    'precicion =  ', precicion,
    'recall = ', recall,
)
print("\nClassification Report:\n", classification_report(y_val, y_pred))


In [None]:
from sklearn.multiclass import OneVsRestClassifier
from xgboost import XGBClassifier

# One-vs-rest gradient-boosted trees; each per-label model is a binary
# logistic XGBoost classifier with the settings below.
xgb_params = {
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    'use_label_encoder': False,
    'scale_pos_weight': 1,
    'random_state': 42,
    'n_jobs': -1,
    'learning_rate': 0.1,
    'max_depth': 6,
    'n_estimators': 1000,
}
xgb = OneVsRestClassifier(XGBClassifier(**xgb_params))

clf = xgb.fit(X_train, y_train)
y_pred = clf.predict(X_val)

# One 2x2 confusion matrix per label.
confM = multilabel_confusion_matrix(y_val, y_pred)
print(confM)

# Micro-averaged summary scores on the hold-out split.
micro = {'average': 'micro'}
acc = accuracy_score(y_val, y_pred)
F1 = f1_score(y_val, y_pred, **micro)
precicion = precision_score(y_val, y_pred, **micro)
recall = recall_score(y_val, y_pred, zero_division=0, **micro)
print(
    'By hold-out evaluation: acc = ', acc,
    ',F1 = ', F1,
    'precicion =  ', precicion,
    'recall = ', recall,
)
print("\nClassification Report:\n", classification_report(y_val, y_pred))
