In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from skopt import BayesSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE, ADASYN
from imblearn.under_sampling import TomekLinks, RandomUnderSampler
from sklearn.impute import SimpleImputer
from sklearn.impute import KNNImputer

In [None]:
# Load the Titanic dataset from the CSV file at the URL below
TITANIC_CSV_URL = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df = pd.read_csv(TITANIC_CSV_URL)

In [None]:
# Initial data preparation.
# Keep only the target column and the features used for modelling. The explicit
# .copy() makes `df` an independent frame, so the column assignments below do
# not write into a slice of the originally loaded frame (which raises pandas'
# SettingWithCopyWarning and is unreliable under copy-on-write).
df = df[['Survived', 'Pclass', 'Sex', 'Age', 'Fare', 'Embarked']].copy()

# Drop only the rows with a missing 'Embarked' value; rows that are missing
# values in other columns are kept at this stage.
df = df.dropna(subset=['Embarked'])

# Encode the two categorical columns as integer codes
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
df['Embarked'] = df['Embarked'].map({'C': 0, 'Q': 1, 'S': 2})

# Split into features (X) and labels (y)
X = df.drop(columns=['Survived'])
y = df['Survived']

In [None]:
# Train/test split: 20% of the rows are held out for testing, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Impute missing values before applying the resamplers (SMOTE and the others).
# The imputer is fitted on the training split only; the same fitted imputer is
# then applied to the test split so the models are evaluated on data that went
# through the same preprocessing as the data they were trained on.
imp_mean = SimpleImputer(strategy='mean')
X_train_imputed = imp_mean.fit_transform(X_train)
X_test_imputed = imp_mean.transform(X_test)

# Every stochastic step below uses random_state=42 (the same seed value as the
# train/test split) so that re-running the cell prints the same metrics.

# SMOTE
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train_imputed, y_train)

# TomekLinks (takes no random_state argument)
tomek = TomekLinks()
X_res_tl, y_res_tl = tomek.fit_resample(X_train_imputed, y_train)

# RandomUnderSampler
under = RandomUnderSampler(random_state=42)
X_res_under, y_res_under = under.fit_resample(X_train_imputed, y_train)

# ADASYN (used here as a substitute for DSTO-GAN)
adasyn = ADASYN(random_state=42)
X_res_adasyn, y_res_adasyn = adasyn.fit_resample(X_train_imputed, y_train)

# Evaluation: train one random forest per balanced training set and score it
# on the imputed test split.
for name, X_bal, y_bal in zip(["SMOTE", "TomekLinks", "RandomUnderSampler", "ADASYN"],
                               [X_res, X_res_tl, X_res_under, X_res_adasyn],
                               [y_res, y_res_tl, y_res_under, y_res_adasyn]):
    model = RandomForestClassifier(random_state=42).fit(X_bal, y_bal)
    # Predict on the imputed test array: the raw X_test has not been imputed and
    # is a DataFrame, whereas the model was fitted on a plain NumPy array.
    y_pred = model.predict(X_test_imputed)
    print(f"{name} - Precisão:", precision_score(y_test, y_pred))
    print(f"{name} - Recall:", recall_score(y_test, y_pred))
    print(f"{name} - F1-Score:", f1_score(y_test, y_pred))