In [None]:
# Importações
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import seaborn as sns

# Load the Titanic passenger table from a public CSV.
url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
df = pd.read_csv(url)

# The rest of the notebook addresses columns in lower case ('age', 'sex',
# 'pclass', 'survived'). The upstream file is believed to use capitalised
# headers (e.g. 'Age', 'Survived') -- verify with df.columns -- so normalise
# the case once here. This is a no-op if the headers are already lower case.
df.columns = df.columns.str.lower()

df.head()

In [None]:
# Discard columns that are not used as features.
# The list mixes names from different Titanic dataset variants, so some of
# them may be absent from the loaded CSV; errors='ignore' skips missing names
# instead of aborting the whole cell with a KeyError.
df = df.drop(columns=['embarked', 'alone', 'fare'], errors='ignore')

In [None]:
# Fill missing ages with the column mean.
# Assign the result back instead of calling fillna(inplace=True) on the
# df['age'] selection: the chained in-place form is deprecated and does not
# update df when pandas Copy-on-Write is active.
# NOTE(review): the mean is computed over all rows, i.e. before the
# train/test split, so the test rows influence the imputed value.
df['age'] = df['age'].fillna(df['age'].mean())

# One-hot encode passenger class into pclass_<value> indicator columns.
df = pd.get_dummies(df, columns=['pclass'], prefix='pclass')

In [None]:
# Remove the remaining columns that are not used as features.
# 'sex' must NOT be in this list: it is encoded on the next statement, and
# dropping it first made that statement fail with a KeyError.
# errors='ignore' tolerates names that do not exist in the loaded CSV.
# NOTE(review): any identifier-like column still left in df (e.g. a passenger
# id) will be fed to the models as a feature -- confirm and drop if present.
df = df.drop(
    columns=['name', 'ticket', 'cabin', 'sibsp', 'parch', 'who', 'deck', 'adult_male'],
    errors='ignore',
)

# Replace the textual 'sex' column with a single indicator column.
# drop_first=True removes the first category, leaving one column when 'sex'
# has exactly two distinct values (assumed here).
df['sex'] = pd.get_dummies(df['sex'], drop_first=True)

# Feature matrix and target vector.
X = df.drop('survived', axis=1)
y = df['survived']

In [None]:
# Hold out 25% of the rows for evaluation; the fixed seed makes the split repeatable.
split_parts = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Candidate classifiers, keyed by the label shown in the results table and plot.
models = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'Naive Bayes': GaussianNB(),
    # SVC (RBF kernel by default) is sensitive to feature scale, so the
    # features are standardised first. Keeping the scaler inside the pipeline
    # means it is fitted on the training split only.
    'SVM': Pipeline([
        ('scaler', StandardScaler()),
        ('svc', SVC()),
    ]),
}

# Maps model label -> {'Accuracy': ..., 'Precision': ..., 'Recall': ...}.
results = {}

for name, model in models.items():
    # Fit on the training split, score on the held-out split.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    results[name] = {
        'Accuracy': accuracy_score(y_test, y_pred),
        # zero_division=0 reports 0 (without a warning) when the metric's
        # denominator is empty, e.g. a model that predicts no positives.
        'Precision': precision_score(y_test, y_pred, zero_division=0),
        'Recall': recall_score(y_test, y_pred, zero_division=0)
    }

# One row per model, one column per metric.
results_df = pd.DataFrame(results).T
results_df

In [None]:
# Bar chart comparing held-out accuracy across the fitted models.
fig, ax = plt.subplots(figsize=(8,6))
sns.barplot(x=results_df.index, y=results_df['Accuracy'], ax=ax)
ax.set_title('Comparação de Acurácia entre Modelos')
ax.set_ylabel('Acurácia')
ax.set_xlabel('Modelo')
# Accuracy is a proportion, so pin the axis to the full [0, 1] range.
ax.set_ylim(0, 1)
plt.show()