In [None]:
# Cargar dataset desde seaborn
import seaborn as sns
import pandas as pd

titanic = sns.load_dataset("titanic")
titanic.head()


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [None]:
# Procesamiento
# Eliminar columnas innecesarias
df = titanic.drop(columns=['deck', 'class', 'who', 'adult_male', 'alive', 'embark_town'])

# Eliminar filas con valores nulos en 'embarked'
df = df.dropna(subset=['embarked'])

# Imputar valores faltantes en 'age' con la mediana
df['age'] = df['age'].fillna(df['age'].median())

# Convertir variables categóricas a variables numéricas
df = pd.get_dummies(df, columns=['sex', 'embarked'], drop_first=True)

# Separar variables predictoras (X) y la variable objetivo (y)
X = df.drop('survived', axis=1)
y = df['survived']

X.head()

Unnamed: 0,pclass,age,sibsp,parch,fare,alone,sex_male,embarked_Q,embarked_S
0,3,22.0,1,0,7.25,False,True,False,True
1,1,38.0,1,0,71.2833,False,False,False,False
2,3,26.0,0,0,7.925,True,False,False,True
3,1,35.0,1,0,53.1,False,False,False,True
4,3,35.0,0,0,8.05,True,True,False,True


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Dividir en train/test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Modelo
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

# Predicciones
y_pred = clf.predict(X_test)

# Evaluación
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Matriz de confusión:\n", confusion_matrix(y_test, y_pred))
print("Reporte de clasificación:\n", classification_report(y_test, y_pred))

Accuracy: 0.797752808988764
Matriz de confusión:
 [[89 20]
 [16 53]]
Reporte de clasificación:
               precision    recall  f1-score   support

           0       0.85      0.82      0.83       109
           1       0.73      0.77      0.75        69

    accuracy                           0.80       178
   macro avg       0.79      0.79      0.79       178
weighted avg       0.80      0.80      0.80       178



In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Usamos solo entradas numéricas para predecir 'fare'
features = df.drop(columns=['survived', 'fare'])
target = df['fare']

# Entrenamiento
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(features, target, test_size=0.2, random_state=42)

reg = LinearRegression()
reg.fit(X_train_reg, y_train_reg)

# Predicción
y_pred_reg = reg.predict(X_test_reg)

# Evaluación
print("MSE:", mean_squared_error(y_test_reg, y_pred_reg))
print("R^2:", r2_score(y_test_reg, y_pred_reg))

MSE: 1742.7561699895527
R^2: 0.3467792463503657


In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC()
}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred_model = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred_model)
    print(f"{name} Accuracy: {acc:.4f}")

Logistic Regression Accuracy: 0.7978
Decision Tree Accuracy: 0.7416
Random Forest Accuracy: 0.7640
SVM Accuracy: 0.6854


In [8]:
# Conectar Google Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Autenticación en Google Colab
from google.colab import auth
auth.authenticate_user()

# Configuración de usuario de Git
!git config --global user.email "genesis.riera@uees.edu.ec"
!git config --global user.name "nesisriera"

# Establecer el token de GitHub en una variable de entorno segura
import os

# Limpiar clonaciones anteriores
%cd /content
!rm -rf T1-AI-Titanic

# Clonar el repositorio correcto
!git clone https://$GITHUB_TOKEN@github.com/Lolit-78/T1-AI-Titanic.git
%cd T1-AI-Titanic

# Ruta del notebook en Google Drive
NOTEBOOK_PATH = "/content/drive/MyDrive/Colab Notebooks/03_machine_learning.ipynb"

# Verificar y copiar el archivo al repositorio
import shutil
if os.path.exists(NOTEBOOK_PATH):
    shutil.copy(NOTEBOOK_PATH, '.')
    print("✅ Notebook copiado correctamente al repositorio.")
else:
    raise FileNotFoundError(f"❌ ERROR: No se encontró el archivo en {NOTEBOOK_PATH}")

# Hacer commit y push del notebook copiado
!git add 03_machine_learning.ipynb
!git commit -m "📊 Añadir notebook de machine learning"
!git branch -M main

Mounted at /content/drive
/content
Cloning into 'T1-AI-Titanic'...
remote: Enumerating objects: 104, done.[K
remote: Counting objects: 100% (104/104), done.[K
remote: Compressing objects: 100% (101/101), done.[K
remote: Total 104 (delta 47), reused 3 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (104/104), 1.44 MiB | 4.61 MiB/s, done.
Resolving deltas: 100% (47/47), done.
/content/T1-AI-Titanic
✅ Notebook copiado correctamente al repositorio.
[main 87356a4] 📊 Añadir notebook de machine learning
 1 file changed, 1 insertion(+), 912 deletions(-)
 rewrite 03_machine_learning.ipynb (97%)
