# Notebook 3: Machine Learning Básico

Este notebook aplica modelos de Machine Learning con `scikit-learn` para clasificación y regresión:

- Clasificación con dataset Iris
- Regresión con dataset Boston (sintético)
- Evaluación de métricas



In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error

from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_wine, load_diabetes

import warnings
warnings.filterwarnings('ignore')
sns.set(style="whitegrid")
plt.rcParams["figure.figsize"] = (10, 6)


In [2]:
iris = sns.load_dataset("iris")
iris.head()


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [5]:
# Variables predictoras y variable objetivo
X = iris.drop("species", axis=1)
y = iris["species"]


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [6]:
model_lr = LogisticRegression()
model_lr.fit(X_train_scaled, y_train)
y_pred_lr = model_lr.predict(X_test_scaled)

print("Accuracy:", accuracy_score(y_test, y_pred_lr))
print("Reporte de clasificación:\n", classification_report(y_test, y_pred_lr))


Accuracy: 1.0
Reporte de clasificación:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      1.00      1.00        13
   virginica       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [7]:
models = {
    "Logistic Regression": LogisticRegression(),
    "KNN": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier()
}

for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    print(f"{name}: Accuracy = {acc:.2f}")


Logistic Regression: Accuracy = 1.00
KNN: Accuracy = 1.00
Decision Tree: Accuracy = 1.00


In [8]:
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

model_reg = LinearRegression()
model_reg.fit(X_train, y_train)
y_pred = model_reg.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print("Error cuadrático medio (MSE):", mse)


Error cuadrático medio (MSE): 2821.750981001311


# Conclusiones

- Logistic Regression es un modelo base confiable para clasificación.
- KNN y Árboles también son útiles dependiendo del contexto y los datos.
- Para regresión, el modelo lineal es fácil de interpretar y rápido de entrenar.
- La métrica `accuracy` es clave para clasificación, y `MSE` para regresión.


In [12]:
from google.colab import auth
auth.authenticate_user()

!git config --global user.email "alejandra.montenegro@uees.edu.ec"
!git config --global user.name "Alejandra Montenegro"

import ipykernel
import requests
import json

def get_notebook_name():
    """
    Returns the name of the current notebook.
    """
    try:
        id = ipykernel.get_connection_file().split('/')[-1].split('-')[1]
        response = requests.get(f'http://localhost:9000/api/sessions/{id}')
        response.raise_for_status()
        session_info = json.loads(response.text)
        return session_info['name']
    except Exception as e:
        print(f"Error getting notebook name: {e}")
        return None

def save_to_github(notebook_name, commit_message="03_Machine_Learning_Basico"):
    import os

    repo_url = "https://github.com/Alejandra-Montenegro/UEES-IA-Semana1-Montenegro-Alejandra.git"
    repo_name = "UEES-IA-Semana1-Montenegro-Alejandra"

    if not os.path.exists(repo_name):
        !git clone {repo_url}

    %cd {repo_name}

    os.makedirs("02_Laboratorio/notebooks", exist_ok=True)

    !cp /content/{notebook_name} 02_Laboratorio/notebooks/
    !git add .
    !git commit -m "{commit_message}"
    !git push origin main

    print(f"✅ {notebook_name} guardado correctamente en GitHub 🚀")

# Get the current notebook name and save to GitHub
current_notebook_name = get_notebook_name()
if current_notebook_name:
    save_to_github(current_notebook_name)

Error getting notebook name: HTTPConnectionPool(host='localhost', port=9000): Max retries exceeded with url: /api/sessions/89e3f243 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x79013c335850>: Failed to establish a new connection: [Errno 111] Connection refused'))
