<a href="https://colab.research.google.com/github/Coronel21yulieny/coronel21/blob/main/Untitled1grafo_arbol.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:


import heapq
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# -----------------------------
# Parte 1: Dijkstra
# -----------------------------
def dijkstra(graph, start, end):
    """Find the shortest path between two stations using Dijkstra's algorithm.

    Args:
        graph: Mapping ``station -> {neighbor: weight}``. Weights are assumed
            to be non-negative (a requirement of Dijkstra's algorithm). A
            station that only appears as a neighbor (never as a key) is
            treated as a node without outgoing edges.
        start: Station the search starts from; must be a key of ``graph``.
        end: Target station.

    Returns:
        A tuple ``(path, distance)`` where ``path`` is the list of stations
        from ``start`` to ``end`` (both included) and ``distance`` is the sum
        of the edge weights along it. When ``end`` cannot be reached the
        result is ``([], float('inf'))``.

    Raises:
        KeyError: If ``start`` is not a key of ``graph``.
    """
    if start not in graph:
        raise KeyError(start)

    infinity = float('inf')
    # Only stations that have actually been reached get an entry; everything
    # else is implicitly at infinite distance.
    distances = {start: 0}
    previous_nodes = {}
    queue = [(0, start)]

    while queue:
        current_distance, current_station = heapq.heappop(queue)

        # A shorter route to this station was already processed; this heap
        # entry is stale and expanding it again would be wasted work.
        if current_distance > distances.get(current_station, infinity):
            continue

        # The first time the target is popped its distance is final.
        if current_station == end:
            break

        for neighbor, weight in graph.get(current_station, {}).items():
            distance = current_distance + weight
            if distance < distances.get(neighbor, infinity):
                distances[neighbor] = distance
                previous_nodes[neighbor] = current_station
                heapq.heappush(queue, (distance, neighbor))

    # Never reached: report "no path" instead of a misleading [start].
    if end not in distances:
        return [], infinity

    # Walk the predecessor chain backwards, then flip it once (appending and
    # reversing avoids the O(n) cost of inserting at the front each step).
    path = [end]
    while path[-1] != start:
        path.append(previous_nodes[path[-1]])
    path.reverse()

    return path, distances[end]

# Medellín metro graph: consecutive stations are joined by symmetric edges
# whose weights are the travel times (in minutes) between them.
def _chain_stations(stations, minutes):
    """Return the adjacency dict of a line whose stations are linked in order."""
    adjacency = {name: {} for name in stations}
    for (left, right), cost in zip(zip(stations, stations[1:]), minutes):
        adjacency[left][right] = cost
        adjacency[right][left] = cost
    return adjacency


graph = _chain_stations(
    ["Santa Lucía", "Floresta", "Estadio", "Suramericana", "Cisneros", "San Antonio"],
    [2, 2, 2, 2, 3],
)

# Run Dijkstra between both ends of the line
start_station, end_station = "Santa Lucía", "San Antonio"
path, distance = dijkstra(graph, start_station, end_station)

print("====== ALGORITMO DE DIJKSTRA ======")
print(f"Ruta más corta: {' -> '.join(path)}")
print(f"Distancia total: {distance} minutos\n")

# -----------------------------
# Parte 2: Árbol de decisión
# -----------------------------
print("====== MODELO DE ÁRBOL DE DECISIÓN ======")

# Simulated dataset: one row per origin/destination pair with its travel time
# and a binary "recomendado" label used as the classification target.
data = {
    "origen": ["Santa Lucía", "Floresta", "Estadio", "Suramericana", "Cisneros"],
    "destino": ["Floresta", "Estadio", "Suramericana", "Cisneros", "San Antonio"],
    "tiempo": [2, 2, 2, 2, 3],
    "recomendado": [1, 1, 1, 1, 0]
}
df = pd.DataFrame(data)

# One-hot encode the categorical columns; the numeric 'tiempo' column is
# passed through unchanged by get_dummies.
X = pd.get_dummies(df[["origen", "destino", "tiempo"]])
y = df["recomendado"]

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# The split returns row selections of X. Take explicit copies before writing
# to them so the in-place scaling below does not trigger pandas'
# SettingWithCopyWarning and can never write through to X.
X_train = X_train.copy()
X_test = X_test.copy()

# Standardize only the numeric column ('tiempo'). The scaler is fitted on the
# training rows only and then reused for the test rows.
scaler = StandardScaler()
X_train[['tiempo']] = scaler.fit_transform(X_train[['tiempo']])
X_test[['tiempo']] = scaler.transform(X_test[['tiempo']])

# Train the decision tree
modelo = DecisionTreeClassifier(random_state=42)
modelo.fit(X_train, y_train)

# Evaluate the model on the held-out rows
y_pred = modelo.predict(X_test)
print(f"Precisión del modelo: {accuracy_score(y_test, y_pred)}\n")

# Text rendering of the fitted tree
arbol = export_text(modelo, feature_names=X.columns.tolist())
print("Estructura del árbol de decisión:\n")
print(arbol)

# -----------------------------
# Parte 3: Ajuste de Hiperparámetros con GridSearchCV
# -----------------------------
# Hyperparameter grid explored by the search: 4 * 3 * 3 = 36 combinations.
param_grid = {
    "max_depth": [3, 5, 7, 10],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
}

# 3-fold cross-validation, using all available cores (n_jobs=-1) and
# printing progress (verbose=1).
base_tree = DecisionTreeClassifier(random_state=42)
grid_search = GridSearchCV(base_tree, param_grid, cv=3, n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

# Best estimator found by the search
best_model = grid_search.best_estimator_
print(f"Mejor modelo encontrado: {best_model}")

# Score the best estimator on the held-out test rows
y_pred_best = best_model.predict(X_test)
print(f"Precisión del mejor modelo: {accuracy_score(y_test, y_pred_best)}\n")


Ruta más corta: Santa Lucía -> Floresta -> Estadio -> Suramericana -> Cisneros -> San Antonio
Distancia total: 11 minutos

Precisión del modelo: 0.5

Estructura del árbol de decisión:

|--- class: 0

Fitting 3 folds for each of 36 candidates, totalling 108 fits
Mejor modelo encontrado: DecisionTreeClassifier(max_depth=3, random_state=42)
Precisión del mejor modelo: 0.5

