In [2]:
import pandas as pd
import numpy as np
import joblib
from google.cloud import storage
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Cloud Storage locations: source bucket, dataset object, and the prefix
# under which model artifacts are stored.
BUCKET_NAME = "datasets-cardiovasculares"
FILE_PATH = "dataset_unificado.csv"
MODEL_PATH = "models"

# Local working copy of the dataset object.
local_dataset_path = "dataset.csv"

# Fetch the dataset object from the bucket into the working directory.
client = storage.Client()
bucket = client.bucket(BUCKET_NAME)
blob = bucket.blob(FILE_PATH)
blob.download_to_filename(local_dataset_path)

# Parse the downloaded comma-separated file into a DataFrame.
df = pd.read_csv(local_dataset_path, sep=",")

# One-hot encode the exercise type: the "Type" column is replaced by one
# "Type_<value>" indicator column per distinct value present in the data.
df = pd.get_dummies(df, columns=["Type"], prefix="Type")

# Ordinal encoding of the difficulty level.
level_mapping = {"Beginner": 0, "Intermediate": 1, "Expert": 2}
level_encoded = df["Level"].map(level_mapping)

# Series.map yields NaN for missing values and for labels that are not keys
# of the mapping. Such NaNs would only surface much later, as a generic
# "contains NaN" failure when the clustering model is fitted, so stop here
# with a message that names the offending labels.
level_unmapped = level_encoded.isna()
if level_unmapped.any():
    unknown_levels = sorted(
        df.loc[level_unmapped, "Level"].dropna().astype(str).unique()
    )
    raise ValueError(
        f"{int(level_unmapped.sum())} row(s) have a 'Level' that cannot be "
        f"encoded (missing, or not one of {list(level_mapping)}); "
        f"unexpected labels: {unknown_levels}"
    )
df["Level"] = level_encoded

# Equipment is bucketed into four ordinal tiers (0-3).
# The "nan" key only matches the literal string "nan"; real missing values
# (float NaN) are not matched by the mapping and are handled by fillna(0).
# NOTE(review): any equipment label absent from this mapping also falls back
# to tier 0 (the same tier as "Body Only") -- confirm that this is intended.
equipamiento_categorias = {
    "Body Only": 0, "nan": 0, "Dumbbell": 1, "Bands": 1, "Medicine Ball": 1,
    "Barbell": 2, "Kettlebells": 2, "Machine": 3, "Cable": 3, "Other": 3
}
df["Equipment_Encoded"] = df["Equipment"].map(equipamiento_categorias).fillna(0).astype(int)

# Collapse individual muscle groups into four broad training zones. The
# mapping is built from (zone, muscles) groups so each zone is listed once.
bodypart_categorias = {
    muscle: zone
    for zone, muscles in (
        ("Core", ("Abdominals", "Lower Back")),
        ("Piernas", ("Quadriceps", "Hamstrings", "Calves")),
        ("Tren Superior", ("Chest", "Shoulders", "Lats")),
        ("Brazos", ("Biceps", "Triceps")),
    )
    for muscle in muscles
}

# NOTE(review): body parts that are not keys of the mapping become NaN in
# "BodyPart_Category"; how the label encoder below treats NaN depends on
# the installed scikit-learn version -- confirm this is acceptable.
df["BodyPart_Category"] = df["BodyPart"].map(bodypart_categorias)

# Turn the zone names into integer codes for the clustering features.
le_bodypart_cat = LabelEncoder()
df["BodyPart_Category_Encoded"] = le_bodypart_cat.fit_transform(
    df["BodyPart_Category"]
)

# Columns used as input features for clustering.
feature_cols = [
    "Cardiovascular_Safe",
    "BodyPart_Category_Encoded",
    "Equipment_Encoded",
    "Level",
    "Type_Cardio",
    "Type_Plyometrics",
    "Type_Strength",
    "Type_Stretching",
]

# Fit the scaler on the selected features, then apply it to the same data.
feature_frame = df[feature_cols]
scaler = StandardScaler().fit(feature_frame)
df_scaled = scaler.transform(feature_frame)

# Train K-Means with 7 clusters; the fixed seed keeps runs repeatable.
kmeans = KMeans(n_clusters=7, random_state=58).fit(df_scaled)

# Label every row with the cluster the fitted model predicts for it.
df["Cluster"] = kmeans.predict(df_scaled)

# Write the dataset (now including the "Cluster" column) to a local CSV and
# upload it to the bucket under the same object name.
clustered_csv = "dataset_clustering.csv"
df.to_csv(clustered_csv, index=False)
blob = bucket.blob(clustered_csv)
blob.upload_from_filename(clustered_csv)

# Serialize the fitted model and scaler to local files.
joblib.dump(kmeans, "kmeans_model.pkl")
joblib.dump(scaler, "scaler.pkl")

# Upload both artifacts under the MODEL_PATH prefix, model first.
for artifact_name in ("kmeans_model.pkl", "scaler.pkl"):
    blob = bucket.blob(f"{MODEL_PATH}/{artifact_name}")
    blob.upload_from_filename(artifact_name)

print("Modelo guardado en Cloud Storage exitosamente.")


Modelo guardado en Cloud Storage exitosamente.
