In [None]:
#1. Librerías básicas
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#2. Librerías de ML
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import mutual_info_classif, SelectKBest, chi2
from sklearn.metrics import classification_report, confussion_matrix, accuracy_score, f1_score

#3. Modelo final
from sklearn.ensemble import RandomForestClassifier

#4. LazyPredict
!pip install lazypredict -q
from lazypredict.Supervised import LazyClassifier

#5. MLflow
!pip install mlflow -q
import mlflow
import mlflow.sklearn

In [None]:
# Example dataset: breast cancer
data = load_breast_cancer()
# Build the feature frame and append the class label as the 'target' column.
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)
df.head()

In [None]:
# Quick exploration
df.info()  # prints column dtypes and non-null counts itself
# Only the last expression of a cell is rendered automatically, so the
# intermediate tables need an explicit display() (built in to IPython/Jupyter).
display(df.describe())
# Fixed: the frame is named `df`; `ds` was undefined and raised NameError.
display(df.isnull().sum())

# Class balance of the target column
sns.countplot(data=df, x='target')
plt.title("Distribución de clases")


In [None]:
# Split the frame into the feature matrix and the label vector.
X = df.drop('target', axis=1)
y = df['target']

# Mutual information between each feature and the label.
# The estimator uses random numbers internally, so it is seeded here: the
# ranking feeds the later feature selection and must be stable across runs.
mi = mutual_info_classif(X, y, random_state=42)
mi_series = pd.Series(mi, index=X.columns).sort_values(ascending=False)

plt.figure(figsize=(10, 6))
mi_series.plot(kind='bar')
plt.title("Ganancia de información mutua")


In [None]:
# Compare with chi2: score every feature (k='all' keeps them all).
chi2_selector = SelectKBest(score_func=chi2, k='all').fit(X, y)
chi2_scores = pd.Series(
    data=chi2_selector.scores_,
    index=X.columns,
).sort_values(ascending=False)

# Bar chart of the scores, highest first.
plt.figure(figsize=(10, 6))
chi2_scores.plot.bar(color='orange')
plt.title("Puntajes Chi2")


In [None]:
# Keep the 10 features with the highest mutual-information score.
top_features = mi_series.head(10).index
X_selected = X[top_features]

# Hold out 20% of the rows for testing.
# Fixed: the keyword is `random_state`; the misspelled `raandom_state`
# raised TypeError (unexpected keyword argument).
X_train, X_test, y_train, y_test = train_test_split(
    X_selected, y, test_size=0.2, random_state=42
)

# Run LazyPredict on the split and show the resulting model table.
clf = LazyClassifier(verbose=0, ignore_warnings=True)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
models


In [None]:
# Train the seeded random forest and predict on the held-out split.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred = model.predict(X_test)

# Full classification report, then accuracy and F1.
print(classification_report(y_true=y_test, y_pred=y_pred))
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print("F1 Score:", f1_score(y_true=y_test, y_pred=y_pred))


In [None]:
# Record the model, its identifying parameter and its test metrics in one MLflow run.
with mlflow.start_run():
    # Compute both metrics up front so nothing is logged if scoring fails.
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    mlflow.log_param("model", "RandomForestClassifier")
    for metric_name, metric_value in (("accuracy", acc), ("f1_score", f1)):
        mlflow.log_metric(metric_name, metric_value)

    # Store the fitted estimator as a run artifact.
    mlflow.sklearn.log_model(model, "random_forest_model")

print("MLflow run registrado correctamente")