In [None]:
import pandas as pd
import numpy as np

from dsgd.DSClassifierMultiQ import DSClassifierMultiQ



from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Clasificadores
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# Exploración de datos y preprocesamiento

In [None]:
# Load the raw obesity dataset and report its size.
data = pd.read_csv('data/obesity.csv')
print("tamaño del dataset: ", data.shape)

# Binary columns become 0/1 flags; any value other than the positive label maps to 0.
data["Gender"] = data["Gender"].eq("Male").astype(int)
for yes_no_column in ["FAVC", "SCC", "SMOKE", "family_history_with_overweight"]:
    data[yes_no_column] = data[yes_no_column].eq("yes").astype(int)

# CALC and CAEC share the same ordinal frequency scale.
# Values that are not keys of this mapping become NaN.
frequency_levels = {'no': 0, 'Sometimes': 1, 'Frequently': 2, 'Always': 3}
for frequency_column in ["CALC", "CAEC"]:
    data[frequency_column] = data[frequency_column].map(frequency_levels)

# One-hot encode MTRANS; the new MTRANS_* columns are placed after the remaining columns.
data = pd.get_dummies(data, columns=["MTRANS"], dtype=int)
print(data.dtypes)
print(data["NObeyesdad"].unique())

# Shuffle the rows. The seed keeps the row order, and therefore every later
# train/test split derived from it, identical across kernel restarts.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

In [None]:
# Separate the feature matrix from the target column and preview the target.
target_column = "NObeyesdad"
X = data.drop(columns=[target_column])
y = data[target_column]
print(y.head())

# Testeo de los clasificadores

In [None]:
# Hold out 25% of the rows for testing, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

In [None]:
# Baseline scikit-learn classifiers, keyed by the display name used in the printed reports.
models = {
    "Logistic Regression": LogisticRegression(),
    # Seeded so the forest's random sampling gives the same score on every run.
    "Random Forest": RandomForestClassifier(random_state=42),
    "Support Vector Machine": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
}

# Helper that trains and evaluates every model in a dictionary.
def train_and_evaluate(models, X_train, y_train, X_test, y_test):
    """Fit each model on the training split and print its test-split metrics.

    Args:
        models: Mapping of display name -> estimator exposing ``fit`` and ``predict``.
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Test features passed to ``predict``.
        y_test: True test labels the predictions are compared against.

    Returns:
        None. For every model it prints the accuracy (4 decimals), the
        classification report, the confusion matrix and a separator line.
    """
    separator = '---------------------------------------------------\n'
    for label, estimator in models.items():
        estimator.fit(X_train, y_train)
        predictions = estimator.predict(X_test)

        score = accuracy_score(y_test, predictions)
        print(f'{label} Accuracy: {score:.4f}')

        report = classification_report(y_test, predictions)
        print(f'Classification Report for {label}:\n{report}')

        matrix = confusion_matrix(y_test, predictions)
        print(f'Confusion Matrix for {label}:\n{matrix}\n')

        print(separator)

# Run every baseline model on the train/test split defined in the previous cell.
train_and_evaluate(
    models=models,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)


# Testeo del clasificador de DS

In [None]:
# Encode the seven obesity categories as integer labels 0..6 for the DS classifier.
class_codes = {
    'Insufficient_Weight': 0,
    'Normal_Weight': 1,
    'Overweight_Level_I': 2,
    'Overweight_Level_II': 3,
    'Obesity_Type_I': 4,
    'Obesity_Type_II': 5,
    'Obesity_Type_III': 6,
}
Y = y.map(class_codes)
X = X.apply(pd.to_numeric)

# Split with the same test fraction, seed and stratification labels as the
# scikit-learn baseline split, instead of slicing off the last 25% of rows.
# The earlier slice was unstratified and held out different rows, so the DS
# results were not comparable with the baselines. Arrays are passed as plain
# NumPy values, as before.
X_train, X_test, Y_train, Y_test = train_test_split(
    X.values,
    Y.values,
    test_size=0.25,
    random_state=42,
    stratify=y,
)


print(len(X_train), len(X_test), len(Y_train), len(Y_test))
print(Y.head())


In [None]:
# First DS classifier. The leading 7 matches the seven encoded target classes
# (presumably the class count — confirm against the dsgd API).
# NOTE(review): unlike DSC2-DSC4, this instance does not pass precompute_rules=True;
# confirm that is intentional.
DSC = DSClassifierMultiQ(
    7,
    min_iter=50,
    max_iter=400,
    debug_mode=True,
    lossfn="MSE",
    num_workers=0,
    min_dloss=1e-7,
)

In [None]:
# Train DSC with single-feature rules only (3 breaks, no multi-feature rules).
losses, epoch, dt = DSC.fit(
    X_train,
    Y_train,
    add_single_rules=True,
    single_rules_breaks=3,
    add_mult_rules=False,
    # Use the feature frame's own column names so they line up with X_train.
    # data.columns[:-1] was wrong: get_dummies puts the MTRANS_* dummies last,
    # so that slice still contained the target "NObeyesdad" and dropped the
    # final dummy column, shifting the names used for the rules.
    column_names=X.columns,
    print_every_epochs=31,
    print_final_model=True,
)

In [None]:
# Score DSC on the held-out rows and print accuracy, per-class report and confusion matrix.
Y_pred = DSC.predict(X_test)

accuracy = accuracy_score(Y_test, Y_pred)
print(f"Accuracy: {accuracy}")

report = classification_report(Y_test, Y_pred)
print(f'Classification Report:\n{report}')

matrix = confusion_matrix(Y_test, Y_pred)
print(f'Confusion Matrix:\n{matrix}\n')

In [None]:
# Second DS classifier: same settings as DSC plus precompute_rules=True.
DSC2 = DSClassifierMultiQ(
    7,
    min_iter=50,
    max_iter=400,
    debug_mode=True,
    lossfn="MSE",
    num_workers=0,
    min_dloss=1e-7,
    precompute_rules=True,
)

In [None]:
# Train DSC2 with single-feature rules (3 breaks) and multi-feature rules enabled.
losses, epoch, dt = DSC2.fit(
    X_train,
    Y_train,
    add_single_rules=True,
    single_rules_breaks=3,
    add_mult_rules=True,
    # Use the feature frame's own column names so they line up with X_train.
    # data.columns[:-1] was wrong: get_dummies puts the MTRANS_* dummies last,
    # so that slice still contained the target "NObeyesdad" and dropped the
    # final dummy column, shifting the names used for the rules.
    column_names=X.columns,
    print_every_epochs=31,
    print_final_model=True,
)

In [None]:
# Score DSC2 on the held-out rows and print accuracy, per-class report and confusion matrix.
Y2_pred = DSC2.predict(X_test)

accuracy = accuracy_score(Y_test, Y2_pred)
print(f"Accuracy: {accuracy}")

report = classification_report(Y_test, Y2_pred)
print(f'Classification Report:\n{report}')

matrix = confusion_matrix(Y_test, Y2_pred)
print(f'Confusion Matrix:\n{matrix}\n')

In [None]:
# Third DS classifier; constructor arguments are identical to DSC2's.
DSC3 = DSClassifierMultiQ(
    7,
    min_iter=50,
    max_iter=400,
    debug_mode=True,
    lossfn="MSE",
    num_workers=0,
    min_dloss=1e-7,
    precompute_rules=True,
)

In [None]:
# Train DSC3 with single-feature rules only, using 5 breaks per feature.
losses, epoch, dt = DSC3.fit(
    X_train,
    Y_train,
    add_single_rules=True,
    single_rules_breaks=5,
    add_mult_rules=False,
    # Use the feature frame's own column names so they line up with X_train.
    # data.columns[:-1] was wrong: get_dummies puts the MTRANS_* dummies last,
    # so that slice still contained the target "NObeyesdad" and dropped the
    # final dummy column, shifting the names used for the rules.
    column_names=X.columns,
    print_every_epochs=31,
    print_final_model=True,
)

In [None]:
# Print DSC3's most important rules. Class names are listed in the same order
# as the integer codes 0..6 used to encode Y.
class_names = [
    "Insufficient_Weight",
    "Normal_Weight",
    "Overweight_Level_I",
    "Overweight_Level_II",
    "Obesity_Type_I",
    "Obesity_Type_II",
    "Obesity_Type_III",
]
DSC3.print_most_important_rules(classes=class_names)

In [None]:
# Score DSC3 on the held-out rows and print accuracy, per-class report and confusion matrix.
Y3_pred = DSC3.predict(X_test)

accuracy = accuracy_score(Y_test, Y3_pred)
print(f"Accuracy: {accuracy}")

report = classification_report(Y_test, Y3_pred)
print(f'Classification Report:\n{report}')

matrix = confusion_matrix(Y_test, Y3_pred)
print(f'Confusion Matrix:\n{matrix}\n')

In [None]:
# Fourth DS classifier; constructor arguments are identical to DSC2's and DSC3's.
DSC4 = DSClassifierMultiQ(
    7,
    min_iter=50,
    max_iter=400,
    debug_mode=True,
    lossfn="MSE",
    num_workers=0,
    min_dloss=1e-7,
    precompute_rules=True,
)

In [None]:
# Train DSC4 with single-feature rules only, using 2 breaks per feature.
losses, epoch, dt = DSC4.fit(
    X_train,
    Y_train,
    add_single_rules=True,
    single_rules_breaks=2,
    add_mult_rules=False,
    # Use the feature frame's own column names so they line up with X_train.
    # data.columns[:-1] was wrong: get_dummies puts the MTRANS_* dummies last,
    # so that slice still contained the target "NObeyesdad" and dropped the
    # final dummy column, shifting the names used for the rules.
    column_names=X.columns,
    print_every_epochs=31,
    print_final_model=True,
)

In [None]:
# Score DSC4 on the held-out rows and print accuracy, per-class report and confusion matrix.
Y4_pred = DSC4.predict(X_test)

accuracy = accuracy_score(Y_test, Y4_pred)
print(f"Accuracy: {accuracy}")

report = classification_report(Y_test, Y4_pred)
print(f'Classification Report:\n{report}')

matrix = confusion_matrix(Y_test, Y4_pred)
print(f'Confusion Matrix:\n{matrix}\n')