In [None]:
import pandas as pd
import numpy as np

from dsgd.DSClassifierMultiQ import DSClassifierMultiQ



from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Clasificadores
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# Exploración de datos y preprocesamiento

In [None]:
# Load the attribute table from disk and report its shape.
data = pd.read_csv('data/list_attr_celeba.csv')
print("tamaño del dataset: ", data.shape)
# Drop the image identifier column so it is not used as a feature.
data = data.drop('image_id', axis=1)
# Recode every -1 as 0 (presumably turning -1/1 flags into 0/1 -- confirm against the CSV).
data = data.replace(-1, 0)
# Shuffle the rows with a fixed seed: later cells split by row position, so an
# unseeded shuffle would give different train/test rows on every run.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)
print(data.dtypes)

In [None]:
# Features are every column except "Attractive"; "Attractive" itself is the target.
X = data.drop("Attractive", axis=1)
y = data["Attractive"]

# Testeo de los clasificadores

In [None]:
# Hold out 25% of the rows for testing, stratified on y and seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

In [None]:
# Accuracy of every evaluated classifier, keyed by display name.
accuracys = {}
# Baseline scikit-learn classifiers, keyed by display name.
models = {
    "Logistic Regression": LogisticRegression(),
    # Fixed seed: the forest is built with randomness, so without it the
    # reported accuracy changes from run to run.
    "Random Forest": RandomForestClassifier(max_depth=5, random_state=42),
    "Support Vector Machine": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier()
}

# Helper that trains and evaluates a collection of models.
def train_and_evaluate(models, X_train, y_train, X_test, y_test):
    """Fit each model, print its test metrics and record its accuracy.

    Args:
        models: Mapping from a display name to an estimator exposing
            ``fit`` and ``predict``.
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features passed to ``predict``.
        y_test: Labels the predictions are scored against.

    Returns:
        None. For every model the accuracy, classification report and
        confusion matrix are printed, and the accuracy is stored in the
        module-level ``accuracys`` dict under the model's name.
    """
    separator = '---------------------------------------------------\n'
    for label in models:
        estimator = models[label]
        estimator.fit(X_train, y_train)
        predictions = estimator.predict(X_test)

        score = accuracy_score(y_test, predictions)
        print(f'{label} Accuracy: {score:.4f}')
        # Side effect: results are collected in the notebook-level dict.
        accuracys[label] = score

        report = classification_report(y_test, predictions)
        print(f'Classification Report for {label}:\n{report}')
        matrix = confusion_matrix(y_test, predictions)
        print(f'Confusion Matrix for {label}:\n{matrix}\n')
        print(separator)

# Fit and score every baseline model on the train/test split.
train_and_evaluate(
    models=models,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)


# Testeo del clasificador de DS

In [None]:
Y = y
# Run every feature column through pd.to_numeric before extracting raw arrays.
X = X.apply(pd.to_numeric)

# Use the same seeded, stratified 75/25 split as the scikit-learn baselines so
# that every accuracy stored in `accuracys` is measured on the same held-out
# rows. Slicing the last 25% of rows instead yields a different, unstratified
# test set (and an empty training set when the slice length is 0).
# The DS classifier is fed plain NumPy arrays, hence `.values`.
X_train, X_test, Y_train, Y_test = train_test_split(
    X.values,
    Y.values,
    test_size=0.25,
    random_state=42,
    stratify=Y,
)

# Sanity check: split sizes and a peek at the target values.
print(len(X_train), len(X_test), len(Y_train), len(Y_test))
print(Y.head())


## clasificador 1

In [None]:
# Classifier 1: MSE loss, optimizer left at the library default.
DSC = DSClassifierMultiQ(
    2,
    min_iter=50,
    max_iter=400,
    debug_mode=True,
    lossfn="MSE",
    num_workers=0,
    min_dloss=1e-7,
    precompute_rules=True,
)

In [None]:
# Train classifier 1 (add_single_rules on, add_mult_rules off).
# Rule names come from X.columns: X is `data` without "Attractive", so its
# columns line up one-to-one with the columns of the feature matrix.
# `data.columns[:-1]` only matches when "Attractive" is the last column of `data`.
losses, epoch, dt = DSC.fit(
    X_train,
    Y_train,
    add_single_rules=True,
    single_rules_breaks=3,
    add_mult_rules=False,
    column_names=X.columns,
    print_every_epochs=31,
    print_final_model=True,
)

In [None]:
# Print the most important rules of classifier 1, labelling the two classes.
DSC.print_most_important_rules(
    classes=["bad appearance", "good appearance"],
)

In [None]:
# Score classifier 1 on the held-out rows and record its accuracy.
Y_pred = DSC.predict(X_test)
accuracy = accuracys["DSClassifierMultiQ"] = accuracy_score(Y_test, Y_pred)
print(f'Accuracy: {accuracy:.4f}')
print(f'Classification Report:\n{classification_report(Y_test, Y_pred)}')
print(f'Confusion Matrix:\n{confusion_matrix(Y_test, Y_pred)}\n')

## clasificador 2

In [None]:
# Classifier 2: MSE loss with the "sgd" optimizer.
DSC2 = DSClassifierMultiQ(
    2,
    min_iter=50,
    max_iter=400,
    debug_mode=True,
    optim="sgd",
    lossfn="MSE",
    num_workers=0,
    min_dloss=1e-7,
    precompute_rules=True,
)

In [None]:
# Train classifier 2 (add_single_rules on, add_mult_rules off).
# Rule names come from X.columns: X is `data` without "Attractive", so its
# columns line up one-to-one with the columns of the feature matrix.
# `data.columns[:-1]` only matches when "Attractive" is the last column of `data`.
losses, epoch, dt = DSC2.fit(
    X_train,
    Y_train,
    add_single_rules=True,
    single_rules_breaks=3,
    add_mult_rules=False,
    column_names=X.columns,
    print_every_epochs=31,
    print_final_model=True,
)

In [None]:
# Print the most important rules of classifier 2, labelling the two classes.
DSC2.print_most_important_rules(
    classes=["bad appearance", "good appearance"],
)

In [None]:
# Score classifier 2 on the held-out rows and record its accuracy.
Y_pred2 = DSC2.predict(X_test)
accuracy = accuracys["DSClassifierMultiQ SGD"] = accuracy_score(Y_test, Y_pred2)
print(f'Accuracy: {accuracy:.4f}')
print(f'Classification Report:\n{classification_report(Y_test, Y_pred2)}')
print(f'Confusion Matrix:\n{confusion_matrix(Y_test, Y_pred2)}\n')

## clasificador 3

In [None]:
# Classifier 3: CE loss, optimizer left at the library default.
DSC3 = DSClassifierMultiQ(
    2,
    min_iter=50,
    max_iter=400,
    debug_mode=True,
    lossfn="CE",
    num_workers=0,
    min_dloss=1e-7,
    precompute_rules=True,
)

In [None]:
# Train classifier 3 (add_single_rules on, add_mult_rules off).
# Rule names come from X.columns: X is `data` without "Attractive", so its
# columns line up one-to-one with the columns of the feature matrix.
# `data.columns[:-1]` only matches when "Attractive" is the last column of `data`.
losses, epoch, dt = DSC3.fit(
    X_train,
    Y_train,
    add_single_rules=True,
    single_rules_breaks=3,
    add_mult_rules=False,
    column_names=X.columns,
    print_every_epochs=31,
    print_final_model=True,
)

In [None]:
# Print the most important rules of classifier 3, labelling the two classes.
DSC3.print_most_important_rules(
    classes=["bad appearance", "good appearance"],
)

In [None]:
# Score classifier 3 on the held-out rows and record its accuracy.
Y_pred3 = DSC3.predict(X_test)
accuracy = accuracys["DSClassifierMultiQ CE"] = accuracy_score(Y_test, Y_pred3)
print(f'Accuracy: {accuracy:.4f}')
print(f'Classification Report:\n{classification_report(Y_test, Y_pred3)}')
print(f'Confusion Matrix:\n{confusion_matrix(Y_test, Y_pred3)}\n')

## clasificador 4

In [None]:
# Classifier 4: CE loss with the "sgd" optimizer.
DSC4 = DSClassifierMultiQ(
    2,
    min_iter=50,
    max_iter=400,
    debug_mode=True,
    optim="sgd",
    lossfn="CE",
    num_workers=0,
    min_dloss=1e-7,
    precompute_rules=True,
)

In [None]:
# Train classifier 4 (add_single_rules on, add_mult_rules off).
# Rule names come from X.columns: X is `data` without "Attractive", so its
# columns line up one-to-one with the columns of the feature matrix.
# `data.columns[:-1]` only matches when "Attractive" is the last column of `data`.
losses, epoch, dt = DSC4.fit(
    X_train,
    Y_train,
    add_single_rules=True,
    single_rules_breaks=3,
    add_mult_rules=False,
    column_names=X.columns,
    print_every_epochs=31,
    print_final_model=True,
)

In [None]:
# Print the most important rules of classifier 4, labelling the two classes.
DSC4.print_most_important_rules(
    classes=["bad appearance", "good appearance"],
)

In [None]:
# Score classifier 4 on the held-out rows and record its accuracy.
Y_pred4 = DSC4.predict(X_test)
accuracy = accuracys["DSClassifierMultiQ CE  SGD"] = accuracy_score(Y_test, Y_pred4)
print(f'Accuracy: {accuracy:.4f}')
print(f'Classification Report:\n{classification_report(Y_test, Y_pred4)}')
print(f'Confusion Matrix:\n{confusion_matrix(Y_test, Y_pred4)}\n')

## clasificador 5

In [None]:
# Classifier 5: same constructor settings as classifier 1 (MSE loss, default
# optimizer); it differs only in how it is fitted.
DSC5 = DSClassifierMultiQ(
    2,
    min_iter=50,
    max_iter=400,
    debug_mode=True,
    lossfn="MSE",
    num_workers=0,
    min_dloss=1e-7,
    precompute_rules=True,
)

In [None]:
# Train classifier 5 (add_single_rules on, add_mult_rules on).
# Rule names come from X.columns: X is `data` without "Attractive", so its
# columns line up one-to-one with the columns of the feature matrix.
# `data.columns[:-1]` only matches when "Attractive" is the last column of `data`.
losses, epoch, dt = DSC5.fit(
    X_train,
    Y_train,
    add_single_rules=True,
    single_rules_breaks=3,
    add_mult_rules=True,
    column_names=X.columns,
    print_every_epochs=31,
    print_final_model=True,
)

In [None]:
# Print the most important rules of classifier 5, labelling the two classes.
DSC5.print_most_important_rules(
    classes=["bad appearance", "good appearance"],
)

In [None]:
# Score classifier 5 on the held-out rows and record its accuracy.
Y_pred5 = DSC5.predict(X_test)
accuracy = accuracys["DSClassifierMultiQ MSE adam multi-layer"] = accuracy_score(Y_test, Y_pred5)
print(f'Accuracy: {accuracy:.4f}')
print(f'Classification Report:\n{classification_report(Y_test, Y_pred5)}')
print(f'Confusion Matrix:\n{confusion_matrix(Y_test, Y_pred5)}\n')

## resultados

In [None]:
# List every recorded accuracy, one classifier per line, in insertion order.
for name, score in accuracys.items():
    print(name, score)