In [None]:
import pandas as pd
from dsgd.DSClassifierMultiQ import DSClassifierMultiQ


from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Classifiers
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# Accuracy per evaluated model, keyed by display name. Later cells add
# entries to this mapping and the final cell prints it.
accuracys = dict()

In [None]:
# Read the raw CSV and discard the columns that are not used as features.
unused_columns = [
    "Neo Reference ID",
    "Name",
    "Close Approach Date",
    "Orbit Determination Date",
    "Orbiting Body",
    "Equinox",
]
data = pd.read_csv('data/nasa.csv').drop(columns=unused_columns)

# Quick overview: (rows, columns) followed by the dtype of each kept column.
print("tamaño del dataset: ", data.shape)
print(data.dtypes)

In [None]:
# Show the first row as a quick look at the remaining columns.
print(data.iloc[:1])

In [None]:
# Shuffle the rows with a fixed seed so that a fresh "Restart & Run All"
# reproduces the same train/test partition (and therefore the same metrics).
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

# Encode the label as 1 where the value equals True and 0 otherwise.
data["Hazardous"] = data["Hazardous"].eq(True).astype(int)

# Convert everything to numeric
data = data.apply(pd.to_numeric)

# The positional slicing below treats the last column as the label; fail
# loudly instead of silently training against the wrong column.
assert data.columns[-1] == "Hazardous", "expected 'Hazardous' to be the last column"

# NOTE(review): this puts only the first 30% of the shuffled rows in the
# training set and the remaining 70% in the test set. Confirm this is
# intentional (e.g. to keep training fast) and not an inverted 70/30 split.
cut = int(0.3 * len(data))

# Separate the training and testing sets
X_train = data.iloc[:cut, :-1].values
y_train = data.iloc[:cut, -1].values
X_test = data.iloc[cut:, :-1].values
y_test = data.iloc[cut:, -1].values

In [None]:
# Both classifiers are built from one shared configuration, so they differ
# only in how they are fitted later on.
# The leading positional argument 2 is presumably the number of classes --
# TODO confirm against the dsgd documentation.
dsc_settings = dict(
    min_iter=50,
    max_iter=400,
    debug_mode=True,
    lossfn="MSE",
    num_workers=0,
    min_dloss=1e-7,
    precompute_rules=True,
)
DSC = DSClassifierMultiQ(2, **dsc_settings)
DSC2 = DSClassifierMultiQ(2, **dsc_settings)

In [None]:
# Fit the first classifier with add_single_rules=True and add_mult_rules=False.
# Feature names are taken from every column except the last (the label).
single_rule_fit_options = dict(
    add_single_rules=True,
    single_rules_breaks=3,
    add_mult_rules=False,
    column_names=data.columns[:-1],
    print_every_epochs=31,
    print_final_model=True,
)
losses, epoch, dt = DSC.fit(X_train, y_train, **single_rule_fit_options)

In [None]:
# Fit the second classifier with the same options as the first one, except
# that add_mult_rules is switched on.
# Note: this rebinds `losses`, `epoch` and `dt` from the previous fit.
mult_rule_fit_options = dict(
    add_single_rules=True,
    single_rules_breaks=3,
    add_mult_rules=True,
    column_names=data.columns[:-1],
    print_every_epochs=31,
    print_final_model=True,
)
losses, epoch, dt = DSC2.fit(X_train, y_train, **mult_rule_fit_options)

In [None]:
# Predict the class of every record in the testing set with each of the two
# fitted classifiers.
y_pred, y_pred2 = DSC.predict(X_test), DSC2.predict(X_test)

In [None]:
# Predictions of both DS classifiers, keyed by the name under which their
# accuracy is stored.
ds_predictions = {
    "DSClassifierMultiQ": y_pred,
    "DSClassifierMultiQ2": y_pred2,
}

# Print both classification reports first, then record both accuracies.
for predicted in ds_predictions.values():
    print(classification_report(y_test, predicted))

for model_name, predicted in ds_predictions.items():
    accuracys[model_name] = accuracy_score(y_test, predicted)

In [None]:
# Confusion matrix on the testing set for each DS classifier, in the same
# order as the predictions were produced.
for predicted in (y_pred, y_pred2):
    print(confusion_matrix(y_test, predicted))

In [None]:
# Global interpretability: print the most important rules of the first
# classifier, using human-readable names for the two classes.
class_labels = ["no hazzardous", "hazzardous"]
DSC.print_most_important_rules(classes=class_labels)

In [None]:
# Features and target for the scikit-learn baselines ('Hazardous' is the
# target column).
# NOTE(review): this stratified 75/25 split is independent of the positional
# 30/70 split used for the DS classifiers, so the accuracies collected in
# `accuracys` are measured on different test sets -- confirm that is intended.
target_column = 'Hazardous'
X = data.drop(columns=target_column)
y = data[target_column]

X1_train, X1_test, y1_train, y1_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)


In [None]:
# Baseline scikit-learn models, keyed by the display name used in the reports.
# NOTE(review): the features are passed to these estimators unscaled;
# LogisticRegression, SVC and KNeighborsClassifier are scale-sensitive, so a
# StandardScaler pipeline may be worth evaluating.
models = {
    "Logistic Regression": LogisticRegression(),
    # Seeded so the forest (bootstrap samples / feature sub-sampling) gives the
    # same result on every run, matching the seeded train_test_split.
    "Random Forest": RandomForestClassifier(max_depth=5, random_state=42),
    "Support Vector Machine": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier()
}

# Helper that trains and evaluates a collection of models
def train_and_evaluate(models, X_train, y_train, X_test, y_test):
    """Fit each model, score it on the test data and print its metrics.

    For every ``name -> estimator`` entry in ``models`` the estimator is
    fitted on ``(X_train, y_train)`` and used to predict ``X_test``. The
    accuracy is stored in the module-level ``accuracys`` dict under ``name``
    and printed, followed by the classification report and the confusion
    matrix.

    Args:
        models: Mapping of display name to an object exposing ``fit(X, y)``
            and ``predict(X)``.
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.

    Returns:
        None. Results are reported via ``print`` and the ``accuracys`` dict.
    """
    for name in models:
        estimator = models[name]
        estimator.fit(X_train, y_train)
        predictions = estimator.predict(X_test)

        accuracy = accuracy_score(y_test, predictions)
        accuracys[name] = accuracy
        print(f'{name} Accuracy: {accuracy:.4f}')

        report = classification_report(y_test, predictions)
        print(f'Classification Report for {name}:\n{report}')

        matrix = confusion_matrix(y_test, predictions)
        print(f'Confusion Matrix for {name}:\n{matrix}\n')

# Fit and score every baseline model on the stratified split.
train_and_evaluate(
    models,
    X_train=X1_train,
    y_train=y1_train,
    X_test=X1_test,
    y_test=y1_test,
)


In [None]:
# Summary: one line per model with the accuracy recorded for it.
for model_name, score in accuracys.items():
    print(model_name, score)