In [None]:
def save_model_info(model_NN_, base_path='model_info'):
    """Persist a trained model wrapper and its metadata inside ``base_path``.

    The following files are written (existing files are overwritten):
    ``model_complete.h5``, ``model_weights.h5``, ``model_architecture.json``,
    ``training_history.pkl``, ``scaler.pkl``, ``model_config.json`` and
    ``evaluation_results.json``.

    Args:
        model_NN_: Object exposing ``model``, ``history``, ``scaler``, the
            hyper-parameter attributes collected in ``config`` below, and an
            ``evaluate()`` method returning ``(train_metrics, test_metrics)``.
        base_path: Target directory; created (with parents) when missing.

    Raises:
        TypeError: If a configuration or metric value cannot be converted to
            JSON even after the NumPy/pandas fallback.
    """
    # Local imports mirror the original local ``import os`` so the function
    # does not depend on notebook-level imports of the stdlib modules.
    # ``joblib`` is third-party and is expected to be imported by the file.
    import json
    import os
    import pickle

    def _json_default(value):
        # NumPy scalars/arrays and pandas indexes expose ``tolist``; use it to
        # obtain plain Python objects that ``json`` can encode.
        to_list = getattr(value, 'tolist', None)
        if callable(to_list):
            return to_list()
        raise TypeError(
            f'Object of type {type(value).__name__} is not JSON serializable'
        )

    # ``exist_ok`` avoids the check-then-create race of the previous version.
    os.makedirs(base_path, exist_ok=True)

    # Save the complete model.
    model_NN_.model.save(os.path.join(base_path, 'model_complete.h5'))

    # Save the model weights.
    # NOTE(review): some Keras versions require weight files to end in
    # ``.weights.h5`` - confirm against the installed version.
    model_NN_.model.save_weights(os.path.join(base_path, 'model_weights.h5'))

    # Save the model architecture.
    with open(os.path.join(base_path, 'model_architecture.json'), 'w', encoding='utf-8') as f:
        f.write(model_NN_.model.to_json())

    # Save the training history.
    with open(os.path.join(base_path, 'training_history.pkl'), 'wb') as f:
        pickle.dump(model_NN_.history.history, f)

    # Save the scaler.
    joblib.dump(model_NN_.scaler, os.path.join(base_path, 'scaler.pkl'))

    # Save the model configuration.
    config = {
        'test_size': model_NN_.test_size,
        'random_state': model_NN_.random_state,
        'epochs': model_NN_.epochs,
        'batch_size': model_NN_.batch_size,
        'learning_rate': model_NN_.learning_rate,
        'scaling_method': model_NN_.scaling_method,
        'features': model_NN_.features,
        'target': model_NN_.target
    }

    with open(os.path.join(base_path, 'model_config.json'), 'w', encoding='utf-8') as f:
        json.dump(config, f, default=_json_default)

    # Evaluate the model and save the evaluation results.
    train_metrics, test_metrics = model_NN_.evaluate()

    evaluation_results = {
        'train_metrics': train_metrics,
        'test_metrics': test_metrics
    }

    with open(os.path.join(base_path, 'evaluation_results.json'), 'w', encoding='utf-8') as f:
        json.dump(evaluation_results, f, default=_json_default)

# USAGE EXAMPLE:
# Specify the directory where the information will be saved
# directory_path = 'path/to/save/model_info'
# Save all the important information
# save_model_info(model_NN_, base_path=directory_path)


### Function that computes the metric required by the competition
def instacart_f1_score(data, user_id_col, true_col, pred_col):
    """Return the mean per-user F1 score between true and predicted item sets.

    For every distinct value of ``user_id_col`` only the first row (in frame
    order) is scored. A cell holding an integer is treated as a one-element
    set; any other cell is converted with ``set(...)``.

    Scoring rules per user:
        * both sets empty -> 1.0
        * exactly one set empty -> 0.0
        * otherwise -> the usual F1 of precision and recall

    Args:
        data: Table with one or more rows per user (used like a pandas
            DataFrame).
        user_id_col: Column identifying the user.
        true_col: Column holding the true items.
        pred_col: Column holding the predicted items.

    Returns:
        The arithmetic mean of the per-user scores, or NaN when ``data`` has
        no rows.
    """
    def _as_set(value):
        # A lone integer stands for a single item rather than an iterable.
        if isinstance(value, (np.integer, int)):
            return {value}
        return set(value)

    if len(data) == 0:
        # Nothing to average; return NaN without NumPy's empty-mean warning.
        return np.nan

    # One pass to pick each user's first row, instead of re-filtering the
    # whole frame once per user.
    first_rows = data.drop_duplicates(subset=user_id_col, keep='first')

    f1_scores = []
    for raw_true, raw_pred in zip(first_rows[true_col].values,
                                  first_rows[pred_col].values):
        y_true = _as_set(raw_true)
        y_pred = _as_set(raw_pred)

        if not y_true and not y_pred:
            f1_scores.append(1.0)
        elif not y_true or not y_pred:
            f1_scores.append(0.0)
        else:
            tp = len(y_true & y_pred)
            # Both sets are non-empty here, so the denominators are never 0.
            precision = tp / len(y_pred)
            recall = tp / len(y_true)
            if tp:
                f1_scores.append(2 * precision * recall / (precision + recall))
            else:
                f1_scores.append(0.0)

    return np.mean(f1_scores)

# Evaluation function
def metricas_validacao_NN(model, X, y, threshold=0.5):
    """Compute, print and return binary-classification metrics for ``model``.

    Args:
        model: Object whose ``predict(X)`` returns scores for ``X``.
        X: Inputs forwarded unchanged to ``model.predict``.
        y: Ground-truth labels compared against the scores and predictions.
        threshold: Scores strictly greater than this value become class 1.

    Returns:
        Tuple ``(auc, logloss, accuracy, precision, recall, f1)``.
    """
    scores = model.predict(X)
    # Hard labels are obtained by thresholding the predicted scores.
    labels = (scores > threshold).astype(int)

    # Score-based metrics first, then the label-based ones.
    auc = roc_auc_score(y, scores)
    logloss = log_loss(y, scores)
    accuracy = accuracy_score(y, labels)
    precision = precision_score(y, labels)
    recall = recall_score(y, labels)
    f1 = f1_score(y, labels)

    print(f'AUC: {auc:.4f}, Log Loss: {logloss:.4f}, Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')
    return auc, logloss, accuracy, precision, recall, f1

class SimplifiedNNModel:
    """Binary classifier wrapper: split, scale, train, evaluate and plot.

    Construction immediately runs :meth:`preprocess`, which builds the
    train/test split and fits the scaler. The network itself is only created
    and trained by :meth:`train` (or :meth:`run`).
    """

    def __init__(self, data, features, target, test_flag_label=None, test_size=0.3, random_state=42, epochs=50, batch_size=32, learning_rate=0.001, scaling_method='standard'):
        """Store the configuration and preprocess ``data``.

        Args:
            data: Table indexed by column labels (used like a pandas
                DataFrame).
            features: Column labels used as model inputs.
            target: Column label of the binary target.
            test_flag_label: Optional column whose value 0 marks training
                rows and 1 marks test rows. When falsy, a stratified random
                split is used instead.
            test_size: Test fraction for the random split.
            random_state: Seed for the random split.
            epochs: Number of training epochs.
            batch_size: Training batch size.
            learning_rate: Learning rate given to the Adam optimizer.
            scaling_method: ``'standard'`` or ``'minmax'``.

        Raises:
            ValueError: If ``scaling_method`` is not supported.
        """
        self.data = data
        self.features = features
        self.target = target
        self.test_flag_label = test_flag_label
        self.test_size = test_size
        self.random_state = random_state
        self.epochs = epochs
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.scaling_method = scaling_method
        self.model = None
        self.history = None
        self.scaler = None
        self.preprocess()

    def preprocess(self):
        """Create the train/test split and scale the feature matrices.

        The scaler is fitted on the training features only and then applied
        to both the training and the test features.

        Raises:
            ValueError: If ``scaling_method`` is not supported.
        """
        if self.test_flag_label:
            # Pre-defined split: flag 0 -> train, flag 1 -> test.
            train_data = self.data[self.data[self.test_flag_label] == 0]
            test_data = self.data[self.data[self.test_flag_label] == 1]
            self.X_train, self.y_train = train_data[self.features], train_data[self.target]
            self.X_test, self.y_test = test_data[self.features], test_data[self.target]
        else:
            X = self.data[self.features]
            y = self.data[self.target]
            self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=self.test_size, random_state=self.random_state, stratify=y)

        # Choose the scaling method.
        if self.scaling_method == 'standard':
            self.scaler = StandardScaler()
        elif self.scaling_method == 'minmax':
            self.scaler = MinMaxScaler()
        else:
            raise ValueError("Método de escalonamento inválido. Use 'standard' ou 'minmax'.")

        self.scaler.fit(self.X_train)
        self.X_train = self.scaler.transform(self.X_train)
        self.X_test = self.scaler.transform(self.X_test)

    def build_model(self):
        """Build and compile the network (not yet trained).

        The stack is four ReLU ``Dense`` layers (254, 128, 64, 32 units), each
        followed by ``Dropout(0.5)``, and a single sigmoid output unit. It is
        compiled with binary cross-entropy, Adam and the accuracy metric.

        Returns:
            The compiled model.
        """
        model = Sequential()
        # NOTE(review): 254 may have been intended as 256; kept unchanged so
        # that previously saved weights remain compatible.
        model.add(Dense(254, input_dim=len(self.features), activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(32, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(1, activation='sigmoid'))

        optimizer = Adam(learning_rate=self.learning_rate)
        model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
        return model

    def train(self):
        """Build a fresh model and fit it, storing the fit history.

        The test split is passed as ``validation_data``, so the per-epoch
        validation values in the history are computed on the test set.
        """
        self.model = self.build_model()
        self.history = self.model.fit(self.X_train, self.y_train, epochs=self.epochs, batch_size=self.batch_size, validation_data=(self.X_test, self.y_test), verbose=1)

    def evaluate(self):
        """Print and return the metrics on the train and test splits.

        Returns:
            ``(train_metrics, test_metrics)`` as returned by
            ``metricas_validacao_NN``, or ``(None, None)`` when no model has
            been trained.
        """
        if self.model is None:
            print("Modelo não foi treinado.")
            return None, None
        print("Training Metrics:")
        train_metrics = metricas_validacao_NN(self.model, self.X_train, self.y_train)
        print("Testing Metrics:")
        test_metrics = metricas_validacao_NN(self.model, self.X_test, self.y_test)
        return train_metrics, test_metrics

    def plot_roc_curve(self):
        """Plot the ROC curve of the model on the test split.

        Prints a message and returns without plotting when no model exists.
        """
        if self.model is None:
            print("Modelo não foi treinado.")
            return
        prob_test = self.model.predict(self.X_test)
        fpr, tpr, _ = roc_curve(self.y_test, prob_test)
        auc = roc_auc_score(self.y_test, prob_test)

        plt.figure(dpi=300)
        plt.plot(fpr, tpr, color="darkorange", label="AUC = %0.2f" % auc)
        # Diagonal reference line from (0, 0) to (1, 1).
        plt.plot([0, 1], [0, 1], color="navy", linestyle="--")
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel("False Positive Rate")
        plt.ylabel("True Positive Rate")
        plt.title("ROC Curve")
        plt.legend(loc="lower right")
        plt.show()

    def plot_loss(self):
        """Plot the training and validation loss stored in the fit history.

        Prints a message and returns without plotting when there is no model
        or no training history.
        """
        # The history is what is actually read below, so it must be checked
        # too: a model assigned without calling ``train`` has no history.
        if self.model is None or self.history is None:
            print("Modelo não foi treinado.")
            return
        history = self.history.history
        plt.figure(dpi=300)
        plt.plot(history['loss'], label='train_loss')
        plt.plot(history['val_loss'], label='val_loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.title('Loss Curve')
        plt.legend(loc='upper right')
        plt.show()

    def predict(self, new_data, threshold=0.5):
        """Scale ``new_data`` and return scores and thresholded labels.

        Args:
            new_data: Feature values. When it exposes ``columns`` containing
                every training feature, those columns are selected by name in
                training order; otherwise it is passed to the scaler as is.
            threshold: Scores strictly greater than this value become 1.

        Returns:
            ``(probabilities, binary_predictions)``.

        Raises:
            ValueError: If the model has not been trained.
        """
        if self.model is None:
            raise ValueError("O modelo não foi treinado ainda.")
        # Select features by name so extra or reordered columns in a labelled
        # table cannot be scaled positionally against the wrong statistics.
        columns = getattr(new_data, 'columns', None)
        if columns is not None and all(name in columns for name in self.features):
            new_data = new_data[list(self.features)]
        new_data_scaled = self.scaler.transform(new_data)
        probabilities = self.model.predict(new_data_scaled)
        binary_predictions = (probabilities > threshold).astype(int)
        return probabilities, binary_predictions

    def run(self):
        """Train the model, print its metrics and show the ROC and loss plots."""
        self.train()
        train_metrics, test_metrics = self.evaluate()
        if train_metrics is None or test_metrics is None:
            print("Não foi possível avaliar o modelo.")
            return
        self.plot_roc_curve()
        self.plot_loss()
        return

# Usage example:
# model = SimplifiedNNModel(data=df, features=['feature1', 'feature2'], target='target', test_flag_label='is_test')
# model.run()
