<a href="https://colab.research.google.com/github/JGMirand4/JGMirand4/blob/main/Untitled7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install pyswarm


In [13]:
import numpy as np

# Protected primitive functions defined by the problem
def pdiv(x, y):
    """Protected element-wise division: ``x / y`` where ``y != 0``, else ``0``.

    Both operands are converted to arrays and broadcast against each other.
    The output buffer is always floating point, so integer inputs (for
    example integer feature columns) do not fail when the true-division
    result is written into it.

    Args:
        x: Numerator (scalar or array-like).
        y: Denominator (scalar or array-like), broadcastable with ``x``.

    Returns:
        Floating-point ndarray with the broadcast shape of ``x`` and ``y``;
        positions where ``y == 0`` hold ``0``.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # True division yields floats; keep an existing inexact dtype, otherwise
    # (ints, bools) fall back to float64.
    dtype = np.result_type(x, y)
    if not np.issubdtype(dtype, np.inexact):
        dtype = np.float64

    # Pre-filled with zeros: entries masked out by `where` keep this value.
    out = np.zeros(np.broadcast(x, y).shape, dtype=dtype)
    return np.divide(x, y, out=out, where=y != 0)

def psqrt(x):
    """Protected square root: the square root of the absolute value of ``x``.

    Taking the absolute value first keeps the result real for negative input.
    """
    magnitude = np.abs(x)
    return np.sqrt(magnitude)

def plog(x):
    """Protected logarithm: ``log(|x| + 1)``.

    The argument of the logarithm is always at least 1, so the result is
    finite and non-negative for any finite real ``x``.
    """
    shifted = np.abs(x) + 1
    return np.log(shifted)

# Expression evaluation
def eval_expression(expr, x):
    """Evaluate the Python expression string ``expr`` and return its value.

    The string is evaluated with ``eval`` inside this function, so it can
    refer to the argument ``x`` and to any module-level name.

    NOTE(review): ``eval`` executes arbitrary code. Only pass expressions
    built by trusted code (such as the grammar in this file), never text
    coming from an external source.
    """
    value = eval(expr)
    return value

# Grammar of candidate expressions. "<expr>" lists the operators and
# functions that can be combined; "<var>" lists the 18 columns of x.
grammar = {
    "<expr>": (
        # Binary arithmetic operators
        [f"<expr> {op} <expr>" for op in ("+", "-", "*")]
        # Protected division takes two sub-expressions
        + ["pdiv(<expr>, <expr>)"]
        # Unary functions
        + [
            f"{fn}(<expr>)"
            for fn in ("psqrt", "np.abs", "np.sin", "np.tanh", "np.exp", "plog")
        ]
        # A bare variable is the only non-recursive production
        + ["<var>"]
    ),
    "<var>": [f"x[:, {col}]" for col in range(18)],
}


In [14]:
import random
import re

# Matches grammar nonterminals such as "<expr>" wherever they occur in a
# production, including inside function-call parentheses.
_NONTERMINAL_RE = re.compile(r"<[^<>\s]+>")


def _min_expansion_depths(grammar):
    """Return, per nonterminal, the fewest nested expansions that reach terminals only."""
    depths = {}
    changed = True
    while changed:  # fixed-point iteration; depths only ever decrease
        changed = False
        for symbol, productions in grammar.items():
            for production in productions:
                refs = [r for r in _NONTERMINAL_RE.findall(production) if r in grammar]
                if any(r not in depths for r in refs):
                    continue
                depth = 1 + max((depths[r] for r in refs), default=0)
                if depth < depths.get(symbol, float("inf")):
                    depths[symbol] = depth
                    changed = True
    return depths


def generate_expression(symbol, grammar, max_depth=8):
    """Generate a random expression derived from ``symbol`` using ``grammar``.

    Nonterminals are located with a regular expression rather than by
    splitting on whitespace, so occurrences embedded in other text (for
    example ``psqrt(<expr>)`` or ``pdiv(<expr>, <expr>)``) are expanded too.
    Whitespace in the productions is preserved in the result.

    Args:
        symbol: Start symbol. If it is not a key of ``grammar`` it is
            returned unchanged.
        grammar: Mapping from nonterminal to a list of production strings.
        max_depth: Budget of nested expansions. While budget remains, every
            production that can still finish within it is eligible; once it
            cannot be met, only the shortest-to-finish productions are used,
            which guarantees termination.

    Returns:
        The generated expression string, free of grammar nonterminals.

    Raises:
        ValueError: If ``symbol`` cannot be derived into terminals at all.
    """
    if symbol not in grammar:
        return symbol

    depths = _min_expansion_depths(grammar)
    if symbol not in depths:
        raise ValueError(f"symbol {symbol!r} has no finite derivation in the grammar")

    def cost(production):
        # Minimum number of nested expansions needed to finish `production`.
        refs = [r for r in _NONTERMINAL_RE.findall(production) if r in grammar]
        return 1 + max((depths.get(r, float("inf")) for r in refs), default=0)

    def expand(current, budget):
        productions = grammar[current]
        candidates = [p for p in productions if cost(p) <= budget]
        if not candidates:
            # Budget exhausted: fall back to the quickest way out.
            cheapest = min(cost(p) for p in productions)
            candidates = [p for p in productions if cost(p) == cheapest]
        production = random.choice(candidates)  # pick a random rule

        def replace(match):
            token = match.group(0)
            return expand(token, budget - 1) if token in grammar else token

        return _NONTERMINAL_RE.sub(replace, production)

    return expand(symbol, max_depth)


In [16]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.model_selection import train_test_split
import pandas as pd

# Fitness function: scores a generated expression as a binary classifier
def fitness_function(expr, x_train, y_train, x_test, y_test):
    """Score ``expr`` as a binary classifier and return its test-set metrics.

    The expression is evaluated with ``eval_expression`` on both feature
    matrices and its output is thresholded at 0.5: values above 0.5 become
    class 1, everything else class 0.

    Args:
        expr: Expression string accepted by ``eval_expression``.
        x_train: Training features. The expression is evaluated on them, so
            a failure here also penalises the expression.
        y_train: Training labels (not used by the current scoring).
        x_test: Test features used for the reported metrics.
        y_test: Test labels used for the reported metrics.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)`` computed on the test
        split, or ``(0, 0, 0, 0)`` if evaluating or scoring raises.
    """
    try:
        # Evaluate the expression on the training set.
        # NOTE(review): these predictions are computed but never scored; the
        # metrics below come from the test split only, so selecting
        # expressions by this fitness leaks the test set -- confirm intent.
        y_pred_train = eval_expression(expr, x_train)
        y_pred_train = np.where(y_pred_train > 0.5, 1, 0)  # binary classification

        # Evaluate the expression on the test set
        y_pred_test = eval_expression(expr, x_test)
        y_pred_test = np.where(y_pred_test > 0.5, 1, 0)

        # Metrics on the test set. Random expressions often predict a single
        # class; zero_division=0 keeps the score at 0 in that case without
        # emitting an UndefinedMetricWarning on every evaluation.
        accuracy = accuracy_score(y_test, y_pred_test)
        precision = precision_score(y_test, y_pred_test, zero_division=0)
        recall = recall_score(y_test, y_pred_test, zero_division=0)
        f1 = f1_score(y_test, y_pred_test, zero_division=0)

        return accuracy, precision, recall, f1

    except Exception:
        # Deliberate best-effort: invalid expressions (syntax errors, shape
        # or dtype problems, ...) are penalised with zeros for all metrics.
        return 0, 0, 0, 0


In [None]:
from pyswarm import pso

# Load the dataset from a CSV file in the working directory
df = pd.read_csv("phishing_sbseg2020_fixed.csv")

# Assumes the last column is the label (target) -- TODO confirm against the CSV
X = df.iloc[:, :-1].values  # Features: every column except the last
y = df.iloc[:, -1].values   # Labels: the last column

# Split the dataset into train and test sets (80% train, 20% test);
# random_state is fixed so the split is the same on every run
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Report the number of rows in each split
print(f"Tamanho do conjunto de treino: {len(x_train)}")
print(f"Tamanho do conjunto de teste: {len(x_test)}")

# PSO configuration and driver
def optimize_pso(num_particles=500, max_iters=1000, num_codons=100, max_depth=6):
    """Search for a high-F1 expression with particle swarm optimisation.

    Each particle is a vector of ``num_codons`` values in ``[0, 1]``. The
    vector is decoded deterministically into an expression of the
    module-level ``grammar``: every time a nonterminal is expanded, the next
    codon selects which production to use. The fitness is therefore a
    function of the particle position, and the best position found can be
    decoded back into the best expression.

    Args:
        num_particles: Swarm size passed to ``pso``.
        max_iters: Maximum number of PSO iterations.
        num_codons: Dimensionality of each particle (number of codons). The
            codon sequence is reused cyclically if an expression needs more.
        max_depth: Expansion depth after which only productions that do not
            refer to the symbol being expanded are allowed, so decoding
            terminates (for ``<expr>`` this leaves only ``<var>``).

    Returns:
        Tuple ``(best_expression, best_f1)``.

    Raises:
        ValueError: If ``num_codons`` is smaller than 1.
    """
    if num_codons < 1:
        raise ValueError("num_codons must be at least 1")

    nonterminal = re.compile(r"<[^<>\s]+>")

    def decode(solution):
        # Map a particle position to an expression string.
        codons = np.clip(np.asarray(solution, dtype=float), 0.0, 1.0)
        position = 0

        def expand(symbol, depth):
            nonlocal position
            productions = grammar[symbol]
            if depth >= max_depth:
                # Depth budget spent: avoid productions that recurse into
                # the same symbol, if any alternative exists.
                closing = [p for p in productions if symbol not in p]
                productions = closing or productions
            codon = codons[position % len(codons)]
            position += 1
            # A codon of exactly 1.0 must still map to the last production.
            index = min(int(codon * len(productions)), len(productions) - 1)

            def replace(match):
                token = match.group(0)
                return expand(token, depth + 1) if token in grammar else token

            return nonterminal.sub(replace, productions[index])

        return expand("<expr>", 0)

    # Many positions decode to the same expression; score each one once.
    f1_by_expr = {}

    def fitness_wrapper(solution):
        expr = decode(solution)
        if expr not in f1_by_expr:
            # NOTE(review): fitness_function reports test-split metrics, so
            # the search selects on the test set -- confirm this is intended.
            accuracy, precision, recall, f1 = fitness_function(expr, x_train, y_train, x_test, y_test)
            print(f"Acurácia: {accuracy}, Precisão: {precision}, Recall: {recall}, F1-score: {f1}")
            f1_by_expr[expr] = f1
        return -f1_by_expr[expr]  # pso minimises, so negate to maximise F1

    # PSO search bounds: one [0, 1] interval per codon
    lb = [0] * num_codons
    ub = [1] * num_codons

    # Run PSO
    best_solution, best_fitness = pso(
        fitness_wrapper, lb, ub, swarmsize=num_particles, maxiter=max_iters
    )
    best_expr = decode(best_solution)
    print(f"Melhor solução: {best_solution}")
    print(f"Melhor expressão: {best_expr}")
    print(f"Melhor F1-score: {-best_fitness}")
    return best_expr, -best_fitness

# Run the PSO search when this cell is executed
optimize_pso()