<a href="https://colab.research.google.com/github/AlejandroOliverosVera/blank/blob/main/FS_iCSA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [82]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [83]:
import random as rnd
import math
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score
from sklearn.decomposition import PCA
import sklearn.gaussian_process as gp
import tensorflow as tf
import time as tm

In [84]:
class ML:
  def __init__(self):
    # Leer el archivo CSV
    self.data = pd.read_csv('/content/drive/MyDrive/data.csv', header = 0)
    self.principal_components = self.find_principal_features()

  def find_principal_features(self):
    pca = PCA()
    principal_components = pca.fit_transform(self.data)
    explained_variance_ratio = pca.explained_variance_ratio_

    # Calculate the cumulative explained variance ratio
    cumulative_variance = np.cumsum(explained_variance_ratio)

    # Find the number of principal components that explain 95% of the variance
    num_components = np.argmax(cumulative_variance >= 0.95) + 1

    # Get the loadings of the first num_components
    loadings = pca.components_[:num_components]

    # Create a binary vector indicating principal features
    principal_features = np.zeros(self.data.shape[1])
    for loading in loadings:
        principal_features[np.abs(loading) >= 0.00001] = 1

    return principal_features

  def load_data(self, selected_features):
    # Usa las caracteristicas dadas por la MH
    all_features = list(self.data.columns)
    selected_column_names = [all_features[i] for i in range(len(selected_features)) if (selected_features[i] == 1)]
    return self.data[selected_column_names].values, self.data.iloc[:, -1].values.astype(float)

In [85]:
class MLSVM(ML):
  def train(self, selected_features):
    X, y = self.load_data(selected_features)

    # Dividir el conjunto de datos en conjuntos de entrenamiento y prueba
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

    # Normalizar características
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Entrenar un clasificador SVM
    svm = SVC()
    svm.fit(X_train, y_train)

    # Realizar predicciones en el conjunto de prueba
    y_pred = svm.predict(X_test)

    # Calcular las métricas de evaluación
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average = 'weighted')

    return [accuracy, f1]

In [86]:
class Crow:
  def __init__(self):
    self.dimension = 755 ## Depende del DSet
    self.position = []
    self.metrics = [0,0]
    self.pBest = []
    self.fBest = 0.0
    for j in range(self.dimension):
      self.position.append(rnd.randint(0,1))

  def countCols(self):
    sum = 0
    for j in range(self.dimension):
        sum += self.position[j]
    return sum

  def train(self, ml):
    self.metrics = ml.train(self.position)

  def is_better_than_pbest(self):
    if self.fitness() > self.fBest:
        self.fBest = self.fitness()
        return True
    else:
        return False

  def update_pbest(self):
        self.pBest = self.position[:]

  def isBetterThan(self, gBest):
    return self.fitness() > gBest.fBest

  def move(self, followed_crow: "Crow", AP: float, flight_length: float, ml):
    for j in range(self.dimension):
      if rnd.uniform(0, 1) >= AP:
        self.position[j] = int(self.position[j] + rnd.uniform(0, 1) * flight_length * (int(followed_crow.pBest[j]) - int(self.position[j])))
      else:
        self.position[j] = rnd.randint(0, 1)
    #self.position = np.logical_or(self.position, ml.principal_components)

  def fitness(self):
    z1 = self.metrics[0] # maximizar f1 score -> [0: malo, 1: bueno]
    z2 = self.metrics[1] # maximizar acc -> [0: malo, 1: bueno]
    z3 = self.countCols() / self.dimension # minimizar cantidad de columnas que se usan -> [0: bueno, 1: malo]
    return z1 * 0.4 + z2 * 0.4 - z3 * 0.2 + 0.2

  def copy(self, other: "Crow"):
    self.position = other.position.copy()
    self.fBest = other.fBest

  #def toBinary(self, x):
    #return 1 if rnd.uniform(0, 1) <= 1 / (1 + math.exp(-x)) else 0


In [87]:
class CSA:
  def __init__(self):
    self.T = 20 #iterMax
    self.nCrows = 20 #flock size
    self.AP = 0.5
    self.flightLenght = 0.75
    self.swarm = [] # array of Crow objects
    self.gCrow = Crow() #Aquí se guarda el mejor cuervo
    self.rnd = rnd
    self.sTime = None
    self.eTime = None
    self.bTime = None

  def execute(self, ml):
    self.startTime()
    self.init(ml)
    self.run(ml)
    self.endTime()
    self.log()

  def startTime(self):
    self.sTime = int(round(tm.time() * 1000))

  def init(self, ml):
    for i in range(self.nCrows):
      self.swarm.append(Crow())
      self.swarm[i].train(ml)
      if self.swarm[i].isBetterThan(self.gCrow):
        self.gCrow.copy(self.swarm[i])
      if self.swarm[i].is_better_than_pbest():
            self.swarm[i].update_pbest()

  def run(self, ml):
    t = 1
    while t <= self.T:
      for i in range(self.nCrows):
        followedCrow = self.swarm[self.rnd.randint(0, self.nCrows - 1)]
        self.swarm[i].move(followedCrow, self.AP, self.flightLenght, ml)
        self.swarm[i].train(ml)
        if self.swarm[i].is_better_than_pbest():
            self.swarm[i].update_pbest()
        if self.swarm[i].isBetterThan(self.gCrow):
            self.bTime = int(round(tm.time() * 1000))
            self.gCrow.copy(self.swarm[i])
      t += 1

  def endTime(self):
    self.eTime = int(round(tm.time() * 1000))

  def log(self):
    print(f"Mejor fitness: {self.gCrow.fBest}, Tiempo total: {self.eTime - self.sTime}ms, Tiempo mejor Fitness: {self.bTime - self.sTime}ms")


In [88]:
for i in range (30):
  try:
    CSA().execute(MLSVM())
  except Exception as e:
    print(f"{e} \nCaused by {e.args}")


Mejor fitness: 0.9435992162366065, Tiempo total: 16437ms, Tiempo mejor Fitness: 11154ms
Mejor fitness: 0.9428045142498513, Tiempo total: 15283ms, Tiempo mejor Fitness: 12333ms
Mejor fitness: 0.9451886202101163, Tiempo total: 15883ms, Tiempo mejor Fitness: 14106ms
Mejor fitness: 0.9412151102763415, Tiempo total: 15366ms, Tiempo mejor Fitness: 14536ms
Mejor fitness: 0.943864116898858, Tiempo total: 15218ms, Tiempo mejor Fitness: 10565ms
Mejor fitness: 0.9451886202101163, Tiempo total: 15070ms, Tiempo mejor Fitness: 9965ms
Mejor fitness: 0.9446588188856131, Tiempo total: 15873ms, Tiempo mejor Fitness: 9806ms
Mejor fitness: 0.9443939182233614, Tiempo total: 15131ms, Tiempo mejor Fitness: 5709ms
Mejor fitness: 0.9446588188856131, Tiempo total: 15188ms, Tiempo mejor Fitness: 14789ms
Mejor fitness: 0.9449237195478646, Tiempo total: 15201ms, Tiempo mejor Fitness: 6784ms
Mejor fitness: 0.9422747129253481, Tiempo total: 15200ms, Tiempo mejor Fitness: 12243ms
Mejor fitness: 0.9422747129253481, Ti