In [1]:
# !pip install jmetalpy

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
sns.set()

In [2]:
df_hd = pd.read_csv('../Data/HD_dataset_full.csv')
df_hd.rename(columns={'Unnamed: 0':'Samples'},inplace=True)
df_hd['Grade'] = df_hd['Grade'].map({'-':'Control',
                                     '0':'HD_0',
                                     '1':'HD_1',
                                     '2':'HD_2',
                                     '3':'HD_3',
                                     '4':'HD_4'})
df_hd.head(1)

Unnamed: 0,Samples,ENSG00000000457,ENSG00000001461,ENSG00000001497,ENSG00000001626,ENSG00000002016,ENSG00000002745,ENSG00000002746,ENSG00000003137,ENSG00000003147,...,ENSG00000276644,ENSG00000277443,ENSG00000277893,ENSG00000278259,ENSG00000278311,ENSG00000279152,ENSG00000279519,ENSG00000280099,ENSG00000280109,Grade
0,GSM86787,-0.030418,1.193756,1.852312,0.122721,-0.134031,0.178517,1.110357,-0.059366,0.528582,...,2.717965,7.964445,0.767224,1.14174,2.766475,-0.119175,2.737949,0.490229,1.250728,Control


In [3]:
# from jmetal.core.problem import BinaryProblem
from jmetal.core.solution import BinarySolution
from jmetal.algorithm.singleobjective import GeneticAlgorithm
from jmetal.operator import BinaryTournamentSelection, SBXCrossover, BitFlipMutation, DifferentialEvolutionCrossover, PolynomialMutation, CXCrossover, SPXCrossover
from jmetal.util.termination_criterion import StoppingByEvaluations

In [4]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split,KFold
from sklearn.preprocessing import LabelEncoder

In [5]:
class FeatureSelectionProblem():
  def __init__(self,X,y):
    self.X = X
    self.y = y
    self.number_of_variables = X.shape[1]
    self.number_of_objectives = 1
    self.number_of_constraints = 0

  def evaluate(self, solution):
    selected_features = np.flatnonzero(solution.variables)
    X_selected = self.X.iloc[:, selected_features]
    Xtrain,Xtest,ytrain,ytest = train_test_split(X_selected,self.y)

    model = DecisionTreeClassifier()
    model.fit(Xtrain, ytrain)
    y_pred = model.predict(Xtest)
    acc = accuracy_score(ytest, y_pred)

    solution.objectives[0] = acc
    solution.constraints = []

  def create_solution(self):
    new_solution = BinarySolution(
        number_of_variables = self.number_of_variables,
        number_of_objectives = self.number_of_objectives,
        number_of_constraints = self.number_of_constraints
    )
    # new_variables = [list(np.random.randint(0, 2, size=1).tolist()[0] for _ in range(self.number_of_variables))]
    new_variables = [np.random.randint(0, 2, size=1)[0] for _ in range(self.number_of_variables)]
    new_solution.variables = new_variables
    return new_solution

  def get_name(self):
    return "FeatureSelectionProblem"


In [10]:
class FeatureSelectionGA():
  def __init__(self,X,y,alfa):
    self.X = X
    self.y = y
    self.alfa = alfa
    self.number_of_variables = X.shape[1]
    self.number_of_objectives = 1
    self.number_of_constraints = 0

  def evaluate(self, solution):
    selected_features = np.flatnonzero(solution.variables)
    X_selected = self.X[:, selected_features]

    kf = KFold(n_splits=4, shuffle=True, random_state=42)
    scores = []
    model = SVC()
    for trainI, testI in kf.split(X_selected):
      X_train, X_test = X_selected[trainI], X_selected[testI]
      y_train, y_test = self.y[trainI], self.y[testI]
      model.fit(X_train, y_train)
      y_pred = model.predict(X_test)
      acc = accuracy_score(y_test, y_pred)
      scores.append(acc)

    acc_avg = np.mean(scores)
    num_variables = len(selected_features)
    beta = 1 - self.alfa
    fitness = 1.0 - (num_variables/self.X.shape[1]) # Primera parte de la función agregativa
    fitness = (self.alfa * fitness) + (beta * acc_avg)
    solution.objectives[0] = 1-fitness
    solution.constraints = []

  def create_solution(self):
      new_solution = BinarySolution(
          number_of_variables = self.number_of_variables,
          number_of_objectives = self.number_of_objectives,
          number_of_constraints = self.number_of_constraints
      )
      new_solution.variables = [True if np.random.randint(0, 1) == 0 else False for _ in range(self.number_of_variables)]
      new_solution.objectives = [0 for _ in range(self.number_of_objectives)]
      new_solution.constraints = [0 for _ in range(self.number_of_constraints)]
      return new_solution

  def get_name(self):
    return "FeatureSelectionGA"
  
#DATA
df_hd = pd.read_csv('../Data/HD_filtered.csv')
encoder = LabelEncoder()
X = df_hd.drop(columns=['Samples','Grade']).to_numpy()
y = encoder.fit_transform(df_hd.Grade.to_numpy())
clases = list(df_hd.columns[:-2])

problem = FeatureSelectionGA(X,y,0.9)

algorithm = GeneticAlgorithm(
    problem=problem,
    population_size=100,
    offspring_population_size=100,
    mutation=BitFlipMutation(0.01),
    crossover=SPXCrossover(0.9),
    selection=BinaryTournamentSelection(),
    termination_criterion=StoppingByEvaluations(max_evaluations=1000)
)

algorithm.run()

[2024-05-27 20:24:43,978] [jmetal.core.algorithm] [DEBUG] Creating initial set of solutions...
[2024-05-27 20:24:44,141] [jmetal.core.algorithm] [DEBUG] Evaluating solutions...
[2024-05-27 20:24:52,588] [jmetal.core.algorithm] [DEBUG] Initializing progress...
[2024-05-27 20:24:52,588] [jmetal.core.algorithm] [DEBUG] Running main loop until termination criteria is met


StopbyEvals:  100


TypeError: object of type 'bool' has no len()