In [10]:
import numpy as np
import pandas as pd
from typing import Tuple
from datetime import datetime
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
base_path = "C:/Users/99818854/Projetos/GitRep/adaptive_learning"

In [11]:
base = pd.read_csv(f"{base_path}/data/mastery.csv")
submit = pd.read_csv(f"{base_path}/data/Submit.csv", sep=";")

In [12]:
def objective_function(params: np.array):
    mastery = np.dot(base.values[:, 3:-1], params)
    X, y = np.concatenate((base.values[:, 2:3], mastery), axis=1), base.values[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    mean_difficulty = round(np.nanmean(X_train[:, 0]))
    X_train[np.isnan(X_train)] = mean_difficulty
    X_test[np.isnan(X_test)] = mean_difficulty

    model = SGDClassifier(loss="log")
    model.fit(X_train, y_train)

    y_pred_test = model.predict(X_test)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred_test).ravel()

    accuracy = (tp + tn) / (tn + fp + fn + tp)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1 = 2 * (precision * recall) / (precision + recall)

    return accuracy, precision, recall, f1

In [13]:
def random_params():
    # ganho | dificuldade 1 | dificuldade 2 | dificuldade 3 | dificuldade 4 | dificuldade 5
    return [
        np.random.randint(1, 5),
        np.random.random(),
        np.random.random(),
        np.random.random(),
        np.random.random(),
        np.random.random()
    ]

In [14]:
def evaluate(population: list):
    results = []
    start_date = datetime.now()
    for i in range(0, len(population)):
        print(f"Avaliando o indivíduo {i + 1}/{len(population)} da população...", end="\r")
        accuracy, precision, recall, f1 = objective_function(np.array(population[i]).reshape((len(population[i]), 1)))
        results.append(f1)

    end_date = datetime.now()
    duration = round((end_date - start_date).seconds / 60, 2)
    print(f"Melhor F1-Score encontrado: {f1} ({duration} min)")
    return results

In [15]:
def select(population, results):
    combination = list(zip(population, results))
    select_1 = combination[np.random.randint(0, len(combination))]
    select_2 = combination[np.random.randint(0, len(combination))]
    return select_1[0] if select_1[1] > select_2[1] else select_2[0]

In [16]:
def crossover(father, mother):
    element_1, element_2 = father, mother
    if np.random.randint(0, 100) < 50:
        point = np.random.randint(0, len(father))
        element_1 = father[:point] + mother[point:]
        element_2 = mother[:point] + father[point:]
    return element_1, element_2

In [17]:
def mutate(element):
    for i in range(0, len(element)):
        if np.random.randint(0, 100) < 2:
            element[i] = element[i] * np.random.random()

In [18]:
generations = 100
qntd_individuals = 10
current_population = [random_params() for i in range(0, qntd_individuals)]
print("Geração primitiva:")
current_results = evaluate(current_population)

for i in range(0, generations):
    print(f"\nGeração {i + 1}")
    new_population = []

    while len(new_population) < qntd_individuals:
        father = select(current_population, current_results)
        mother = select(current_population, current_results)
        element_1, element_2 = crossover(father, mother)
        mutate(element_1)
        mutate(element_2)
        new_population.append(element_1)
        new_population.append(element_2)

    current_population = new_population
    current_results = evaluate(current_population)

Geração primitiva:
Melhor F1-Score encontrado: 0.7907065474670415 (3.6 min)
Geração 1
Melhor F1-Score encontrado: 0.5478439071927084 (3.8 min)
Geração 2
Melhor F1-Score encontrado: 0.4886652117757092 (4.35 min)
Geração 3
Melhor F1-Score encontrado: 0.791091691599772 (4.25 min)
Geração 4
Melhor F1-Score encontrado: 0.7918809654327358 (3.98 min)
Geração 5
Melhor F1-Score encontrado: 0.7916090907899763 (4.22 min)
Geração 6
Melhor F1-Score encontrado: 0.7903139984188511 (3.87 min)
Geração 7
Melhor F1-Score encontrado: 0.7913903343024722 (4.48 min)
Geração 8
Melhor F1-Score encontrado: 0.7911095529389007 (5.33 min)
Geração 9
Melhor F1-Score encontrado: 0.7905992223554635 (5.95 min)
Geração 10
Melhor F1-Score encontrado: 0.3310939444973274 (5.68 min)
Geração 11
Melhor F1-Score encontrado: 0.39595924415944495 (6.02 min)
Geração 12
Melhor F1-Score encontrado: 0.49700315986092963 (5.15 min)
Geração 13
Melhor F1-Score encontrado: 0.7903139817604627 (4.82 min)
Geração 14
Melhor F1-Score encontrad

In [19]:
best_f1, best_population = 0, []
for population, f1 in zip(current_population, current_results):
    if f1 > best_f1:
        best_f1 = f1
        best_population = population

print(f"Melhores parâmetros:")
print(best_population)

Melhores parâmetros:
[0.0004323906784531435, 0.00010716995653383348, 0.09686397157158075, 0.038338290750789344, 0.1925561952638974, 7.024947822136203e-06]
