## Imports

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import LinearSVR
from sklearn.neighbors import KNeighborsRegressor

import random as rd
import numpy as np

import datetime as dt

## Simulated annealing

In [None]:
def init_arguments(regressor):
	"""Draw random starting arguments and describe the search space.

	Parameters
	----------
	regressor : str
		One of "DecisionTreeRegressor", "LinearSVR" or "KNN".

	Returns
	-------
	arguments : list
		One randomly drawn start value per tunable parameter, each inside
		its inclusive [min, max] range.
	min_max_values : list of [min, max]
		Inclusive bounds for every parameter.
	integer_values : list of bool
		True where the parameter must be an integer (currently all of them).
	changes : list of list
		Allowed step sizes per parameter used when mutating.

	Raises
	------
	ValueError
		If ``regressor`` is not one of the supported names.
	"""
	# Inclusive [min, max] range of every tunable parameter. The position of
	# each entry matches the order in which calculate_quality reads
	# arguments[0], arguments[1], ...
	search_spaces = {
		"DecisionTreeRegressor": [[1, 10], [2, 10], [1, 10]],
		"LinearSVR": [[0, 1], [0, 2], [0, 5], [0, 2]],
		"KNN": [[1, 10], [0, 1], [1, 2]],
	}

	if regressor not in search_spaces:
		raise ValueError(
			"unsupported regressor %r; expected one of %s"
			% (regressor, sorted(search_spaces))
		)

	min_max_values = search_spaces[regressor]
	# Every parameter is an integer and may move one step up, stay, or move
	# one step down per mutation.
	integer_values = [True for _ in min_max_values]
	changes = [[1, 0, -1] for _ in min_max_values]

	arguments = []
	for i, mm in enumerate(min_max_values):
		if not integer_values or integer_values[i]:
			# randint is inclusive on both ends
			arguments.append(rd.randint(mm[0], mm[1]))
		else:
			arguments.append(rd.uniform(mm[0], mm[1]))

	return arguments, min_max_values, integer_values, changes

In [None]:
def get_mutation_value(value, digit, base):
	"""Return the digit at position ``digit`` of ``value`` written in ``base``.

	Position 0 is the least significant digit.
	"""
	# Drop everything above the requested digit ...
	upper_bound = base ** (digit + 1)
	remainder = value % upper_bound
	# ... then drop everything below it.
	place_value = base ** digit
	return remainder // place_value

def get_random_full_mutation(arguments, changes, min_max_values):
	"""Return a randomly mutated copy of ``arguments``.

	Every parameter is shifted by one step drawn uniformly from its own
	entry in ``changes`` and then clamped to its inclusive range in
	``min_max_values``. Drawing each parameter independently makes every
	combination of steps equally likely, whatever the number of parameters
	or the number of steps per parameter.

	Parameters
	----------
	arguments : list
		Current parameter values; not modified.
	changes : list of list
		Allowed step sizes, one list per parameter.
	min_max_values : list of [min, max]
		Inclusive bounds, one pair per parameter.

	Returns
	-------
	list
		New list with the mutated, clamped values.
	"""
	mutated_arguments = []

	for j, value in enumerate(arguments):
		low, high = min_max_values[j]
		stepped = value + rd.choice(changes[j])
		# keep the value inside its allowed range
		mutated_arguments.append(max(low, min(stepped, high)))

	return mutated_arguments

In [None]:
def calculate_quality(arguments, regressor, X, y, scoring, n=5):
	"""Return the mean cross-validated score of a regressor configuration.

	Parameters
	----------
	arguments : list of int
		Hyper-parameter values. Their meaning depends on ``regressor``:
		"DecisionTreeRegressor" reads max_depth, min_samples_split and
		min_samples_leaf directly; "LinearSVR" reads four indices into the
		loss / tol / C / epsilon candidate lists below; "KNN" reads
		n_neighbors, an index into the weights list, and p.
	regressor : str
		"DecisionTreeRegressor", "LinearSVR" or "KNN". Any other value is
		handed to cross_val_score unchanged.
	X, y
		Training data passed straight to cross_val_score.
	scoring : str
		Scoring name passed to cross_val_score (e.g. "neg_mean_squared_error"
		or "r2").
	n : int, default 5
		Number of cross-validation folds.

	Returns
	-------
	float
		Arithmetic mean of the per-fold scores.
	"""
	if regressor == "DecisionTreeRegressor":
		model = DecisionTreeRegressor(
			max_depth = arguments[0],
			min_samples_split = arguments[1],
			min_samples_leaf = arguments[2]
			)
	elif regressor == "LinearSVR":
		# The search works on integer indices; translate them into the
		# actual candidate values here.
		loss = ["epsilon_insensitive", "squared_epsilon_insensitive"]
		tol = [1e-5, 1e-3, 1e-1]
		C = [1e-4, 1e-2, 0.5, 1, 10, 20]
		epsilon = [1e-3, 1e-1, 1]
		model = LinearSVR(
			loss = loss[arguments[0]],
			tol = tol[arguments[1]],
			C = C[arguments[2]],
			epsilon = epsilon[arguments[3]],
			max_iter = 4000
			)
	elif regressor == "KNN":
		weights = ["uniform", "distance"]
		model = KNeighborsRegressor(
			n_neighbors = arguments[0],
			weights = weights[arguments[1]],
			p = arguments[2]
			)
	else:
		# Unknown value: pass it through untouched, as before.
		model = regressor

	# Honour the requested fold count instead of a hard-coded 5.
	cv = cross_val_score(model, X, y, cv=n, scoring=scoring)

	return (sum(cv)/len(cv))

In [None]:
def evaluate(mutated_arguments, regressor, X, y, scoring):
	"""Score one argument set and return it paired with its score.

	Returns a two-element list ``[score, mutated_arguments]`` where ``score``
	is the value computed by calculate_quality for exactly the arguments
	that were passed in.
	"""
	# Return the arguments that were actually evaluated, not the
	# module-level ``arguments`` variable.
	return [calculate_quality(mutated_arguments, regressor, X, y, scoring), mutated_arguments]

In [None]:
def simmulated_annealing(X, y, regressor, init_arg, changes, min_max_values, integer_values, scoring):
	"""Search for good hyper-parameters with simulated annealing.

	Starting from ``init_arg``, a random neighbour is generated and scored
	in every iteration. A neighbour replaces the current state when
	``-ln(u) * temperature > current_score - neighbour_score`` with ``u``
	drawn uniformly from [0, 1): improvements are always accepted, and
	worse neighbours are accepted less often as the temperature cools.
	The score is maximised.

	Parameters
	----------
	X, y
		Training data forwarded to evaluate.
	regressor : str
		Regressor name forwarded to evaluate.
	init_arg : list
		Starting argument set.
	changes : list of list
		Allowed step sizes per parameter.
	min_max_values : list of [min, max]
		Inclusive bounds per parameter.
	integer_values : list of bool
		Accepted for interface compatibility; not used by the search.
	scoring : str
		Scoring name forwarded to evaluate.

	Returns
	-------
	(best_score, best_arguments)
		The highest score seen and the argument set that produced it.
	"""
	best_arguments = init_arg
	best_score = evaluate(best_arguments, regressor, X, y, scoring)[0]
	current_arguments = best_arguments
	current_score = best_score

	# Stop after this many consecutive rejections once the system is cold.
	no_change_since = 0
	max_no_change_since = 5

	# Hard cap on the number of iterations.
	generations_to_kill = 2000

	temperature = 1000

	print([best_score, best_arguments])

	while no_change_since < max_no_change_since and generations_to_kill > 0:
		# mutate
		mutated_arguments = get_random_full_mutation(current_arguments, changes, min_max_values)
		# evaluate
		mutated_score = evaluate(mutated_arguments, regressor, X, y, scoring)[0]

		npl = -np.log(np.random.rand()) * temperature
		diff = current_score - mutated_score

		if npl > diff:
			# Move to the neighbour that was just scored. Using
			# mutated_arguments directly keeps score and arguments in sync.
			current_score = mutated_score
			current_arguments = mutated_arguments
			no_change_since = 0
		elif temperature < 1:
			# Rejections only count towards termination once cooled down.
			no_change_since += 1

		if current_score >= best_score:
			best_score = current_score
			best_arguments = current_arguments

		temperature = 0.9 * temperature
		generations_to_kill -= 1

	return best_score, best_arguments

## Method calls

In [None]:
# Load the feature matrix and the target values (semicolon-separated text).
X = np.loadtxt('../../datasets/covid/covid-vaccination-vs-death_ratio_input.data',delimiter=";")
y = np.loadtxt('../../datasets/covid/covid-vaccination-vs-death_ratio_classes.data',delimiter=";")

# Run the search for every regressor under each scoring metric.
# Each pair is (scoring name handed to the search, short label for the report).
for run_index, (scoring, label) in enumerate([("neg_mean_squared_error", "nmse"), ("r2", "r2")]):
	if run_index > 0:
		# blank line between the result groups of two metrics
		print()

	for regressor_name in ("DecisionTreeRegressor", "LinearSVR", "KNN"):
		arguments, min_max_values, integer_values, changes = init_arguments(regressor_name)
		best_score, best_arguments = simmulated_annealing(X, y, regressor_name, arguments, changes, min_max_values, integer_values, scoring)
		print(regressor_name, label, best_score, best_arguments)