## Imports

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import LinearSVR
from sklearn.neighbors import KNeighborsRegressor

import random as rd
import numpy as np

import datetime as dt

## Hillclimbing

Hill climbing takes a random initial set of hyperparameters and searches its direct neighborhood for better solutions, repeating the process until no better solution can be found.

In [None]:
# used to generate initial arguments and set lists for ranges and steps
def init_arguments(regressor):
	"""Draw a random starting point and describe the search space of a regressor.

	Every hyperparameter is encoded as an integer slot. The slots are consumed
	positionally by calculate_quality:
	  - "DecisionTreeRegressor": max_depth, min_samples_split, min_samples_leaf
	  - "LinearSVR": indices into the loss, tol, C and epsilon candidate lists
	  - "KNN": n_neighbors, index into the weights candidate list, p

	Args:
		regressor: one of "DecisionTreeRegressor", "LinearSVR" or "KNN".

	Returns:
		A tuple (arguments, min_max_values, integer_values, changes) where
		arguments is the random starting point, min_max_values holds the
		inclusive [min, max] bounds per slot, integer_values flags which slots
		are integers and changes lists the step offsets tried per slot.

	Raises:
		ValueError: if regressor is not one of the supported names.
	"""
	if regressor == "DecisionTreeRegressor":
		min_max_values = [[1, 10], [2, 10], [1, 10]]
	elif regressor == "LinearSVR":
		min_max_values = [[0, 1], [0, 2], [0, 5], [0, 2]]
	elif regressor == "KNN":
		min_max_values = [[1, 10], [0, 1], [1, 2]]
	else:
		# fail early with a clear message instead of an UnboundLocalError below
		raise ValueError(f"unsupported regressor: {regressor!r}")

	# all slots are integers and are moved by one step up, not at all, or one step down
	integer_values = [True] * len(min_max_values)
	changes = [[1, 0, -1] for _ in min_max_values]

	arguments = []
	for (low, high), is_integer in zip(min_max_values, integer_values):
		if is_integer:
			arguments.append(rd.randint(low, high))
		else:
			arguments.append(rd.uniform(low, high))

	return arguments, min_max_values, integer_values, changes

In [None]:
# returns all argument sets in the direct neighborhood of the given argument set
def mutate_arguments_direct_neighborhood(arguments, changes, min_max_values, include_original = True):
	"""Return the distinct argument sets that differ from arguments in one slot.

	For every slot i, each offset in changes[i] is added to arguments[i] and the
	result is clamped to the inclusive bounds min_max_values[i]. The input is
	never modified; every candidate is a fresh list.

	Args:
		arguments: current argument set (sequence of numbers).
		changes: per-slot list of offsets to try.
		min_max_values: per-slot [min, max] bounds used for clamping.
		include_original: when False, candidates equal to arguments (offset 0
			or a step clamped back onto the current value) are left out.

	Returns:
		List of candidate argument lists in first-seen order, without duplicates.
	"""
	original = list(arguments)
	mutated_arguments = []

	for i, value in enumerate(original):
		low = min_max_values[i][0]
		high = min_max_values[i][1]
		for step in changes[i]:
			candidate = original.copy()
			candidate[i] = max(low, min(value + step, high))
			if not include_original and candidate == original:
				continue
			# each candidate is evaluated by cross-validation later, so duplicates
			# (offset 0 in every slot, clamped boundary steps) are dropped here
			if candidate in mutated_arguments:
				continue
			mutated_arguments.append(candidate)

	return mutated_arguments

In [None]:
# calculates the mean cross-validation score of a regressor for the given scoring
def calculate_quality(arguments, regressor, X, y, scoring, n=5):
	"""Build the requested regressor from encoded arguments and cross-validate it.

	Args:
		arguments: encoded hyperparameters, read positionally:
			"DecisionTreeRegressor": max_depth, min_samples_split, min_samples_leaf;
			"LinearSVR": indices into the loss, tol, C and epsilon candidate lists;
			"KNN": n_neighbors, index into the weights candidate list, p.
		regressor: one of the names above. Any non-string object is handed to
			cross_val_score unchanged, in which case arguments is ignored.
		X: feature data passed to cross_val_score.
		y: target data passed to cross_val_score.
		scoring: scoring argument passed to cross_val_score.
		n: value passed as cv to cross_val_score (default 5).

	Returns:
		The arithmetic mean of the scores returned by cross_val_score.

	Raises:
		ValueError: if regressor is a string that is not a supported name.
	"""
	if not isinstance(regressor, str):
		# not a name: pass the object through as the estimator
		model = regressor
	elif regressor == "DecisionTreeRegressor":
		model = DecisionTreeRegressor(
			max_depth = arguments[0],
			min_samples_split = arguments[1],
			min_samples_leaf = arguments[2]
			)
	elif regressor == "LinearSVR":
		# the arguments are indices into these candidate lists
		loss = ["epsilon_insensitive", "squared_epsilon_insensitive"]
		tol = [1e-5, 1e-3, 1e-1]
		C = [1e-4, 1e-2, 0.5, 1, 10, 20]
		epsilon = [1e-3, 1e-1, 1]
		model = LinearSVR(
			loss = loss[arguments[0]],
			tol = tol[arguments[1]],
			C = C[arguments[2]],
			epsilon = epsilon[arguments[3]],
			max_iter = 4000
			)
	elif regressor == "KNN":
		weights = ["uniform", "distance"]
		model = KNeighborsRegressor(
			n_neighbors = arguments[0],
			weights = weights[arguments[1]],
			p = arguments[2]
			)
	else:
		raise ValueError(f"unsupported regressor: {regressor!r}")

	scores = cross_val_score(model, X, y, cv=n, scoring=scoring)

	return sum(scores) / len(scores)

In [None]:
# scores every candidate argument set and returns the results in ascending order
def evaluate(mutated_arguments, regressor, X, y, scoring):
	"""Score each argument set with calculate_quality and sort the results.

	Args:
		mutated_arguments: iterable of argument sets to score.
		regressor, X, y, scoring: forwarded unchanged to calculate_quality.

	Returns:
		A list of [score, arguments] pairs sorted ascending, i.e. by score
		first and by the argument lists themselves on equal scores.
	"""
	scored = [
		[calculate_quality(candidate, regressor, X, y, scoring), candidate]
		for candidate in mutated_arguments
	]

	return sorted(scored)

In [None]:
def hill_climbing(X, y, regressor, init_arg, changes, min_max_values, integer_values, scoring):
	"""Greedy hill climbing over the encoded hyperparameters of a regressor.

	Starting at init_arg, all direct neighbors of the current argument set are
	scored and the search moves to the best neighbor as long as it is strictly
	better than the current score. The score is maximized, because
	scikit-learn scorers such as "neg_mean_squared_error" and "r2" follow the
	greater-is-better convention.

	Args:
		X: feature data forwarded to evaluate.
		y: target data forwarded to evaluate.
		regressor: regressor name forwarded to evaluate.
		init_arg: argument set the search starts from.
		changes: per-slot step offsets defining the neighborhood.
		min_max_values: per-slot [min, max] bounds.
		integer_values: not used by the search; kept so existing callers
			keep working.
		scoring: scoring name forwarded to evaluate.

	Returns:
		A tuple (best_score, best_arguments) for the local optimum reached.
	"""
	best_arguments = init_arg
	best_score = evaluate([best_arguments], regressor, X, y, scoring)[0][0]

	while True:
		# the current point is already scored, so only real neighbors are evaluated
		neighbors = mutate_arguments_direct_neighborhood(best_arguments, changes, min_max_values, include_original = False)
		if not neighbors:
			break

		evaluated_arguments = evaluate(neighbors, regressor, X, y, scoring)

		# take the maximum explicitly instead of relying on the order evaluate returns
		top_score, top_arguments = max(evaluated_arguments, key=lambda entry: entry[0])

		if top_score > best_score:
			best_score = top_score
			best_arguments = top_arguments
		else:
			# no neighbor improves the score: local optimum reached
			break

	return best_score, best_arguments

## Method calls

In [None]:
# active dataset; swap in one of the commented pairs below to use another one
X = np.loadtxt('../datasets/exercise2/solarflares/flare_input.data',delimiter=";")
y = np.loadtxt('../datasets/exercise2/solarflares/flare_classes.data',delimiter=";")[:,0]

#X = np.loadtxt('../datasets/exercise2/wine/wine_red_input.data',delimiter=";")
#y = np.loadtxt('../datasets/exercise2/wine/wine_red_classes.data',delimiter=";")

#X = np.loadtxt('../datasets/exercise2/wine/wine_white_input.data',delimiter=";")
#y = np.loadtxt('../datasets/exercise2/wine/wine_white_classes.data',delimiter=";")

#X = np.loadtxt('../datasets/exercise2/covid/covid-vaccination-vs-death_ratio_input.data',delimiter=";")
#y = np.loadtxt('../datasets/exercise2/covid/covid-vaccination-vs-death_ratio_classes.data',delimiter=";")

# every regressor is tuned once per scoring, each time from a fresh random start
regressor_names = ("DecisionTreeRegressor", "LinearSVR", "KNN")
scorings = (("nmse", "neg_mean_squared_error"), ("r2", "r2"))

for run_index, (label, scoring) in enumerate(scorings):
	if run_index > 0:
		# blank line between the result groups of two scorings
		print()
	for regressor_name in regressor_names:
		arguments, min_max_values, integer_values, changes = init_arguments(regressor_name)
		best_score, best_arguments = hill_climbing(X, y, regressor_name, arguments, changes, min_max_values, integer_values, scoring)
		print(regressor_name, label, best_score, best_arguments)