In [141]:
# #!pip install modestga --upgrade --quiet
# #!pip install deap --upgrade --quiet

# import warnings
# warnings.filterwarnings("ignore")

In [142]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

In [None]:
"""Interpolation functions"""
# Each helper returns fitted coefficients (polynomial and log curve fitting) or a spline function.

# Polynomial fit of the requested degree
def polynomial_fit(x, y, degree):
    """Least-squares polynomial fit; thin wrapper around ``np.polyfit``.

    NOTE(review): a later cell defines another ``polynomial_fit`` that returns
    a ``(mse, coefficients, estimated_y)`` tuple; whichever cell ran last wins.

    Args:
        x: Sample x-coordinates.
        y: Sample y-coordinates.
        degree: Degree of the fitted polynomial.

    Returns:
        The coefficient array produced by ``np.polyfit`` (highest power first).
    """
    fitted_coefficients = np.polyfit(x, y, deg=degree)
    return fitted_coefficients

# Logarithmic polynomials (degree 1 to 4), fitted with curve_fit
# Model functions passed to curve_fit (x_log is the already log-transformed input)
def poly_log_1_logarithmic_function(x_log, a, b):
    """Degree-1 polynomial in ``x_log``: ``a * x_log + b``.

    The function does not take the logarithm itself; ``x_log`` is used as given.
    """
    linear_term = a * x_log
    return linear_term + b

def poly_log_2_logarithmic_function(x_log, a, b, c):
    """Degree-2 polynomial in ``x_log`` in Horner form: ``(a*x_log + b)*x_log + c``.

    The function does not take the logarithm itself; ``x_log`` is used as given.
    """
    inner = a * x_log + b
    return inner * x_log + c

def poly_log_3_logarithmic_function(x_log, a, b, c, d):
    """Degree-3 polynomial in ``x_log`` evaluated with Horner's scheme.

    Equivalent to ``((a*x_log + b)*x_log + c)*x_log + d``; ``x_log`` is used
    as given (no logarithm is taken here).
    """
    accumulated = a
    # Horner step: multiply by x_log, then add the next-lower coefficient.
    for lower_coefficient in (b, c, d):
        accumulated = accumulated * x_log + lower_coefficient
    return accumulated

def poly_log_4_logarithmic_function(x_log, a, b, c, d, e):
    """Degree-4 polynomial in ``x_log`` evaluated with Horner's scheme.

    Equivalent to ``(((a*x_log + b)*x_log + c)*x_log + d)*x_log + e``;
    ``x_log`` is used as given (no logarithm is taken here).
    """
    accumulated = a
    # Horner step: multiply by x_log, then add the next-lower coefficient.
    for lower_coefficient in (b, c, d, e):
        accumulated = accumulated * x_log + lower_coefficient
    return accumulated

    # curve fitting is done here
from scipy.optimize import curve_fit
def log_fitting_coefficients(x_values, y_values, logarithmic_function):
    """Fit ``logarithmic_function`` to the samples with ``scipy.optimize.curve_fit``.

    Args:
        x_values: Independent variable passed to the model as-is.
        y_values: Observed values.
        logarithmic_function: Model ``f(x, *params)`` whose parameters are fitted.

    Returns:
        The optimal parameter array found by ``curve_fit``.
    """
    fit_result = curve_fit(logarithmic_function, x_values, y_values)
    # curve_fit returns (optimal parameters, covariance); only the parameters are used.
    optimal_parameters, _covariance = fit_result
    return optimal_parameters
 
# spline interpolation
from scipy.interpolate import CubicSpline, interp1d

def spline_fitting_linear(x_values, y_values):
    """Build a piecewise-linear interpolant through the given points.

    Returns:
        The callable ``interp1d`` object created with ``kind='linear'``.
    """
    linear_interpolant = interp1d(x_values, y_values, kind='linear')
    return linear_interpolant

def spline_fitting_quadratic(x_values, y_values):
    """Build a quadratic spline interpolant through the given points.

    Returns:
        The callable ``interp1d`` object created with ``kind='quadratic'``.
    """
    quadratic_interpolant = interp1d(x_values, y_values, kind='quadratic')
    return quadratic_interpolant
 
def spline_fitting_cubic(x_values, y_values):
    """Build a cubic spline through the given points.

    Returns:
        The callable ``CubicSpline`` object (constructed with its defaults).
    """
    cubic_interpolant = CubicSpline(x_values, y_values)
    return cubic_interpolant

In [None]:
"""Estimations for MSE"""
# Each helper returns the estimated y values produced by one fitting method.

# polynomial
def polynominal_estimation(x, coefficients):
    """Evaluate a polynomial at ``x``.

    Args:
        x: Point(s) at which to evaluate.
        coefficients: Polynomial coefficients in ``np.polyval`` order
            (highest power first).

    Returns:
        The value(s) computed by ``np.polyval``.
    """
    estimated_values = np.polyval(coefficients, x)
    return estimated_values

# log
def log_fitting_estimates(x_values, coefficients):
    """Evaluate the log-polynomial model that matches ``len(coefficients)``.

    The model is chosen by the number of coefficients: 2 -> degree 1,
    3 -> degree 2, 4 -> degree 3, 5 -> degree 4.

    NOTE(review): ``x_values`` is passed to the model unchanged. Whether it
    must already be log-transformed depends on which definition of the
    ``poly_log_*_logarithmic_function`` helpers is active in the kernel.

    Args:
        x_values: Input value(s) forwarded to the model function.
        coefficients: Sequence of 2 to 5 model parameters.

    Returns:
        ``np.ndarray`` with the model estimates.

    Raises:
        NotImplementedError: If no model exists for the given number of
            coefficients. (Previously the exception object was *returned*,
            so callers silently received an exception instance as "data".)
    """
    n_coefficients = len(coefficients)
    if not 2 <= n_coefficients <= 5:
        raise NotImplementedError("length of coefficients don't match any function")

    log_function_dict = {
        2: poly_log_1_logarithmic_function,
        3: poly_log_2_logarithmic_function,
        4: poly_log_3_logarithmic_function,
        5: poly_log_4_logarithmic_function}

    model = log_function_dict[n_coefficients]
    return np.array(model(x_values, *coefficients))

# spline
def spline_fitting_estimates(x_values, spline_function):
    """Evaluate ``spline_function`` at ``x_values`` and return the result.

    Args:
        x_values: Point(s) forwarded to the callable.
        spline_function: Callable taking ``x_values`` as its only argument.
    """
    estimated_values = spline_function(x_values)
    return estimated_values


In [None]:
"""calculating and comparing mean square error"""
def comparing_fitting_methods(x_values, y_values):
    """Fit several curve families to the samples and return the lowest-MSE one.

    Candidates, in evaluation order (earlier candidates win ties):
      * polynomials of degree 1..4 in ``x``,
      * polynomials of degree 1..4 in ``log(x)`` (only if every ``x`` > 0),
      * linear, quadratic (needs >= 3 points) and cubic spline interpolants.

    Polynomial degrees are capped at ``n_points - 1`` so no fit is
    under-determined.

    The fits are computed directly with numpy/scipy rather than through the
    notebook's helper functions, because ``polynomial_fit`` and the
    ``poly_log_*`` helpers are redefined with different signatures in later
    cells, which would make the result depend on cell execution order.
    A polynomial in ``log(x)`` is linear in its parameters, so ``np.polyfit``
    on ``log(x)`` solves the same least-squares problem as ``curve_fit`` on
    the Horner-form model functions (same coefficient order).

    NOTE(review): the MSE is measured on the fitted points themselves.
    Interpolating splines pass through those points, so they will usually
    score (near) zero; use held-out data if generalisation matters.

    Args:
        x_values: 1-D sequence of x samples (distinct values expected by the
            spline constructors).
        y_values: 1-D sequence of y samples, same length as ``x_values``.

    Returns:
        dict with keys
          ``'min_mse'``       - lowest mean squared error (float),
          ``'method'``        - name of the winning candidate,
          ``'coefficients'``  - coefficient array for polynomial / log
                                candidates, ``None`` for splines,
          ``'best_function'`` - callable mapping raw ``x`` to the estimate.

    Raises:
        ValueError: If the inputs are not 1-D and equally long, contain fewer
            than two points, or no candidate yields a finite MSE.
    """
    x_arr = np.asarray(x_values, dtype=float)
    y_arr = np.asarray(y_values, dtype=float)
    if x_arr.ndim != 1 or x_arr.shape != y_arr.shape:
        raise ValueError("x_values and y_values must be 1-D sequences of equal length")
    n_points = x_arr.size
    if n_points < 2:
        raise ValueError("at least two data points are required for fitting")

    # CubicSpline requires increasing x; sorting is harmless for the other fits.
    order = np.argsort(x_arr, kind='stable')
    x_arr, y_arr = x_arr[order], y_arr[order]

    def _mse(estimates):
        # Same quantity as sklearn's mean_squared_error for 1-D inputs.
        return float(np.mean((y_arr - np.asarray(estimates, dtype=float)) ** 2))

    def _log_polynomial(coeff):
        # Factory so each callable binds its own coefficient array.
        return lambda x: np.polyval(coeff, np.log(x))

    candidates = []  # (method name, coefficients or None, callable on raw x)

    max_degree = min(4, n_points - 1)
    for degree in range(1, max_degree + 1):
        coeff_poly = np.polyfit(x_arr, y_arr, degree)
        candidates.append(
            ('polynomial_degree_{}'.format(degree), coeff_poly, np.poly1d(coeff_poly)))

    # log(x) is only defined for strictly positive x; skip the family otherwise.
    if np.all(x_arr > 0):
        x_log = np.log(x_arr)
        for degree in range(1, max_degree + 1):
            coeff_log = np.polyfit(x_log, y_arr, degree)
            candidates.append(
                ('log_polynomial_degree_{}'.format(degree), coeff_log, _log_polynomial(coeff_log)))

    candidates.append(('spline_linear', None, interp1d(x_arr, y_arr, kind='linear')))
    if n_points >= 3:
        candidates.append(('spline_quadratic', None, interp1d(x_arr, y_arr, kind='quadratic')))
    candidates.append(('spline_cubic', None, CubicSpline(x_arr, y_arr)))

    best = None
    for method, coefficients, function in candidates:
        mse = _mse(function(x_arr))
        # Strict '<' keeps the earlier (simpler) candidate on ties.
        if np.isfinite(mse) and (best is None or mse < best['min_mse']):
            best = {
                'min_mse': mse,
                'method': method,
                'coefficients': coefficients,
                'best_function': function}

    if best is None:
        raise ValueError("no fitting method produced a finite mean squared error")
    return best
 
    

In [143]:
"""
Init
"""

#===========================================================
#                   Read Files
#===========================================================

# Files
# Workbook holding the measurements; both sheets below are read from it.
file = pd.ExcelFile('Result-test.xlsx')


# Node properties (sheet 'Node_Property')
df_node_property = pd.read_excel(file, sheet_name='Node_Property')
node_names_node_property = df_node_property['Node Name'].values
memory_values = df_node_property['Memory'].values
tsk_values = df_node_property['tsk'].values

# Measured values (sheet 'General-Results')
df_general_results = pd.read_excel(file, sheet_name='General-Results')
# Fix: take the node names from the results sheet (was df_node_property, a
# copy-paste of the line above) so they line up row-by-row with x_values / y_values.
node_names_general_results = df_general_results['Node Name'].values
x_values = df_general_results['Data Size'].values
y_values = df_general_results['Total-time'].values

#===========================================================
#                   Initializing Variables
#===========================================================

# Genome variables
genomes_df = pd.read_csv('overview-test.csv')

nodenames = np.unique(node_names_node_property)  # sorted, de-duplicated node names
genome_names = genomes_df['GenomeName'].values
number_of_gens = len(genome_names)
size_of_genes = genomes_df['Size'].values
Job_max = number_of_gens

# Genetic algorithm parameters
population_number = 20
MutationRate = 0.4
CrossoverRate = 1 - MutationRate
maxIter = 120
# Share of the population assigned to each operator, rounded to whole individuals.
MutationNumber = round(population_number * MutationRate)
CrossoverNumber = round(population_number * CrossoverRate)

In [144]:

"""
overall Functions
"""
def sol(df_general_results):
    """Run the spline, logarithmic and polynomial fits for every node.

    Iterates over the module-level ``nodenames``. For each node the rows with
    a matching ``'Node Name'`` are selected, duplicate ``'Data Size'`` values
    are dropped (the first occurrence is kept), and the remaining samples are
    passed in ascending x order to ``spline_fitting``, ``log_fitting`` and
    ``polynominal_fitting``.

    NOTE(review): ``log_fitting`` is not defined in the visible part of the
    notebook - confirm it exists before running this cell.

    Args:
        df_general_results: DataFrame with columns ``'Node Name'``,
            ``'Data Size'`` and ``'Total-time'``.

    Returns:
        Tuple ``(polynomial, logarithmic, spline)`` of dicts keyed by node name.
    """
    polynomial_by_node, logarithmic_by_node, spline_by_node = {}, {}, {}

    for node in nodenames:
        node_rows = df_general_results.loc[df_general_results['Node Name'] == node]
        data_sizes = node_rows['Data Size'].values
        total_times = node_rows['Total-time'].values

        # return_index gives the position of the first occurrence of each size.
        distinct_sizes, first_seen = np.unique(data_sizes, return_index=True)
        distinct_times = total_times[first_seen]

        ascending = distinct_sizes.argsort()
        x_sorted, y_sorted = distinct_sizes[ascending], distinct_times[ascending]

        spline_by_node[node] = spline_fitting(x_sorted, y_sorted)
        logarithmic_by_node[node] = log_fitting(x_sorted, y_sorted)
        polynomial_by_node[node] = polynominal_fitting(x_sorted, y_sorted)

    return polynomial_by_node, logarithmic_by_node, spline_by_node


In [145]:
from scipy.interpolate import CubicSpline, interp1d


def spline_fitting(x_values, y_values):
    """Interpolate the samples three ways and score each against ``y_values``.

    Builds a linear and a quadratic ``interp1d`` interpolant and a
    ``CubicSpline``, evaluates each at ``x_values`` and computes the mean
    squared error against ``y_values``.

    NOTE(review): the error is measured at the interpolation points
    themselves, where an interpolant reproduces the data, so the MSE values
    are expected to be (near) zero.

    Returns:
        ``(y_linear, y_quadratic, y_cubic, mse)`` where ``mse`` is a length-3
        array ordered linear, quadratic, cubic.
    """
    interpolants = (
        interp1d(x_values, y_values, kind='linear'),
        interp1d(x_values, y_values, kind='quadratic'),
        CubicSpline(x_values, y_values),
    )

    # Evaluate every interpolant on the original x grid.
    estimates = [interpolant(x_values) for interpolant in interpolants]
    errors = [mean_squared_error(y_values, estimate) for estimate in estimates]

    y_linear, y_quadratic, y_cubic = estimates
    return y_linear, y_quadratic, y_cubic, np.array(errors)


def find_optimum_spline(Solutions):
    """Pick, per node, the spline variant with the smallest MSE.

    Iterates over the module-level ``nodenames``. Each ``Solutions[name]``
    must be a 4-tuple whose last element is an MSE array; the entry at the
    winning index of that same tuple is returned as the estimate (for tuples
    produced by ``spline_fitting`` index 0/1/2 is linear/quadratic/cubic).

    Returns:
        dict mapping node name to ``(min_index, min_mse, estimates)``.
    """
    best_by_node = {}
    for node in nodenames:
        node_solution = Solutions[node]
        _first, _second, _third, errors = node_solution
        best_index = errors.argmin()
        best_by_node[node] = (best_index, errors[best_index], node_solution[best_index])

    return best_by_node
      

In [146]:
def polynomial_fit(x, y, degree):
    """Fit a polynomial of the given degree and score it on the same samples.

    NOTE(review): an earlier cell defines a ``polynomial_fit`` that returns
    only the coefficients; this definition replaces it once this cell runs.

    Returns:
        ``(mse, coefficients, estimated_y)`` - the mean squared error between
        ``y`` and the fitted values, the ``np.polyfit`` coefficients, and the
        fitted values at ``x``.
    """
    fitted_coefficients = np.polyfit(x, y, degree)
    fitted_values = np.polyval(fitted_coefficients, x)
    error = mean_squared_error(y, fitted_values)
    return error, fitted_coefficients, fitted_values

def polynominal_fitting(x_values, y_values):
    """Fit polynomials of degree 1 to 4 and collect their results.

    Relies on ``polynomial_fit`` returning ``(mse, coefficients, estimates)``.

    Returns:
        ``(y_array, coeff_list, mse_list)`` where row/key ``k`` (0..3) holds
        the result for degree ``k + 1``: ``y_array`` is a ``(4, len(y_values))``
        array of fitted values, ``coeff_list`` a dict of coefficient arrays,
        and ``mse_list`` a length-4 array of mean squared errors.
    """
    degrees = range(1, 5)
    mse_list = np.empty(len(degrees))
    y_array = np.empty((len(degrees), len(y_values)))
    coeff_list = {}

    for slot, degree in enumerate(degrees):
        fit_mse, fit_coefficients, fit_values = polynomial_fit(x_values, y_values, degree)
        mse_list[slot] = fit_mse
        y_array[slot] = fit_values
        coeff_list[slot] = fit_coefficients

    return y_array, coeff_list, mse_list
    

def find_optimum_polynominal(solutions):
    """Pick, per node, the polynomial fit with the smallest MSE.

    Iterates over the module-level ``nodenames``. Each ``solutions[name]``
    must unpack into ``(estimates, coefficients, mse)`` (for tuples produced
    by ``polynominal_fitting`` index ``k`` corresponds to degree ``k + 1``).

    Returns:
        dict mapping node name to
        ``(min_index, min_mse, coefficients, estimates)`` of the best fit.
    """
    best_by_node = {}
    for node in nodenames:
        fitted_values, coefficients, errors = solutions[node]
        best_index = errors.argmin()
        best_by_node[node] = (
            best_index,
            errors[best_index],
            coefficients[best_index],
            fitted_values[best_index],
        )

    return best_by_node

{201: (0, 3.1554436208840474e-31, array([0.04, 4.62]), array([4.7 , 4.74, 4.78, 4.82, 4.86])), 202: (3, 8.480254731125877e-30, array([-4.16666667e-04,  1.58333333e-02, -2.04583333e-01,  1.44916667e+00,
       -6.00000000e-01]), array([1.6 , 2.3 , 2.83, 3.25, 3.61])), 203: (3, 4.1020767071492615e-30, array([-4.16666667e-04,  1.75000000e-02, -2.39583333e-01,  1.83250000e+00,
        7.70000000e-01]), array([3.61, 4.55, 5.28, 5.87, 6.38]))}


{201: (0,
  3.1554436208840474e-31,
  array([0.04, 4.62]),
  array([4.7 , 4.74, 4.78, 4.82, 4.86])),
 202: (3,
  8.480254731125877e-30,
  array([-4.16666667e-04,  1.58333333e-02, -2.04583333e-01,  1.44916667e+00,
         -6.00000000e-01]),
  array([1.6 , 2.3 , 2.83, 3.25, 3.61])),
 203: (3,
  4.1020767071492615e-30,
  array([-4.16666667e-04,  1.75000000e-02, -2.39583333e-01,  1.83250000e+00,
          7.70000000e-01]),
  array([3.61, 4.55, 5.28, 5.87, 6.38]))}

In [None]:
from scipy.optimize import curve_fit

def poly_log_1_logarithmic_function(x, a, b):
    """Degree-1 polynomial in ``log(x)``: ``a * log(x) + b``.

    Unlike the earlier variant of the same name, this one takes the natural
    logarithm of ``x`` itself.
    """
    x_log = np.log(x)
    return a * x_log + b

def poly_log_2_logarithmic_function(x, a, b, c):
    """Degree-2 polynomial in ``log(x)`` in Horner form.

    Computes ``(a*L + b)*L + c`` with ``L = np.log(x)``.
    """
    log_x = np.log(x)
    inner = a * log_x + b
    return inner * log_x + c

def poly_log_3_logarithmic_function(x, a, b, c, d):
    """Degree-3 polynomial in ``log(x)`` evaluated with Horner's scheme.

    Computes ``((a*L + b)*L + c)*L + d`` with ``L = np.log(x)``.
    """
    log_x = np.log(x)
    accumulated = a
    # Horner step: multiply by log(x), then add the next-lower coefficient.
    for lower_coefficient in (b, c, d):
        accumulated = accumulated * log_x + lower_coefficient
    return accumulated

def poly_log_4_logarithmic_function(x, a, b, c, d, e):
    """Degree-4 polynomial in ``log(x)`` evaluated with Horner's scheme.

    Computes ``(((a*L + b)*L + c)*L + d)*L + e`` with ``L = np.log(x)``.
    """
    log_x = np.log(x)
    accumulated = a
    # Horner step: multiply by log(x), then add the next-lower coefficient.
    for lower_coefficient in (b, c, d, e):
        accumulated = accumulated * log_x + lower_coefficient
    return accumulated


def log_fitting_coefficients(x_values, y_values, logarithmic_function):
    """Fit ``logarithmic_function`` to the samples with ``curve_fit``.

    Args:
        x_values: Independent variable forwarded to the model unchanged.
        y_values: Observed values.
        logarithmic_function: Model ``f(x, *params)`` to fit.

    Returns:
        The optimal parameter array; the covariance from ``curve_fit`` is
        discarded.
    """
    optimal_parameters, _covariance = curve_fit(
        logarithmic_function, x_values, y_values
    )
    return optimal_parameters
    
def log_fitting_estimates(x_values, coeff):
    """Evaluate the log-polynomial model selected by ``len(coeff)``.

    2, 3, 4 or 5 coefficients select the degree-1, -2, -3 or -4 model
    respectively; any other length raises ``KeyError`` from the lookup.

    Returns:
        ``np.ndarray`` with the model estimates at ``x_values``.
    """
    models_in_degree_order = (
        poly_log_1_logarithmic_function,
        poly_log_2_logarithmic_function,
        poly_log_3_logarithmic_function,
        poly_log_4_logarithmic_function,
    )
    # A degree-k model has k + 1 parameters, hence the keys start at 2.
    model_by_parameter_count = dict(enumerate(models_in_degree_order, start=2))

    selected_model = model_by_parameter_count[len(coeff)]
    estimates = selected_model(x_values, *coeff)
    return np.array(estimates)

In [None]:
def find_overall_optimum(Solutions_polynominal, Solutions_logarithmic, Solutions_spline):
    opt_spline = find_optimum_spline(Solutions_spline)
    opt_poly = find_optimum_polynominal(Solutions_polynominal)
    
    for name in nodenames:
        min_index_spline, mse_spline, y_spline = opt_spline[name]
        min_index_poly, mse_pol, coeff_pol, y_poly = opt_poly[name]
        y_log, coeff_log, mse_log = Solutions_logarithmic[name]
        
        
        
    