In [None]:
import random as rn
import numpy as np
import pandas as pd
import seaborn as sns
import time as t
import matplotlib.pyplot as plt
import scipy.stats as ss

from cfair.backends import NumpyBackend
from cfair.metrics.kernel.hgr import DoubleKernelHGR
from experiments import warriors, animals, cramers_v, sample, my_kernel_one_hot, my_kernel_one_hot_with_delta, polynomial_kernel, experiment, experiment_norm, experiment_delta, experiment_general, experiment_hot_poly
from ucimlrepo import fetch_ucirepo 

from scipy.linalg import qr # is the last column always removed?

In [None]:
def vanilla_cramers_v(confusion_matrix):
    """Return the plain (not bias-corrected) Cramér's V of a contingency table.

    Parameters
    ----------
    confusion_matrix : numpy.ndarray
        Two-dimensional table of observed counts (rows x columns).

    Returns
    -------
    numpy.float64
        ``sqrt((chi2 / n) / min(columns - 1, rows - 1))``.
    """
    # chi2_contingency returns several values; only the statistic (index 0) is used.
    statistic = ss.chi2_contingency(confusion_matrix)[0]
    total = confusion_matrix.sum()
    n_rows, n_cols = confusion_matrix.shape
    smallest_dim = min((n_cols - 1), (n_rows - 1))

    return np.sqrt((statistic / total) / smallest_dim)

In [None]:
# Distinct values of `warriors`; np.unique returns them sorted (see the output below).
np.unique(warriors)

array(['Blood Angel', 'Dark Angel', 'Imperial Fist', 'Iron Hand',
       'Raven Guard', 'Salamander', 'Space Wolf', 'Ultramarine',
       'White Scar'], dtype='<U13')

In [None]:
# Distinct values of `animals`; np.unique returns them sorted (see the output below).
np.unique(animals)

array(['cat', 'dog', 'horse', 'wolf'], dtype='<U5')

In [None]:
# Backend instance passed to every DoubleKernelHGR built in this notebook.
backend = NumpyBackend()

# Cramer's V

This is the bias-corrected version of Cramér's V (Bergsma, 2013).

In [None]:
def cramers_v(confusion_matrix):
    """Bias-corrected Cramér's V for categorical-categorical association.

    Uses the correction from Bergsma, Wicher (2013), "A bias-correction for
    Cramér's V and Tschuprow's T", Journal of the Korean Statistical Society
    42: 323-328.

    Parameters
    ----------
    confusion_matrix : array-like
        Two-dimensional contingency table of observed counts (a NumPy array
        or anything ``np.asarray`` turns into one, e.g. a DataFrame).

    Returns
    -------
    float
        The corrected statistic. Degenerate tables, for which the corrected
        denominator is not positive (a single row or column, or no more
        observations than categories), yield ``0.0`` instead of NaN/inf.

    Notes
    -----
    ``scipy.stats.chi2_contingency`` is called with its defaults, so Yates'
    continuity correction is applied to 2x2 tables.
    """
    table = np.asarray(confusion_matrix)
    chi2 = ss.chi2_contingency(table)[0]
    n = table.sum()
    r, k = table.shape

    # With a single observation every "(n - 1)" denominator below would be zero.
    if n <= 1:
        return 0.0

    phi2 = chi2 / n
    phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1))
    rcorr = r - ((r - 1) ** 2) / (n - 1)
    kcorr = k - ((k - 1) ** 2) / (n - 1)
    denominator = min((kcorr - 1), (rcorr - 1))

    # No measurable association when one variable has (effectively) one level.
    if denominator <= 0:
        return 0.0

    return np.sqrt(phi2corr / denominator)

# OHE kernel function

In [None]:
def one_hot_encode(x):
    """One-hot encode a vector of categorical values.

    Parameters
    ----------
    x : array-like
        One-dimensional sequence of category labels (one entry per sample).

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n_categories, n_samples)``: row ``j`` is the
        0/1 indicator of the ``j``-th sorted distinct value of ``x``. An empty
        input gives an array of shape ``(0, 0)``.
    """
    samples = np.asarray(x).ravel()
    categories = np.unique(samples)
    # Broadcast (n_categories, 1) against (1, n_samples) in a single comparison.
    return (categories[:, np.newaxis] == samples[np.newaxis, :]).astype(int)

# Sample function

In [None]:
def sample(a_list):
    """Return one randomly chosen element of ``a_list``.

    The draw uses the standard ``random`` module (imported as ``rn``), so it
    follows that module's global seed.
    """
    chosen = rn.choice(a_list)
    return chosen

In [None]:
# Category labels used to build the synthetic datasets.
warriors = [
    "Ultramarine",
    "Salamander",
    "White Scar",
    "Space Wolf",
    "Raven Guard",
    "Iron Hand",
    "Imperial Fist",
    "Blood Angel",
    "Dark Angel",
]
animals = [
    "cat",
    "dog",
    "wolf",
    "horse",
]

# One hot Kernel

In [None]:
# HGR indicator that uses one_hot_encode as the kernel for both variables.
# NOTE(review): the exact meaning of kernel_a / kernel_b is defined by cfair's
# DoubleKernelHGR — confirm against the library.
my_kernel_one_hot = DoubleKernelHGR(
    backend=backend,          
    kernel_a=one_hot_encode, 
    kernel_b=one_hot_encode, 
)

# One hot Kernel with delta

In [None]:
# Same one-hot kernels as above, with delta_independent set to 0.2.
# NOTE(review): delta_independent is presumably the tolerance used to drop
# linearly dependent kernel columns — confirm against cfair's DoubleKernelHGR.
my_kernel_one_hot_with_delta = DoubleKernelHGR(
    backend=backend,          
    kernel_a=one_hot_encode, 
    kernel_b=one_hot_encode, 
    delta_independent=0.2
)

# Polynomial Kernel

In [None]:
# HGR indicator configured with integer kernels and delta_independent=0.2.
# NOTE(review): kernel_a=9 / kernel_b=4 are presumably polynomial degrees
# (9 warriors, 4 animals) — confirm against cfair's DoubleKernelHGR.
polynomial_kernel = DoubleKernelHGR(
    backend=backend,          
    kernel_a=9, 
    kernel_b=4, 
    delta_independent=0.2
)

# Experiments utilities

## General

In [None]:
def compute_corr_and_exec_time(feat1, feat2, kernel = "cramer", init = None, norm = True):
    """Compute an association score between two categorical features and time it.

    Parameters
    ----------
    feat1, feat2 : array-like
        Paired categorical observations (lists, NumPy arrays or pandas Series).
    kernel : "cramer" or object
        ``"cramer"`` (default) computes ``cramers_v`` on the cross-tabulation
        of the two features. Any other value must expose
        ``_result(feat1, feat2, kernel_a=..., kernel_b=..., a0=..., b0=...)``.
    init : {None, "ones", "random", "rel_fr"}
        Initial guess ``a0`` / ``b0`` handed to ``kernel._result`` (one entry
        per distinct value of each feature): ``None`` passes no guess,
        ``"ones"`` a vector of ones, ``"random"`` uniform draws from
        ``np.random.rand`` and ``"rel_fr"`` the relative frequency of each
        distinct value. Any other value behaves like ``None``. Ignored for
        ``"cramer"``.
    norm : bool
        For ``"ones"`` and ``"random"`` only: rescale the guess to sum to 1.

    Returns
    -------
    tuple
        ``(result, exec_time)`` where ``result`` is what ``cramers_v`` or
        ``kernel._result`` returned and ``exec_time`` the elapsed seconds.
    """
    if kernel == "cramer":
        # The timed section includes building the contingency table.
        start_time = t.perf_counter()
        confusion_matrix = pd.crosstab(feat1, feat2)
        result = cramers_v(confusion_matrix.values)
        exec_time = t.perf_counter() - start_time
        return result, exec_time

    # Distinct values (sorted) and their counts, computed once per feature.
    values1, counts1 = np.unique(feat1, return_counts=True)
    values2, counts2 = np.unique(feat2, return_counts=True)
    l1, l2 = len(values1), len(values2)

    a0, b0 = None, None  # default: no initial guess

    if init == "ones":
        a0 = np.ones(l1)
        b0 = np.ones(l2)
        if norm:
            a0 = a0/l1
            b0 = b0/l2

    elif init == "random":
        a0 = np.random.rand(l1)
        b0 = np.random.rand(l2)
        if norm:
            a0 = a0/a0.sum()
            b0 = b0/b0.sum()

    elif init == "rel_fr":
        # Relative frequencies already sum to 1, so `norm` does not apply.
        a0 = counts1 / counts1.sum()
        b0 = counts2 / counts2.sum()

    # Only the kernel call itself is timed, not the construction of a0 / b0.
    start_time = t.perf_counter()
    result = kernel._result(feat1, feat2, kernel_a=True, kernel_b=True, a0=a0, b0=b0)
    exec_time = t.perf_counter() - start_time

    return result, exec_time

## Experiments

In [None]:
def experiment(dataset_creation_function, kernel, n_runs=30):
    """Compare the four a0/b0 initialisations of ``kernel`` against Cramér's V.

    Parameters
    ----------
    dataset_creation_function : callable
        Called with no arguments once per run; returns the two feature
        sequences to correlate.
    kernel : object
        Kernel passed to ``compute_corr_and_exec_time``; its results must
        expose ``.value``, ``.alpha`` and ``.beta``.
    n_runs : int, default 30
        Number of datasets to generate and evaluate.

    Returns
    -------
    list of dict
        One dict per run with keys ``correlation_*``, ``alpha_*``, ``beta_*``
        and ``time_*`` for the suffixes ``none``, ``ones``, ``random`` and
        ``rf``, plus ``correlation_cramer`` and ``time_cramer``.
    """
    # (key suffix, init mode), in evaluation order.
    init_modes = (("none", None), ("ones", "ones"), ("random", "random"), ("rf", "rel_fr"))
    results = []

    for _ in range(n_runs):
        war_i, ani_i = dataset_creation_function()

        kernel_runs = [
            (suffix, compute_corr_and_exec_time(war_i, ani_i, kernel, init))
            for suffix, init in init_modes
        ]
        cramer, exec_time_cramer = compute_corr_and_exec_time(war_i, ani_i)

        res_i = {}
        for suffix, (outcome, exec_time) in kernel_runs:
            res_i[f'correlation_{suffix}'] = outcome.value
            res_i[f'alpha_{suffix}'] = outcome.alpha
            res_i[f'beta_{suffix}'] = outcome.beta
            res_i[f'time_{suffix}'] = exec_time

        res_i['correlation_cramer'] = cramer
        res_i['time_cramer'] = exec_time_cramer

        results.append(res_i)

    return results

In [None]:
def experiment_norm(dataset_creation_function, kernel, n_runs=30):
    """Compare normalised and non-normalised "ones" / "random" initial guesses.

    Parameters
    ----------
    dataset_creation_function : callable
        Called with no arguments once per run; returns the two feature
        sequences to correlate.
    kernel : object
        Kernel passed to ``compute_corr_and_exec_time``; its results must
        expose ``.value``, ``.alpha`` and ``.beta``.
    n_runs : int, default 30
        Number of datasets to generate and evaluate.

    Returns
    -------
    list of dict
        One dict per run with keys ``correlation_*``, ``alpha_*``, ``beta_*``
        and ``time_*`` for the suffixes ``ones``, ``ones_not_norm``,
        ``random`` and ``random_not_norm``, plus ``correlation_cramer`` and
        ``time_cramer``.
    """
    # (key suffix, init mode, normalise?), in evaluation order.
    variants = (
        ("ones", "ones", True),
        ("ones_not_norm", "ones", False),
        ("random", "random", True),
        ("random_not_norm", "random", False),
    )
    results = []

    for _ in range(n_runs):
        war_i, ani_i = dataset_creation_function()

        kernel_runs = [
            (suffix, compute_corr_and_exec_time(war_i, ani_i, kernel, init, norm))
            for suffix, init, norm in variants
        ]
        cramer, exec_time_cramer = compute_corr_and_exec_time(war_i, ani_i)

        res_i = {}
        for suffix, (outcome, exec_time) in kernel_runs:
            res_i[f'correlation_{suffix}'] = outcome.value
            res_i[f'alpha_{suffix}'] = outcome.alpha
            res_i[f'beta_{suffix}'] = outcome.beta
            res_i[f'time_{suffix}'] = exec_time

        res_i['correlation_cramer'] = cramer
        res_i['time_cramer'] = exec_time_cramer

        results.append(res_i)

    return results

In [None]:
def experiment_delta(dataset_creation_function, kernel, n_runs=30):
    """Compare the ``None`` and relative-frequency initial guesses of ``kernel``.

    Parameters
    ----------
    dataset_creation_function : callable
        Called with no arguments once per run; returns the two feature
        sequences to correlate.
    kernel : object
        Kernel passed to ``compute_corr_and_exec_time``; its results must
        expose ``.value``, ``.alpha`` and ``.beta``.
    n_runs : int, default 30
        Number of datasets to generate and evaluate.

    Returns
    -------
    list of dict
        One dict per run with keys ``correlation_*``, ``alpha_*``, ``beta_*``
        and ``time_*`` for the suffixes ``none`` and ``rf``, plus
        ``correlation_cramer`` and ``time_cramer``.
    """
    # (key suffix, init mode), in evaluation order.
    init_modes = (("none", None), ("rf", "rel_fr"))
    results = []

    for _ in range(n_runs):
        war_i, ani_i = dataset_creation_function()

        kernel_runs = [
            (suffix, compute_corr_and_exec_time(war_i, ani_i, kernel, init))
            for suffix, init in init_modes
        ]
        cramer, exec_time_cramer = compute_corr_and_exec_time(war_i, ani_i)

        res_i = {}
        for suffix, (outcome, exec_time) in kernel_runs:
            res_i[f'correlation_{suffix}'] = outcome.value
            res_i[f'alpha_{suffix}'] = outcome.alpha
            res_i[f'beta_{suffix}'] = outcome.beta
            res_i[f'time_{suffix}'] = exec_time

        res_i['correlation_cramer'] = cramer
        res_i['time_cramer'] = exec_time_cramer

        results.append(res_i)

    return results

In [None]:
def experiment_general(dataset_creation_function, kernel1, kernel2, n_runs=30):
    """Compare two kernels on datasets generated from the run index.

    ``kernel1`` is evaluated without an initial guess, ``kernel2`` with the
    relative-frequency guess; Cramér's V is computed as the baseline.

    Parameters
    ----------
    dataset_creation_function : callable
        Called as ``dataset_creation_function(i)`` with the zero-based run
        index; returns the two feature sequences to correlate.
    kernel1, kernel2 : object
        Kernels passed to ``compute_corr_and_exec_time``; their results must
        expose ``.value``, ``.alpha`` and ``.beta``.
    n_runs : int, default 30
        Number of runs (indices ``0 .. n_runs - 1``).

    Returns
    -------
    list of dict
        One dict per run with keys ``correlation_*``, ``alpha_*``, ``beta_*``
        and ``time_*`` for the suffixes ``none`` (kernel1) and ``rf``
        (kernel2), plus ``correlation_cramer`` and ``time_cramer``.
    """
    # (key suffix, kernel, init mode), in evaluation order.
    setups = (("none", kernel1, None), ("rf", kernel2, "rel_fr"))
    results = []

    for i in range(n_runs):
        war_i, ani_i = dataset_creation_function(i)

        kernel_runs = [
            (suffix, compute_corr_and_exec_time(war_i, ani_i, kernel, init))
            for suffix, kernel, init in setups
        ]
        cramer, exec_time_cramer = compute_corr_and_exec_time(war_i, ani_i)

        res_i = {}
        for suffix, (outcome, exec_time) in kernel_runs:
            res_i[f'correlation_{suffix}'] = outcome.value
            res_i[f'alpha_{suffix}'] = outcome.alpha
            res_i[f'beta_{suffix}'] = outcome.beta
            res_i[f'time_{suffix}'] = exec_time

        res_i['correlation_cramer'] = cramer
        res_i['time_cramer'] = exec_time_cramer

        results.append(res_i)

    return results

In [None]:
def experiment_hot_poly(dataset_creation_function, kernel1, kernel2, n_runs=30):
    """Compare two kernels (stored as ``poly`` and ``rf``) against Cramér's V.

    ``kernel1`` is evaluated without an initial guess and stored under the
    ``poly`` suffix; ``kernel2`` is evaluated with the relative-frequency
    guess and stored under the ``rf`` suffix.

    Parameters
    ----------
    dataset_creation_function : callable
        Called with no arguments once per run; returns the two feature
        sequences to correlate.
    kernel1, kernel2 : object
        Kernels passed to ``compute_corr_and_exec_time``; their results must
        expose ``.value``, ``.alpha`` and ``.beta``.
    n_runs : int, default 30
        Number of datasets to generate and evaluate.

    Returns
    -------
    list of dict
        One dict per run with keys ``correlation_*``, ``alpha_*``, ``beta_*``
        and ``time_*`` for the suffixes ``poly`` and ``rf``, plus
        ``correlation_cramer`` and ``time_cramer``.
    """
    # (key suffix, kernel, init mode), in evaluation order.
    setups = (("poly", kernel1, None), ("rf", kernel2, "rel_fr"))
    results = []

    for _ in range(n_runs):
        war_i, ani_i = dataset_creation_function()

        kernel_runs = [
            (suffix, compute_corr_and_exec_time(war_i, ani_i, kernel, init))
            for suffix, kernel, init in setups
        ]
        cramer, exec_time_cramer = compute_corr_and_exec_time(war_i, ani_i)

        res_i = {}
        for suffix, (outcome, exec_time) in kernel_runs:
            res_i[f'correlation_{suffix}'] = outcome.value
            res_i[f'alpha_{suffix}'] = outcome.alpha
            res_i[f'beta_{suffix}'] = outcome.beta
            res_i[f'time_{suffix}'] = exec_time

        res_i['correlation_cramer'] = cramer
        res_i['time_cramer'] = exec_time_cramer

        results.append(res_i)

    return results

## Plots

### Results

In [None]:
def plot_results(results, target, x_axis_description):
    """Plot one metric of every run for the four initialisations and Cramér's V.

    Parameters
    ----------
    results : list of dict
        Per-run dicts holding the keys ``{target}_none``, ``{target}_ones``,
        ``{target}_random``, ``{target}_rf`` and ``{target}_cramer``.
    target : str
        Key prefix to plot (also used as title and y-axis label).
    x_axis_description : str
        Label of the x axis.
    """
    # (key suffix, marker, colour, legend label), in drawing order.
    curve_specs = (
        ('none', 'o', 'b', 'One Hot Kernel, a0 and b0 None'),
        ('ones', 'd', 'r', 'One Hot Kernel, a0 and b0 ones'),
        ('random', 'd', 'g', 'One Hot Kernel, a0 and b0 random'),
        ('rf', 'o', 'y', 'One Hot Kernel, a0 and b0 rf'),
        ('cramer', 'o', 'c', "Cramer's V"),
    )

    # Collect every series before drawing anything.
    curves = [
        ([run[f'{target}_{suffix}'] for run in results], marker, colour, label)
        for suffix, marker, colour, label in curve_specs
    ]

    for series, marker, colour, label in curves:
        plt.plot(series, marker=marker, linestyle='--', color=colour, label=label)

    plt.title(f'{target} plot')
    plt.xlabel(x_axis_description)
    plt.ylabel(target)
    plt.legend()
    plt.grid(True)
    plt.show()

In [None]:
def plot_results_norm(results, target, x_axis_description):
    """Plot one metric for normalised vs non-normalised initial guesses.

    Parameters
    ----------
    results : list of dict
        Per-run dicts holding the keys ``{target}_ones``,
        ``{target}_ones_not_norm``, ``{target}_random``,
        ``{target}_random_not_norm`` and ``{target}_cramer``.
    target : str
        Key prefix to plot (also used as title and y-axis label).
    x_axis_description : str
        Label of the x axis.
    """
    # (key suffix, marker, colour, legend label), in drawing order.
    curve_specs = (
        ('ones', 'o', 'b', 'One Hot Kernel, a0 and b0 ones'),
        ('ones_not_norm', 'd', 'r', 'One Hot Kernel, a0 and b0 ones not normalized'),
        ('random', 'd', 'g', 'One Hot Kernel, a0 and b0 random'),
        ('random_not_norm', 'o', 'y', 'One Hot Kernel, a0 and b0 random not normalized'),
        ('cramer', 'o', 'c', "Cramer's V"),
    )

    # Collect every series before drawing anything.
    curves = [
        ([run[f'{target}_{suffix}'] for run in results], marker, colour, label)
        for suffix, marker, colour, label in curve_specs
    ]

    for series, marker, colour, label in curves:
        plt.plot(series, marker=marker, linestyle='--', color=colour, label=label)

    plt.title(f'{target} plot')
    plt.xlabel(x_axis_description)
    plt.ylabel(target)
    plt.legend()
    plt.grid(True)
    plt.show()

In [None]:
def plot_results_delta(results, target, x_axis_description):
    """Plot one metric for the ``none`` and ``rf`` runs (with delta) and Cramér's V.

    Parameters
    ----------
    results : list of dict
        Per-run dicts holding the keys ``{target}_none``, ``{target}_rf`` and
        ``{target}_cramer``.
    target : str
        Key prefix to plot (also used as title and y-axis label).
    x_axis_description : str
        Label of the x axis.
    """
    # (key suffix, colour, legend label), in drawing order; all use marker 'o'.
    curve_specs = (
        ('none', 'b', 'One Hot Kernel, a0 and b0 None with delta'),
        ('rf', 'y', 'One Hot Kernel, a0 and b0 rf with delta'),
        ('cramer', 'c', "Cramer's V"),
    )

    # Collect every series before drawing anything.
    curves = [
        ([run[f'{target}_{suffix}'] for run in results], colour, label)
        for suffix, colour, label in curve_specs
    ]

    for series, colour, label in curves:
        plt.plot(series, marker='o', linestyle='--', color=colour, label=label)

    plt.title(f'{target} plot')
    plt.xlabel(x_axis_description)
    plt.ylabel(target)
    plt.legend()
    plt.grid(True)
    plt.show()

In [None]:
def plot_results_general(results, target, x_axis_description):
    """Plot one metric for the ``none`` run, the ``rf`` run and Cramér's V.

    Legend labels describe the ``none`` series as "with delta" and the ``rf``
    series as "without delta".

    Parameters
    ----------
    results : list of dict
        Per-run dicts holding the keys ``{target}_none``, ``{target}_rf`` and
        ``{target}_cramer``.
    target : str
        Key prefix to plot (also used as title and y-axis label).
    x_axis_description : str
        Label of the x axis.
    """
    # (key suffix, colour, legend label), in drawing order; all use marker 'o'.
    curve_specs = (
        ('none', 'b', 'One Hot Kernel, a0 and b0 None with delta'),
        ('rf', 'y', 'One Hot Kernel, a0 and b0 rf without delta'),
        ('cramer', 'c', "Cramer's V"),
    )

    # Collect every series before drawing anything.
    curves = [
        ([run[f'{target}_{suffix}'] for run in results], colour, label)
        for suffix, colour, label in curve_specs
    ]

    for series, colour, label in curves:
        plt.plot(series, marker='o', linestyle='--', color=colour, label=label)

    plt.title(f'{target} plot')
    plt.xlabel(x_axis_description)
    plt.ylabel(target)
    plt.legend()
    plt.grid(True)
    plt.show()

In [None]:
def plot_results_hot_poly(results, target, x_axis_description):
    """Plot one metric for the ``poly`` run, the ``rf`` run and Cramér's V.

    Parameters
    ----------
    results : list of dict
        Per-run dicts holding the keys ``{target}_poly``, ``{target}_rf`` and
        ``{target}_cramer``.
    target : str
        Key prefix to plot (also used as title and y-axis label).
    x_axis_description : str
        Label of the x axis.
    """
    # (key suffix, colour, legend label), in drawing order; all use marker 'o'.
    curve_specs = (
        ('poly', 'b', 'Polynomial Kernel, a0 and b0 None with delta'),
        ('rf', 'y', 'One Hot Kernel, a0 and b0 rf without delta'),
        ('cramer', 'c', "Cramer's V"),
    )

    # Collect every series before drawing anything.
    curves = [
        ([run[f'{target}_{suffix}'] for run in results], colour, label)
        for suffix, colour, label in curve_specs
    ]

    for series, colour, label in curves:
        plt.plot(series, marker='o', linestyle='--', color=colour, label=label)

    plt.title(f'{target} plot')
    plt.xlabel(x_axis_description)
    plt.ylabel(target)
    plt.legend()
    plt.grid(True)
    plt.show()

### Coefficients

In [None]:
def plot_coefficients(results, coeff, values, x_axis_description, absolute = True):
    """Plot, one subplot per category, a coefficient across runs for each initialisation.

    Parameters
    ----------
    results : list of dict
        Per-run dicts holding ``{coeff}_none``, ``{coeff}_ones``,
        ``{coeff}_random`` and ``{coeff}_rf``, each indexable by category
        position.
    coeff : str
        Key prefix of the coefficient vector to plot (e.g. ``'alpha'``).
    values : array-like
        Category labels; their sorted distinct values name the subplots.
    x_axis_description : str
        Label of the x axis.
    absolute : bool, default True
        Plot absolute values of the coefficients.
    """
    ordered_val = np.unique(values)
    l = len(ordered_val)

    # Keep the 3x3 grid for up to nine categories; add rows (and height) beyond
    # that, since a fixed 3x3 grid makes plt.subplot fail on the tenth category.
    n_rows = max(3, (l + 2) // 3)
    # Same 0..30 range as before, widened only when there are more runs.
    x_max = max(30, len(results))

    # (key suffix, colour, legend label), in drawing order.
    styles = (
        ('none', 'b', f'{coeff} None'),
        ('ones', 'r', f'{coeff} ones'),
        ('random', 'g', f'{coeff} random'),
        ('rf', 'y', f'{coeff} rf'),
    )

    plt.figure(figsize=(20, 20 * n_rows / 3))

    for i in range(l):

        curves = []
        for suffix, colour, label in styles:
            series = [res[f'{coeff}_{suffix}'][i] for res in results]
            if absolute:
                series = [abs(v) for v in series]
            curves.append((series, colour, label))

        plt.subplot(n_rows, 3, i+1)

        for series, colour, label in curves:
            plt.plot(series, marker='o', linestyle='--', color=colour, label=label)

        plt.xlim(0, x_max)
        plt.ylim(0, 1)

        plt.title(f'{coeff}{str(i+1)} - {ordered_val[i]}')
        plt.xlabel(x_axis_description)
        plt.ylabel(coeff)
        plt.legend()
        plt.grid(True)

    plt.show()

In [None]:
def plot_coefficients_norm(results, coeff, values, x_axis_description, absolute = True):
    """Plot, one subplot per category, a coefficient for normalised vs raw initial guesses.

    Parameters
    ----------
    results : list of dict
        Per-run dicts holding ``{coeff}_ones``, ``{coeff}_ones_not_norm``,
        ``{coeff}_random`` and ``{coeff}_random_not_norm``, each indexable by
        category position.
    coeff : str
        Key prefix of the coefficient vector to plot (e.g. ``'alpha'``).
    values : array-like
        Category labels; their sorted distinct values name the subplots.
    x_axis_description : str
        Label of the x axis.
    absolute : bool, default True
        Plot absolute values of the coefficients.
    """
    ordered_val = np.unique(values)
    l = len(ordered_val)

    # Keep the 3x3 grid for up to nine categories; add rows (and height) beyond
    # that, since a fixed 3x3 grid makes plt.subplot fail on the tenth category.
    n_rows = max(3, (l + 2) // 3)
    # Same 0..30 range as before, widened only when there are more runs.
    x_max = max(30, len(results))

    # (key suffix, colour, legend label), in drawing order.
    styles = (
        ('ones', 'b', f'{coeff} ones'),
        ('ones_not_norm', 'r', f'{coeff} ones not normalized'),
        ('random', 'g', f'{coeff} random'),
        ('random_not_norm', 'y', f'{coeff} random not normalized'),
    )

    plt.figure(figsize=(20, 20 * n_rows / 3))

    for i in range(l):

        curves = []
        for suffix, colour, label in styles:
            series = [res[f'{coeff}_{suffix}'][i] for res in results]
            if absolute:
                series = [abs(v) for v in series]
            curves.append((series, colour, label))

        plt.subplot(n_rows, 3, i+1)

        for series, colour, label in curves:
            plt.plot(series, marker='o', linestyle='--', color=colour, label=label)

        plt.xlim(0, x_max)
        plt.ylim(0, 1)

        plt.title(f'{coeff}{str(i+1)} - {ordered_val[i]}')
        plt.xlabel(x_axis_description)
        plt.ylabel(coeff)
        plt.legend()
        plt.grid(True)

    plt.show()

In [None]:
def plot_coefficients_delta(results, coeff, values, x_axis_description, absolute = True, zoom = None):
    """Plot, one subplot per category, a coefficient for the ``none`` and ``rf`` runs (with delta).

    Parameters
    ----------
    results : list of dict
        Per-run dicts holding ``{coeff}_none`` and ``{coeff}_rf``, each
        indexable by category position.
    coeff : str
        Key prefix of the coefficient vector to plot (e.g. ``'alpha'``).
    values : array-like
        Category labels; their sorted distinct values name the subplots.
    x_axis_description : str
        Label of the x axis.
    absolute : bool, default True
        Plot absolute values of the coefficients.
    zoom : float or None, default None
        Upper limit of the y axis; ``None`` means 1.
    """
    ordered_val = np.unique(values)
    l = len(ordered_val)

    # Keep the 3x3 grid for up to nine categories; add rows (and height) beyond
    # that, since a fixed 3x3 grid makes plt.subplot fail on the tenth category.
    n_rows = max(3, (l + 2) // 3)
    # Same 0..30 range as before, widened only when there are more runs.
    x_max = max(30, len(results))
    y_max = zoom if zoom is not None else 1

    # (key suffix, marker, colour, legend label), in drawing order.
    styles = (
        ('none', 'o', 'b', f'{coeff} None with delta'),
        ('rf', 'd', 'y', f'{coeff} rf with delta'),
    )

    plt.figure(figsize=(20, 20 * n_rows / 3))

    for i in range(l):

        curves = []
        for suffix, marker, colour, label in styles:
            series = [res[f'{coeff}_{suffix}'][i] for res in results]
            if absolute:
                series = [abs(v) for v in series]
            curves.append((series, marker, colour, label))

        plt.subplot(n_rows, 3, i+1)

        for series, marker, colour, label in curves:
            plt.plot(series, marker=marker, linestyle='--', color=colour, label=label)

        plt.xlim(0, x_max)
        plt.ylim(0, y_max)

        plt.title(f'{coeff}{str(i+1)} - {ordered_val[i]}')
        plt.xlabel(x_axis_description)
        plt.ylabel(coeff)
        plt.legend()
        plt.grid(True)

    plt.show()

In [None]:
def plot_coefficients_general(results, coeff, values, x_axis_description, absolute = True, zoom = None):
    """Plot, one subplot per category, a coefficient for the ``none`` and ``rf`` runs.

    Legend labels describe the ``none`` series as "with delta" and the ``rf``
    series as "without delta".

    Parameters
    ----------
    results : list of dict
        Per-run dicts holding ``{coeff}_none`` and ``{coeff}_rf``, each
        indexable by category position.
    coeff : str
        Key prefix of the coefficient vector to plot (e.g. ``'alpha'``).
    values : array-like
        Category labels; their sorted distinct values name the subplots.
    x_axis_description : str
        Label of the x axis.
    absolute : bool, default True
        Plot absolute values of the coefficients.
    zoom : float or None, default None
        Upper limit of the y axis; ``None`` means 1.
    """
    ordered_val = np.unique(values)
    l = len(ordered_val)

    # Keep the 3x3 grid for up to nine categories; add rows (and height) beyond
    # that, since a fixed 3x3 grid makes plt.subplot fail on the tenth category.
    n_rows = max(3, (l + 2) // 3)
    # Same 0..30 range as before, widened only when there are more runs.
    x_max = max(30, len(results))
    y_max = zoom if zoom is not None else 1

    # (key suffix, marker, colour, legend label), in drawing order.
    styles = (
        ('none', 'o', 'b', f'{coeff} None with delta'),
        ('rf', 'd', 'y', f'{coeff} rf without delta'),
    )

    plt.figure(figsize=(20, 20 * n_rows / 3))

    for i in range(l):

        curves = []
        for suffix, marker, colour, label in styles:
            series = [res[f'{coeff}_{suffix}'][i] for res in results]
            if absolute:
                series = [abs(v) for v in series]
            curves.append((series, marker, colour, label))

        plt.subplot(n_rows, 3, i+1)

        for series, marker, colour, label in curves:
            plt.plot(series, marker=marker, linestyle='--', color=colour, label=label)

        plt.xlim(0, x_max)
        plt.ylim(0, y_max)

        plt.title(f'{coeff}{str(i+1)} - {ordered_val[i]}')
        plt.xlabel(x_axis_description)
        plt.ylabel(coeff)
        plt.legend()
        plt.grid(True)

    plt.show()

In [None]:
def plot_coefficients_hot_poly(results, coeff, values, x_axis_description, absolute = True, zoom = None):
    """Plot, one subplot per category, a coefficient for the ``poly`` and ``rf`` runs.

    Parameters
    ----------
    results : list of dict
        Per-run dicts holding ``{coeff}_poly`` and ``{coeff}_rf``, each
        indexable by category position.
    coeff : str
        Key prefix of the coefficient vector to plot (e.g. ``'alpha'``).
    values : array-like
        Category labels; their sorted distinct values name the subplots.
    x_axis_description : str
        Label of the x axis.
    absolute : bool, default True
        Plot absolute values of the coefficients.
    zoom : float or None, default None
        Upper limit of the y axis; ``None`` means 1.
    """
    ordered_val = np.unique(values)
    l = len(ordered_val)

    # Keep the 3x3 grid for up to nine categories; add rows (and height) beyond
    # that, since a fixed 3x3 grid makes plt.subplot fail on the tenth category.
    n_rows = max(3, (l + 2) // 3)
    # Same 0..30 range as before, widened only when there are more runs.
    x_max = max(30, len(results))
    y_max = zoom if zoom is not None else 1

    # (key suffix, marker, colour, legend label), in drawing order.
    styles = (
        ('poly', 'o', 'b', f'{coeff} polynomial, None with delta'),
        ('rf', 'd', 'y', f'{coeff} one-hot, rf without delta'),
    )

    plt.figure(figsize=(20, 20 * n_rows / 3))

    for i in range(l):

        curves = []
        for suffix, marker, colour, label in styles:
            series = [res[f'{coeff}_{suffix}'][i] for res in results]
            if absolute:
                series = [abs(v) for v in series]
            curves.append((series, marker, colour, label))

        plt.subplot(n_rows, 3, i+1)

        for series, marker, colour, label in curves:
            plt.plot(series, marker=marker, linestyle='--', color=colour, label=label)

        plt.xlim(0, x_max)
        plt.ylim(0, y_max)

        plt.title(f'{coeff}{str(i+1)} - {ordered_val[i]}')
        plt.xlabel(x_axis_description)
        plt.ylabel(coeff)
        plt.legend()
        plt.grid(True)

    plt.show()

# Utilities for Real Data

In [None]:
def plot_hist(dataset, feature, relative_feature = None, relative_value = None):
    """Plot the relative frequencies of a categorical column.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Data to plot.
    feature : str
        Column whose distribution is shown.
    relative_feature : str or None, default None
        When given, the distribution of ``feature`` in the full dataset is
        drawn side by side with its distribution restricted to the rows where
        ``relative_feature == relative_value``.
    relative_value : object, default None
        Value of ``relative_feature`` selecting the subset.
    """
    if relative_feature is not None:

        dataset_rel = dataset[ dataset[relative_feature] == relative_value ]

        freq_total = dataset[feature].value_counts(normalize=True).reset_index()
        freq_total.columns = [feature, 'relative_freq']
        freq_total['dataset'] = 'Full dataset'

        freq_relative = dataset_rel[feature].value_counts(normalize=True).reset_index()
        freq_relative.columns = [feature, 'relative_freq']
        freq_relative['dataset'] = f'Relative to {relative_feature} = {relative_value}'

        # Stack the two frequency tables so seaborn can split the bars by 'dataset'.
        combined_freq = pd.concat([freq_total, freq_relative])

        plt.figure(figsize=(20,10))
        sns.barplot(data=combined_freq, x=feature, y='relative_freq', hue='dataset')
        plt.title(f'Relative Frequencies of {feature}')
        plt.ylabel('Relative Frequency')
        plt.xlabel(feature)
        plt.show()

    else:
        plt.figure(figsize=(20,10))

        # stat="proportion" normalises the bars, so the y axis shows proportions.
        sns.countplot(data=dataset, x=feature, order=np.unique(dataset[feature]), stat="proportion")
        plt.xlabel(feature)
        plt.ylabel('Proportion')
        plt.title(f'{feature} distribution')

        plt.show()

In [None]:
def compute_correlation(dataset, feat1, feat2, kernel):
    """Run ``kernel`` on two columns, initialised with their relative frequencies.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Data holding both columns.
    feat1, feat2 : str
        Names of the two columns to correlate.
    kernel : object
        Must expose
        ``_result(a, b, kernel_a=..., kernel_b=..., a0=..., b0=...)``.

    Returns
    -------
    object
        Whatever ``kernel._result`` returns. ``a0`` / ``b0`` hold, for each
        sorted distinct value of the column, its share of the rows.
    """
    def _relative_frequencies(column):
        # One pass over the column instead of one DataFrame filter per category.
        _, counts = np.unique(dataset[column], return_counts=True)
        return counts / len(dataset)

    feat1_rf_a0 = _relative_frequencies(feat1)
    feat2_rf_b0 = _relative_frequencies(feat2)

    return kernel._result(dataset[feat1], dataset[feat2], kernel_a=True, kernel_b=True, a0=feat1_rf_a0, b0=feat2_rf_b0)

In [None]:
def my_sort(couple):
    """Sort key: absolute value of the second item of a (label, coefficient) pair."""
    return abs(couple[1])

def print_coefficients(values, coefficients):
    """Print ``label : |coefficient|`` lines, largest magnitude first.

    Parameters
    ----------
    values : iterable
        Labels, one per coefficient; non-string labels are shown via ``str``.
    coefficients : iterable of numbers
        Coefficients paired with ``values`` by position.

    Labels are left-aligned and padded to the longest label. Nothing is
    printed when ``values`` is empty.
    """
    labels = [str(value) for value in values]

    # default=0 keeps an empty input from raising in max().
    max_len = max((len(label) for label in labels), default=0)

    to_print = [(label, abs(coefficient)) for label, coefficient in zip(labels, coefficients)]
    to_print.sort(key=my_sort, reverse=True)

    for label, magnitude in to_print:
        print("{:<{width}} : {}".format(label, magnitude, width=max_len))

    

def print_coefficients_alphabetical(values, coefficients):
    """Print ``label : coefficient`` lines in the order the labels are given.

    No sorting happens here: the output is alphabetical only if ``values`` is
    already sorted (e.g. when it comes from ``np.unique``).

    Parameters
    ----------
    values : iterable
        Labels, one per coefficient; non-string labels are shown via ``str``.
    coefficients : iterable
        Coefficients paired with ``values`` by position, printed as they are
        (sign included).

    Labels are left-aligned and padded to the longest label. Nothing is
    printed when ``values`` is empty.
    """
    labels = [str(value) for value in values]

    # default=0 keeps an empty input from raising in max().
    max_len = max((len(label) for label in labels), default=0)

    for label, coefficient in zip(labels, coefficients):
        print("{:<{width}} : {}".format(label, coefficient, width=max_len))

## What happens when a0 and b0 are left as None?

In [None]:
# Toy one-hot encoding: 7 samples over 4 categories. After the transpose each
# row is one category and each column one sample (see the printed 4x7 matrix).
x = np.array([[0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1], [0, 0, 1, 0]]).transpose()
print(x)

[[0 0 0 1 0 0 0]
 [0 1 0 0 0 0 0]
 [0 0 1 0 1 0 1]
 [1 0 0 0 0 1 0]]


In `_result`:

In [None]:
# Stack the per-category rows along axis 1, giving one row per sample and one
# column per category (7x4, see output). NOTE: x is reassigned in place, so
# re-running this cell alone changes the result.
x = np.stack(x, axis=1)
print(x)

[[0 0 0 1]
 [0 1 0 0]
 [0 0 1 0]
 [1 0 0 0]
 [0 0 1 0]
 [0 0 0 1]
 [0 0 1 0]]


### Is the last column discarded? (_indices function)

In [None]:
# Tolerance compared against the |diag(R)| entries of the QR factorization below;
# same value as the delta_independent given to the kernels above.
my_delta_independent = 0.2

In [None]:
# Prepend a bias column of ones to x.
# NOTE: x is reassigned here, so re-running this cell adds another bias column.
b = np.ones(shape=(len(x), 1))            
x = np.concatenate((b, x), axis=1)        
# compute the QR factorization
r = qr(x, mode='r')[0]
# build the diagonal of the R matrix (excluding the bias column)
r = np.abs(np.diag(r)[1:])
# independent columns are those having a value higher than the tolerance
mask = r >= my_delta_independent

In [None]:
# One flag per one-hot column: here only the last one falls below the tolerance (see output).
print(mask)

[ True  True  True False]


In [None]:
# Centre every column on its mean; the bias column becomes all zeros (see output).
# NOTE: x is reassigned in place again.
x = x - np.mean(x, axis=0)
print(x)

[[ 0.         -0.14285714 -0.14285714 -0.42857143  0.71428571]
 [ 0.         -0.14285714  0.85714286 -0.42857143 -0.28571429]
 [ 0.         -0.14285714 -0.14285714  0.57142857 -0.28571429]
 [ 0.          0.85714286 -0.14285714 -0.42857143 -0.28571429]
 [ 0.         -0.14285714 -0.14285714  0.57142857 -0.28571429]
 [ 0.         -0.14285714 -0.14285714 -0.42857143  0.71428571]
 [ 0.         -0.14285714 -0.14285714  0.57142857 -0.28571429]]


In [None]:
# Check that x is still a plain NumPy array after the steps above.
print(type(x))

<class 'numpy.ndarray'>


### Computing a0

In [None]:
# Small constant added to the variance below so the square root is never zero.
my_esp = 1e-9

In [None]:
# Row-wise sum of the centred matrix ("somma" = sum); every entry is zero up to
# floating-point error (see output).
somma = x.sum(axis=1)
print(somma)

[1.11022302e-16 1.66533454e-16 0.00000000e+00 1.66533454e-16
 0.00000000e+00 1.11022302e-16 0.00000000e+00]


In [None]:
# Population variance (ddof=0) of the row sums ("varianza" = variance); effectively zero here.
varianza = somma.var(ddof=0)
print(varianza)

5.15677568782868e-33


In [None]:
# Standard deviation with the epsilon guard; since the variance is ~0 this is ~sqrt(my_esp).
denominator = np.sqrt(varianza + my_esp) 
print(denominator)

3.1622776601683795e-05


In [None]:
# Vector of four ones scaled by 1/denominator; the tiny denominator blows every
# entry up to ~3.2e4 (see output).
my_a0 = np.ones(4) / denominator
print(my_a0)

[31622.77660168 31622.77660168 31622.77660168 31622.77660168]
