In [12]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, DBSCAN
from instance_space_analysis.feature_computations import get_instance_features


from sklearn.inspection import permutation_importance

In [13]:
data = pd.read_csv("data/heuristic_performance_final_scratch.csv")
data.set_index("instance", inplace=True)

instance_features = get_instance_features(data, 'data/qapdata/')

scaler = StandardScaler()
instance_features[instance_features.columns] = scaler.fit_transform(instance_features[instance_features.columns])

algorithms = [
    # 'grasp_local_search', 
    # 'grasp_simulated_annealing', 
    # 'genetic_algorithm',
    '10_multistart_total_swap_optimal_neighbour_objective',
    # '10_multistart_adjacent_swap_optimal_neighbour_objective',
    # '10_multistart_adjacent_swap_first_improvement_objective',
    # '10_multistart_total_swap_first_improvement_objective',
    # 'constructive_greedy_local_search_objective',
    'elshafei_constructive_greedy_local_search_objective',
]

restricted_df = data[algorithms]
restricted_df = restricted_df.drop_duplicates(keep='first')

best_algos_df = pd.DataFrame()

best_algos_df['performance'] = restricted_df.min(axis=1)
best_algos_df['best_algo'] = restricted_df.idxmin(axis=1)

instance_features['best_algo'] = restricted_df.idxmin(axis=1).astype('category')
instance_features.dropna(inplace=True)

instance_features

Unnamed: 0,problem_size,flow_sparsity,distance_sparsity,flow_asymmetry,distance_asymmetry,flow_dominance,distance_dominance,flow_max,distance_max,flow_min,distance_min,flow_mean,distance_mean,best_algo
tai256c,5.954700,9.186069,-0.595513,0.399050,0.428339,0.707893,0.934446,-0.312484,5.073131,-0.274350,3.323562,-0.247417,2.607582,10_multistart_total_swap_optimal_neighbour_obj...
nug16b,-0.687821,-0.285651,-0.280453,0.399050,0.428339,-0.438630,-0.078131,-0.307507,-0.303716,-0.267333,-0.208219,-0.240930,-0.122438,elshafei_constructive_greedy_local_search_obje...
tai35b,-0.161955,-0.285651,0.688731,0.399050,-2.025594,-0.305154,1.841030,0.742713,0.866027,0.888181,0.367406,1.607858,-0.124651,10_multistart_total_swap_optimal_neighbour_obj...
chr22a,-0.521758,0.529100,-0.595513,0.399050,0.428339,1.608492,-0.560231,-0.263706,-0.299360,-0.271149,-0.195523,-0.247417,-0.102523,elshafei_constructive_greedy_local_search_obje...
esc16h,-0.687821,-0.258977,-0.315460,0.399050,0.428339,0.099192,-0.384595,-0.292575,-0.304092,-0.260349,-0.209032,-0.244174,-0.123544,10_multistart_total_swap_optimal_neighbour_obj...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
tai20a,-0.577112,-0.272848,-0.574509,0.399050,0.428339,-0.370562,-0.580669,-0.215923,-0.298930,-0.139188,-0.183848,-0.104704,-0.071544,10_multistart_total_swap_optimal_neighbour_obj...
lipa80b,1.083518,-0.240306,-0.595513,-3.129092,0.428339,-0.408493,-0.804071,-0.234837,-0.299951,-0.159951,-0.187245,-0.120921,-0.079288,10_multistart_total_swap_optimal_neighbour_obj...
rou12,-0.798530,-0.285651,-0.583844,0.399050,0.428339,-0.369912,-0.514195,-0.214928,-0.298930,-0.136978,-0.186240,-0.104704,-0.078182,10_multistart_total_swap_optimal_neighbour_obj...
tai20b,-0.577112,-0.285651,-0.021404,0.399050,-2.481396,-0.028357,2.071841,0.501810,2.898463,0.521694,1.368174,-0.098217,-0.123544,10_multistart_total_swap_optimal_neighbour_obj...


In [14]:
restricted_df

Unnamed: 0_level_0,10_multistart_total_swap_optimal_neighbour_objective,elshafei_constructive_greedy_local_search_objective
instance,Unnamed: 1_level_1,Unnamed: 2_level_1
tai256c,0.002624,0.003676
nug16b,0.054839,0.027419
tai35b,0.015643,0.019450
chr22a,0.051657,0.033788
esc16h,0.000000,0.000000
...,...,...
tai20a,0.024899,0.042887
lipa80b,0.214726,0.220071
rou12,0.001376,0.056927
tai20b,0.009204,0.010278


In [15]:
import plotly.express as px
px.box(best_algos_df, y="performance", color="best_algo")

In [16]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler


train, test = train_test_split(instance_features, test_size=0.2, random_state=42)
x_train, y_train = train.loc[:,train.columns != 'best_algo'], train['best_algo']
x_test, y_test = test.loc[:,test.columns != 'best_algo'], test['best_algo']

scaler = StandardScaler().fit(x_train[x_train.columns])
x_train[x_train.columns] = scaler.transform(x_train[x_train.columns])
x_test[x_test.columns] = scaler.transform(x_test[x_test.columns])

logistic_model = LogisticRegression().fit(x_train,y_train)

y_pred = logistic_model.predict(x_test)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [17]:
y_pred

array(['10_multistart_total_swap_optimal_neighbour_objective',
       '10_multistart_total_swap_optimal_neighbour_objective',
       '10_multistart_total_swap_optimal_neighbour_objective',
       '10_multistart_total_swap_optimal_neighbour_objective',
       '10_multistart_total_swap_optimal_neighbour_objective',
       '10_multistart_total_swap_optimal_neighbour_objective',
       '10_multistart_total_swap_optimal_neighbour_objective',
       '10_multistart_total_swap_optimal_neighbour_objective',
       '10_multistart_total_swap_optimal_neighbour_objective',
       '10_multistart_total_swap_optimal_neighbour_objective',
       '10_multistart_total_swap_optimal_neighbour_objective',
       '10_multistart_total_swap_optimal_neighbour_objective',
       '10_multistart_total_swap_optimal_neighbour_objective',
       '10_multistart_total_swap_optimal_neighbour_objective',
       '10_multistart_total_swap_optimal_neighbour_objective',
       '10_multistart_total_swap_optimal_neighbour_obje

In [18]:
y_test

had18      elshafei_constructive_greedy_local_search_obje...
nug21      10_multistart_total_swap_optimal_neighbour_obj...
esc16h     10_multistart_total_swap_optimal_neighbour_obj...
lipa70b    10_multistart_total_swap_optimal_neighbour_obj...
tai80a     10_multistart_total_swap_optimal_neighbour_obj...
tai20b     10_multistart_total_swap_optimal_neighbour_obj...
bur26d     10_multistart_total_swap_optimal_neighbour_obj...
nug16a     10_multistart_total_swap_optimal_neighbour_obj...
lipa90a    elshafei_constructive_greedy_local_search_obje...
nug25      10_multistart_total_swap_optimal_neighbour_obj...
rou20      elshafei_constructive_greedy_local_search_obje...
tai60a     10_multistart_total_swap_optimal_neighbour_obj...
tai35a     10_multistart_total_swap_optimal_neighbour_obj...
nug22      10_multistart_total_swap_optimal_neighbour_obj...
chr12b     10_multistart_total_swap_optimal_neighbour_obj...
kra30a     10_multistart_total_swap_optimal_neighbour_obj...
sko72      10_multistart

In [19]:
print(f"average prediction accuracy of {np.mean(y_test.values == y_pred)}")

average prediction accuracy of 0.8333333333333334


In [20]:
y_train.unique()

['elshafei_constructive_greedy_local_search_obj..., '10_multistart_total_swap_optimal_neighbour_ob...]
Categories (2, object): ['10_multistart_total_swap_optimal_neighbour_ob..., 'elshafei_constructive_greedy_local_search_obj...]

In [21]:
from statsmodels.discrete.discrete_model import MNLogit

logistic_model = MNLogit(np.asarray(y_train.cat.codes), np.asarray(x_train)).fit()
logistic_model.summary() 

Optimization terminated successfully.
         Current function value: 0.652466
         Iterations 5


0,1,2,3
Dep. Variable:,y,No. Observations:,95.0
Model:,MNLogit,Df Residuals:,82.0
Method:,MLE,Df Model:,12.0
Date:,"Sun, 08 Oct 2023",Pseudo R-squ.:,-0.5605
Time:,11:24:59,Log-Likelihood:,-61.984
converged:,True,LL-Null:,-39.721
Covariance Type:,nonrobust,LLR p-value:,1.0

y=1,coef,std err,z,P>|z|,[0.025,0.975]
x1,0.4351,0.411,1.06,0.289,-0.37,1.24
x2,-0.4197,0.847,-0.495,0.62,-2.08,1.241
x3,-0.4748,0.521,-0.911,0.362,-1.496,0.546
x4,0.1989,0.271,0.734,0.463,-0.332,0.73
x5,0.0762,0.435,0.175,0.861,-0.777,0.929
x6,0.8893,0.664,1.338,0.181,-0.413,2.192
x7,0.7452,0.697,1.07,0.285,-0.62,2.111
x8,-0.5448,0.729,-0.748,0.455,-1.973,0.883
x9,-0.5863,0.911,-0.643,0.52,-2.373,1.2
x10,0.4361,1.972,0.221,0.825,-3.429,4.301


In [22]:
import statsmodels.api as sm 

logistic_model = sm.Logit(np.asarray(y_train.cat.codes), np.asarray(x_train)).fit(method='BFGS')
logistic_model.summary() 

         Current function value: 0.652606
         Iterations: 35
         Function evaluations: 36
         Gradient evaluations: 36



Maximum number of iterations has been exceeded.


Maximum Likelihood optimization failed to converge. Check mle_retvals



0,1,2,3
Dep. Variable:,y,No. Observations:,95.0
Model:,Logit,Df Residuals:,82.0
Method:,MLE,Df Model:,12.0
Date:,"Sun, 08 Oct 2023",Pseudo R-squ.:,-0.5608
Time:,11:24:59,Log-Likelihood:,-61.998
converged:,False,LL-Null:,-39.721
Covariance Type:,nonrobust,LLR p-value:,1.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
x1,0.4343,0.410,1.058,0.290,-0.370,1.239
x2,-0.4641,0.850,-0.546,0.585,-2.131,1.202
x3,-0.4699,0.520,-0.903,0.366,-1.490,0.550
x4,0.1958,0.271,0.723,0.469,-0.335,0.726
x5,0.0649,0.433,0.150,0.881,-0.783,0.913
x6,0.9169,0.666,1.376,0.169,-0.389,2.223
x7,0.7240,0.690,1.049,0.294,-0.629,2.077
x8,-0.4675,0.726,-0.644,0.520,-1.891,0.956
x9,-0.5267,0.903,-0.584,0.560,-2.296,1.242
