# Imports

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
from matplotlib import pyplot as plt
from collections import defaultdict
import pickle
import time
from tqdm import tqdm
from copy import deepcopy
import sys
sys.setrecursionlimit(50000)
import warnings
warnings.filterwarnings("ignore")
from multiprocessing import Pool

import pre_processing as pp
from arm import generate_itemsets, compute_ar_rules, get_arm_data
from ear import compute_ear_rules
from nn_rules import compute_nn_rules, get_rules
from layout import create_layout
from warehouse import Warehouse
from articles import create_articles
from word_analysis import calculate_word_scores
from greedy import Greedy
from ga import GeneticModel
import training_methods as tm
from parallel_processing import fast_vectorize_data
from evaluation import evaluate_solution_for_greedy, evaluate_solution, evaluate_solution_for_greedy_no_depot


# Data Loading

In [None]:
# Base order count; passed (scaled by 4, 2 and 1) as `n_orders` to
# pp.train_val_test_split below and embedded in the cache file names.
n_orders = 20000


def create_hp_tuning_data(cv_index, data_path):
    """Build, pickle and return the data set for one hyper-parameter-tuning fold.

    The data at ``data_path`` is cut into four parts by two nested calls to
    ``pp.train_val_test_split``; ``cv_index`` picks one of them. The chosen
    part is split again, its train and validation parts are merged into the
    training set, and both training and test sets are preprocessed and turned
    into ARM frames that share the same column order.

    Parameters
    ----------
    cv_index : int
        Which of the four parts to use; must be 0, 1, 2 or 3.
    data_path : str
        Forwarded to ``pp.train_val_test_split`` as ``data_path``.

    Returns
    -------
    list
        ``[train_df, test_df, train_supports, train_unique, all_unique,
        train_orders, vectorizer]``. The same list is pickled to
        ``../Data/{n_orders}_orders/data_{cv_index}.pkl``.

    Raises
    ------
    ValueError
        If ``cv_index`` is not one of 0, 1, 2, 3. Checked before any data is
        read, so a bad index fails immediately instead of after the splits.
    """
    if cv_index not in (0, 1, 2, 3):
        raise ValueError(f'cv_index must be 0, 1, 2 or 3, got {cv_index!r}')

    data_1_2, data_3, data_4 = pp.train_val_test_split(data_path=data_path,
                                                       val_prop=0.25, test_prop=0.25,
                                                       n_orders=n_orders*4)

    data_1, data_2, data_22 = pp.train_val_test_split(data=data_1_2,
                                                      val_prop=0.25, test_prop=0.25,
                                                      n_orders=n_orders*2)

    # The second and third part of the inner split together form fold 1.
    data_2 = pd.concat([data_2, data_22], ignore_index=True, axis=0)

    if cv_index == 0:
        fold = data_1
    elif cv_index == 1:
        fold = data_2
    elif cv_index == 2:
        fold = data_3
    else:  # cv_index == 3, guaranteed by the validation above
        fold = data_4
    data = fold.reset_index().drop('index', axis=1)

    train_data, val_data, test_data = pp.train_val_test_split(data=data,
                                                              val_prop=0.25, test_prop=0.25,
                                                              n_orders=n_orders)

    # No separate validation set is kept: it is folded into the training data.
    train_data = pd.concat([train_data, val_data], ignore_index=True, axis=0)

    train_data = pp.preprocess_data(train_data)
    test_data = pp.preprocess_data(test_data)

    train_orders = pp.unique_orders(train_data)
    test_orders = pp.unique_orders(test_data)

    train_unique = list(train_data['ArticleName'].unique())
    all_unique = pd.concat([train_data, test_data], ignore_index=True, axis=0)['ArticleName'].unique()

    train_df, train_supports = get_arm_data(train_orders, list(all_unique))
    test_df, _ = get_arm_data(test_orders, list(all_unique))

    # Give the test frame exactly the training frame's columns, in the same order.
    test_df = test_df.reindex(train_df.columns, axis=1)

    article_list_ordered = list(train_df.columns)
    vectorizer = pp.Vectorizer(article_list_ordered)

    # The return value is not used here; the call is kept because it is given
    # a save_path (presumably it writes the embeddings there - TODO confirm).
    vectorizer.get_sparse_bow_embeddings(list(all_unique), save_path=f'../Data/sparse_bow_{n_orders}_cv{cv_index}.pkl')

    relevant_data = [train_df, test_df, train_supports, train_unique, all_unique, train_orders, vectorizer]
    # `with` closes the file even if pickling fails part-way.
    with open(f'../Data/{n_orders}_orders/data_{cv_index}.pkl', "wb") as f:
        pickle.dump(relevant_data, f)
    return relevant_data

In [None]:
def load_data(filepath):
    """Unpickle and return the object stored at ``filepath``.

    Parameters
    ----------
    filepath : str or path-like
        Location of a pickle file.

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Raises
    ------
    OSError
        If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    # NOTE: pickle.load can execute arbitrary code; only use this on files
    # produced by this project.
    # `with` guarantees the handle is closed even when unpickling fails.
    with open(filepath, "rb") as f:
        return pickle.load(f)


In [None]:
# Warehouse layout used by the experiments below; it is also bound as the
# default `layout` argument of run_greedy and run_genetic when they are defined.
layout = create_layout()

# Evaluation

In [None]:
def run_greedy(articles, 
               layout=layout, 
               distance_weight=1, 
               rule_weight=0.01, 
               rule_weight_for_article_scores=1e4, 
               penalty_weight=0, 
               verbose=False,
               hp_tuning=False):
    """Optimise article locations with the ``Greedy`` model.

    All weight arguments and ``hp_tuning`` are forwarded unchanged to
    ``Greedy``; the model is wrapped in a ``Warehouse`` built on ``layout``
    and ``optimize_locations`` is run once.

    Parameters
    ----------
    articles
        Articles to place (as produced by ``create_articles`` in this notebook).
    layout
        Layout handed to both ``Greedy`` and ``Warehouse``.
    verbose : bool
        When truthy, the solution matrix is shown as a heatmap.

    Returns
    -------
    The third element returned by ``Warehouse.optimize_locations``
    (``product_to_shelves``).
    """
    greedy_settings = dict(layout=layout,
                           articles=articles,
                           distance_weight=distance_weight,
                           rule_weight=rule_weight,
                           rule_weight_for_article_scores=rule_weight_for_article_scores,
                           penalty_weight=penalty_weight,
                           hp_tuning=hp_tuning)
    optimizer = Greedy(**greedy_settings)

    # The middle element of the result is not needed for the greedy model.
    heatmap_values, _, shelves_by_product = Warehouse(layout, optimizer).optimize_locations()

    if not verbose:
        return shelves_by_product

    sns.heatmap(heatmap_values, square=True)
    plt.show()
    return shelves_by_product

In [None]:
def run_genetic(articles,
                layout=layout, 
                distance_weight=1,
                rule_weight=1, 
                population_size=100, 
                n_iter=100, 
                crossover_rate=1, 
                mutation_rate=0.1, 
                k_selection=3, 
                crossover='crossover_height', 
                fitness='fitness_distance_and_rules', 
                orders=None, 
                warm_start=False, 
                parallel=False,
                verbose=0,
                solution=None):
    """Optimise article locations with the ``GeneticModel``.

    Every argument is forwarded unchanged to ``GeneticModel``; the model is
    wrapped in a ``Warehouse`` built on ``layout`` and ``optimize_locations``
    is run once.

    Parameters
    ----------
    articles
        Articles to place (as produced by ``create_articles`` in this notebook).
    layout
        Layout handed to both ``GeneticModel`` and ``Warehouse``.
    verbose : int
        Forwarded to the model. When greater than 0, two figures are shown:
        a heatmap of the solution matrix and a line plot of ``best_scores``
        against its index.

    Returns
    -------
    The third element returned by ``Warehouse.optimize_locations``
    (``product_to_shelves``).
    """
    model_settings = dict(articles=articles,
                          layout=layout,
                          rule_weight=rule_weight,
                          distance_weight=distance_weight,
                          population_size=population_size,
                          n_iter=n_iter,
                          crossover_rate=crossover_rate,
                          mutation_rate=mutation_rate,
                          k_selection=k_selection,
                          crossover=crossover,
                          fitness=fitness,
                          orders=orders,
                          warm_start=warm_start,
                          parallel=parallel,
                          verbose=verbose,
                          solution=solution)
    optimizer = GeneticModel(**model_settings)

    heatmap_values, score_history, shelves_by_product = Warehouse(layout, optimizer).optimize_locations()

    if not (verbose > 0):
        return shelves_by_product

    # Figure 1: final placement.
    plt.figure(1)
    sns.heatmap(heatmap_values, square=True)
    plt.show()

    # Figure 2: best score per recorded step.
    plt.figure(2)
    plt.plot(np.arange(len(score_history)), score_history)
    plt.show()

    return shelves_by_product

In [None]:
def get_ear_rules(ar_rules, 
                  unique_items,
                  k=4,
                  r=1, 
                  embeddings_path=f'../Data/sparse_bow_{n_orders}_cv0.pkl', 
                  save_path=None, 
                  word_weights=defaultdict(lambda: 1), 
                  is_sparse=True,
                  beta=1/10,
                  parallel_rules=False,
                  parallel_weight=1):
    """Return EAR rules, loading them from a pickle cache when one is available.

    If ``save_path`` can be opened and unpickled, its content is returned and
    nothing is computed (so the other arguments are ignored in that case).
    Otherwise ``compute_ear_rules`` is called with the given arguments
    (``r`` is passed on as ``radius``) and its result is returned.

    Parameters
    ----------
    ar_rules, unique_items, k, embeddings_path, word_weights, is_sparse,
    beta, parallel_rules, parallel_weight
        Forwarded unchanged to ``compute_ear_rules``.
    r
        Forwarded to ``compute_ear_rules`` as ``radius``.
    save_path : str or None
        Cache file to try first; also forwarded to ``compute_ear_rules``.
        ``None`` means ``'../ear_rules.pkl'``.

    Returns
    -------
    The cached object, or the value returned by ``compute_ear_rules``.
    """
    if save_path is None:
        save_path = '../ear_rules.pkl'

    try:
        # NOTE: pickle.load can execute arbitrary code; the cache file must
        # come from a trusted run of this project.
        with open(save_path, "rb") as f:
            return pickle.load(f)
    except FileNotFoundError:
        # No cache yet: the normal first-run case, fall through and compute.
        pass
    except Exception as exc:
        # An unreadable or corrupt cache still falls back to recomputation
        # (best effort, as before), but it is now reported, and
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        # print is used because this notebook silences warnings globally.
        print(f'Could not load cached EAR rules from {save_path} ({exc!r}); recomputing.')

    return compute_ear_rules(ar_rules,
                             unique_items=unique_items,
                             k=k,
                             radius=r,
                             embeddings_path=embeddings_path,
                             save_path=save_path,
                             word_weights=word_weights,
                             is_sparse=is_sparse,
                             beta=beta,
                             parallel_rules=parallel_rules,
                             parallel_weight=parallel_weight)

# Final train/test data

In [None]:
def _read_all_orders(data_path):
    """Load ``data_path`` through ``pp.train_val_test_split`` and rejoin the three parts."""
    # NOTE(review): n_orders=10000000 is presumably a cap high enough that no
    # order is dropped - confirm against pp.train_val_test_split.
    part_a, part_b, part_c = pp.train_val_test_split(data_path=data_path,
                                                     val_prop=0.25, test_prop=0.25,
                                                     n_orders=10000000)
    return pd.concat([part_a, part_b, part_c], ignore_index=True, axis=0)


def create_data(train_path, test_path, save_path):
    """Build, pickle and return the final train/test data set.

    Training and test data are read from two separate files. Both are
    preprocessed and turned into ARM frames that share the same column order.

    Parameters
    ----------
    train_path : str
        File with the training data.
    test_path : str
        File with the test data.
    save_path : str
        Where the resulting list is pickled.

    Returns
    -------
    list
        ``[train_df, test_df, train_supports, train_unique, all_unique,
        train_orders, vectorizer]``.
    """
    train_data = pp.preprocess_data(_read_all_orders(train_path))
    test_data = pp.preprocess_data(_read_all_orders(test_path))

    train_orders = pp.unique_orders(train_data)
    test_orders = pp.unique_orders(test_data)

    train_unique = list(train_data['ArticleName'].unique())
    all_unique = pd.concat([train_data, test_data], ignore_index=True, axis=0)['ArticleName'].unique()

    train_df, train_supports = get_arm_data(train_orders, list(all_unique))
    test_df, _ = get_arm_data(test_orders, list(all_unique))

    # Give the test frame exactly the training frame's columns, in the same order.
    test_df = test_df.reindex(train_df.columns, axis=1)

    article_list_ordered = list(train_df.columns)
    vectorizer = pp.Vectorizer(article_list_ordered)

    # The return value is not used here; the call is kept because it is given
    # a save_path (presumably it writes the embeddings there - TODO confirm).
    vectorizer.get_sparse_bow_embeddings(list(all_unique), save_path='../Data/sparse_bow.pkl')

    relevant_data = [train_df, test_df, train_supports, train_unique, all_unique, train_orders, vectorizer]
    # `with` closes the file even if pickling fails part-way.
    with open(save_path, "wb") as f:
        pickle.dump(relevant_data, f)
    return relevant_data

In [None]:
# Load the list pickled by create_data and unpack it into notebook-level names.
# NOTE(review): 'data_path.pkl' looks like a placeholder file name - point it
# at the save_path that was given to create_data.
# `with` closes the handle even if unpickling fails.
with open('data_path.pkl', "rb") as f:
    data = pickle.load(f)

train_df, test_df, train_supports, train_unique, all_unique, train_orders, vectorizer = data
bow = vectorizer.get_sparse_bow_embeddings(list(all_unique), save_path='../Data/sparse_bow.pkl')

# Baselines

In [None]:
# Copy of the training supports in which every article's value is replaced by
# a random draw from np.random.rand(). No seed is set, so the values differ
# between runs.
random_supports = train_supports.copy()
for article in random_supports.keys():
    random_supports[article] = np.random.rand()


In [None]:
# RANDOM BASELINE
# Five runs: redraw a random support for every article, place the articles
# with the greedy model (all rule weights zero) and score the result on test_df.
random_results = []
for _ in range(5):
    for article in random_supports.keys():
        random_supports[article] = np.random.rand()

    train_articles = create_articles(random_supports,
                                     ar_weight=0,
                                     ear_weight=0,
                                     nn_weight=0)

    solution = run_greedy(articles=train_articles,
                          layout=layout,
                          distance_weight=1,
                          rule_weight=0,
                          rule_weight_for_article_scores=0,
                          penalty_weight=0,
                          verbose=False,
                          hp_tuning=False)

    average_distance_travelled = evaluate_solution_for_greedy(solution,
                                                              test_df,
                                                              layout,
                                                              batch_size=1,
                                                              verbose=True)
    random_results.append(average_distance_travelled)

# Mean and standard deviation over the five runs.
print(f'{np.mean(random_results)} +- {np.std(random_results)}')

In [None]:
## IN ORDER BASELINE
# Greedy placement driven by the real training supports only (no rules).
train_articles = create_articles(train_supports,
                                 ar_weight=0,
                                 ear_weight=0,
                                 nn_weight=0)

solution = run_greedy(articles=train_articles,
                      layout=layout,
                      distance_weight=1,
                      rule_weight=0,
                      rule_weight_for_article_scores=0,
                      penalty_weight=0,
                      verbose=False,
                      hp_tuning=False)

average_distance_travelled = evaluate_solution_for_greedy(solution,
                                                          test_df,
                                                          layout,
                                                          batch_size=1,
                                                          verbose=True)
print('Baseline:', average_distance_travelled)



# Network

In [None]:
def split_into_rows(n_workers, orders):
    """Divide ``orders['Articles']`` into ``n_workers`` lists of near-equal size.

    The first ``n_workers * (len // n_workers)`` entries are cut into
    consecutive, equally sized chunks. Any remaining entries are handed out
    one each to the first chunks, taken from the end of the input backwards
    (the last entry goes to chunk 0, the second to last to chunk 1, ...).

    Parameters
    ----------
    n_workers : int
        Number of chunks to produce.
    orders
        Any object whose ``['Articles']`` item is iterable.

    Returns
    -------
    list of list
        ``n_workers`` lists that together hold every entry exactly once.
    """
    all_orders = list(orders['Articles'])
    chunk_size, leftover = divmod(len(all_orders), n_workers)

    chunks = [all_orders[worker * chunk_size:(worker + 1) * chunk_size]
              for worker in range(n_workers)]

    # leftover < n_workers, so each leftover entry lands in a distinct chunk.
    for worker, order in zip(range(leftover), reversed(all_orders)):
        chunks[worker].append(order)

    return chunks

In [None]:
### Parallel vectorisation of the training orders
# Lists for storing the results
train_results = []
test_results = []


### Callback functions for collecting results.
# They are no longer used by the pool below (results are now gathered in
# submission order) but are kept so existing references keep working.
def callback_train(result):
    """Append one worker result to ``train_results``."""
    train_results.append(result)


def callback_test(result):
    """Append one worker result to ``test_results``."""
    test_results.append(result)


print(train_orders.shape)

n_workers = 8  # number of processes and number of chunks the orders are split into

st_par = time.time()
# Needs main to function
if __name__ == '__main__':
    print('started')
    s_train = time.time()

    # Split orders into one part per worker
    order_list_train = split_into_rows(n_workers, train_orders)

    # starmap returns the results in the order of order_list_train and
    # re-raises any exception from a worker. The previous apply_async +
    # callback version appended results in completion order (so the row
    # order of X_train varied between runs) and silently dropped failures.
    with Pool(n_workers) as p_train:
        train_results = p_train.starmap(fast_vectorize_data,
                                        [(vectorizer, chunk) for chunk in order_list_train])

    # Report the real number of orders instead of a hard-coded figure.
    print(f'Parallell train time: {time.time() - s_train}, n={len(train_orders)}\n')

# Flatten the per-worker result lists into one list.
t = [row for worker_rows in train_results for row in worker_rows]

X_train = np.asarray(t, dtype=np.int8)
# Drop axis 2; this reshape only succeeds when that axis has length 1.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[3])
print(X_train.shape)

# `with` closes the file even if pickling fails part-way.
with open('../Data/X_train.pkl', 'wb') as X_train_file:
    pickle.dump(X_train, X_train_file)


In [None]:

# Size of one article vector = last axis of X_train.
X_shape = X_train.shape
article_shape = X_shape[-1]

# Network settings (also read by the prediction cell).
embedding_dim, alpha = 256, 1.0
w_path = f'../Data/f_theta.h5'

f_train = tm.F_theta(article_shape=article_shape,
                     train_predict=0,
                     embedding_dim=embedding_dim,
                     separation=alpha,
                     weights_path=w_path,
                     visualize=False,
                     verbose=False)

clean = False

# The last 10000 rows are held out for validation, the rest is used to fit.
fit_rows = slice(None, -10000)
holdout_rows = slice(-10000, None)

f_train.train(X_train=X_train[fit_rows, :, :],
              y_train=X_train[fit_rows, :, :],
              val_data=([X_train[holdout_rows, 0, :],
                         X_train[holdout_rows, 1, :],
                         X_train[holdout_rows, 2, :]],
                        X_train[holdout_rows, 0, :]),
              num_epochs=3,
              batch_size=1024,
              num_workers=8,
              early_stopping=False)

# Optionally drop the training array once training is done.
if clean:
    del X_train

In [None]:

# Same network settings as for training, but built with train_predict=1.
f_pred = tm.F_theta(article_shape=article_shape,
                    train_predict=1,
                    embedding_dim=embedding_dim,
                    separation=alpha,
                    weights_path=w_path,
                    visualize=False,
                    verbose=False)

# Create embeddings
article_names, article_vecs = vectorizer.get_articles_for_embedding([])
embedding_space = np.asarray(f_pred.network(article_vecs))

# Map article name to embedding (pairs names and embeddings by position)
embedding_map = dict(zip(article_names, embedding_space))

nn_rules = compute_nn_rules(train_supports,
                            embedding_space=embedding_space,
                            embedding_map=embedding_map,
                            k=100,
                            r=1e-3,
                            save_path='../Data/nn_rules.pkl')

# Genetic tests

In [None]:
# AR results!
# Genetic model driven by association rules only.
dw, rw = 1e5, 1

itemsets, itemsets_size_2 = generate_itemsets(train_df, min_support=0.0001)
ar_rules = compute_ar_rules(itemsets_size_2,
                            metric='lift',
                            min_threshold=1,
                            save_path=f'../Data/ar_rules.pkl')

train_articles = create_articles(train_supports,
                                 ar_rules=ar_rules,
                                 ar_weight=1,
                                 ear_weight=0,
                                 nn_weight=0)

genetic_settings = dict(layout=layout,
                        distance_weight=dw,
                        rule_weight=rw,
                        population_size=100,
                        n_iter=100,
                        crossover_rate=100,
                        mutation_rate=0.001,
                        k_selection=3,
                        crossover='crossover_height',
                        fitness='fitness_distance_and_rules',
                        orders=None,
                        warm_start=True,
                        parallel=False,
                        verbose=1,
                        solution=None)
solution = run_genetic(articles=train_articles, **genetic_settings)

average_distance_travelled = evaluate_solution_for_greedy(solution,
                                                          test_df,
                                                          layout,
                                                          batch_size=1,
                                                          verbose=True)
print('Result:', average_distance_travelled)


In [None]:
# Unweighted EAR results!
# Genetic model driven by EAR rules in which every word has weight 1.
dw = 3e8
rw = 1

itemsets, itemsets_size_2 = generate_itemsets(train_df, min_support=0.0001)

ar_rules = compute_ar_rules(itemsets_size_2, metric='lift', min_threshold=1,
                            save_path='../Data/ar_rules.pkl')

parallel_weight = 1000
k = 6
r = 6

words = vectorizer.cv.get_feature_names_out()

# Two fixes compared with the earlier version of this cell:
# * the cache file was spelled 'ear_rules_unweeighted.pkl'; it now matches the
#   'ear_rules_unweighted.pkl' used by the unweighted greedy cell, which calls
#   get_ear_rules with the same k, r, beta and parallel_weight;
# * np.ones_like(words) took its dtype from an array of strings; np.ones
#   gives one numeric weight of 1 per word regardless of that dtype.
ear_rules = get_ear_rules(ar_rules,
                          unique_items=list(all_unique),
                          k=k,
                          r=r,
                          embeddings_path='../Data/sparse_bow.pkl',
                          save_path='../Data/ear_rules_unweighted.pkl',
                          word_weights=np.ones(len(words)),
                          is_sparse=True,
                          beta=r/10,
                          parallel_rules=True,
                          parallel_weight=parallel_weight)

train_articles = create_articles(train_supports, ear_rules=ear_rules, ar_weight=0, ear_weight=1, nn_weight=0)
solution = run_genetic(articles=train_articles,
                       layout=layout,
                       distance_weight=dw,
                       rule_weight=rw,
                       population_size=100,
                       n_iter=5,
                       crossover_rate=100,
                       mutation_rate=0.001,
                       k_selection=3,
                       crossover='crossover_height',
                       fitness='fitness_distance_and_rules',
                       orders=None,
                       warm_start=True,
                       parallel=False,
                       verbose=1,
                       solution=None)

average_distance_travelled = evaluate_solution_for_greedy(solution, test_df, layout, batch_size=1, verbose=True)
print('Result:', average_distance_travelled)


In [None]:
# EAR results!
# Genetic model driven by EAR rules with word-score weights.
dw, rw = 5e6, 1

gamma, delta = 1.6, 3.5
parallel_weight = 10
k, r = 2, 1

itemsets, itemsets_size_2 = generate_itemsets(train_df, min_support=0.0001)
ar_rules = compute_ar_rules(itemsets_size_2,
                            metric='lift',
                            min_threshold=1,
                            save_path=f'../Data/ar_rules.pkl')

words = vectorizer.cv.get_feature_names_out()
word_scores, single_word_scores, word_frequencies = calculate_word_scores(
    train_unique, train_orders, words, gamma=gamma, delta=delta)

# Scores that are not positive are replaced by half the smallest positive score.
positive_scores = [score for score in word_scores if score > 0]
min_score = np.min(positive_scores)
word_scores = [score if score > 0 else min_score/2 for score in word_scores]

ear_settings = dict(unique_items=list(all_unique),
                    k=k,
                    r=r,
                    embeddings_path=f'../Data/sparse_bow.pkl',
                    save_path=f'../Data/ear_rules_pair_scores.pkl',
                    word_weights=word_scores,
                    is_sparse=True,
                    beta=r/10,
                    parallel_rules=True,
                    parallel_weight=parallel_weight)
ear_rules = get_ear_rules(ar_rules, **ear_settings)

train_articles = create_articles(train_supports,
                                 ear_rules=ear_rules,
                                 ar_weight=0,
                                 ear_weight=1,
                                 nn_weight=0)

genetic_settings = dict(layout=layout,
                        distance_weight=dw,
                        rule_weight=rw,
                        population_size=100,
                        n_iter=5,
                        crossover_rate=100,
                        mutation_rate=0.001,
                        k_selection=3,
                        crossover='crossover_height',
                        fitness='fitness_distance_and_rules',
                        orders=None,
                        warm_start=True,
                        parallel=False,
                        verbose=1,
                        solution=None)
solution = run_genetic(articles=train_articles, **genetic_settings)

average_distance_travelled = evaluate_solution_for_greedy(solution,
                                                          test_df,
                                                          layout,
                                                          batch_size=1,
                                                          verbose=True)
print('Result:', average_distance_travelled)


In [None]:
# NNR results!
# Genetic model driven by the neural-network rules only.
dw, rw = 1e5, 1

train_articles = create_articles(train_supports,
                                 nn_rules=nn_rules,
                                 ar_weight=0,
                                 ear_weight=0,
                                 nn_weight=1)

genetic_settings = dict(layout=layout,
                        distance_weight=dw,
                        rule_weight=rw,
                        population_size=100,
                        n_iter=5000,
                        crossover_rate=100,
                        mutation_rate=0.001,
                        k_selection=3,
                        crossover='crossover_height',
                        fitness='fitness_distance_and_rules',
                        orders=None,
                        warm_start=True,
                        parallel=False,
                        verbose=1,
                        solution=None)
solution = run_genetic(articles=train_articles, **genetic_settings)

average_distance_travelled = evaluate_solution_for_greedy(solution,
                                                          test_df,
                                                          layout,
                                                          batch_size=1,
                                                          verbose=True)
print('Result:', average_distance_travelled)


In [None]:
# All rules results!
# Genetic model driven by AR, EAR and NN rules combined.
dw, rw = 5e6, 1

gamma, delta = 1.6, 3.5
parallel_weight = 10
k, r = 2, 1

# Relative weights of the EAR and NN rules (the AR weight is 1).
ear_weight, nn_weight = 1e-4, 10

itemsets, itemsets_size_2 = generate_itemsets(train_df, min_support=0.0001)
ar_rules = compute_ar_rules(itemsets_size_2,
                            metric='lift',
                            min_threshold=1,
                            save_path=f'../Data/ar_rules.pkl')

words = vectorizer.cv.get_feature_names_out()
word_scores, single_word_scores, word_frequencies = calculate_word_scores(
    train_unique, train_orders, words, gamma=gamma, delta=delta)

# Scores that are not positive are replaced by half the smallest positive score.
positive_scores = [score for score in word_scores if score > 0]
min_score = np.min(positive_scores)
word_scores = [score if score > 0 else min_score/2 for score in word_scores]

ear_settings = dict(unique_items=list(all_unique),
                    k=k,
                    r=r,
                    embeddings_path=f'../Data/sparse_bow.pkl',
                    save_path=f'../Data/ear_rules_pair_scores.pkl',
                    word_weights=word_scores,
                    is_sparse=True,
                    beta=r/10,
                    parallel_rules=True,
                    parallel_weight=parallel_weight)
ear_rules = get_ear_rules(ar_rules, **ear_settings)

train_articles = create_articles(train_supports,
                                 ar_rules=ar_rules,
                                 ear_rules=ear_rules,
                                 nn_rules=nn_rules,
                                 ar_weight=1,
                                 ear_weight=ear_weight,
                                 nn_weight=nn_weight)

genetic_settings = dict(layout=layout,
                        distance_weight=dw,
                        rule_weight=rw,
                        population_size=100,
                        n_iter=5000,
                        crossover_rate=100,
                        mutation_rate=0.001,
                        k_selection=3,
                        crossover='crossover_height',
                        fitness='fitness_distance_and_rules',
                        orders=None,
                        warm_start=True,
                        parallel=False,
                        verbose=1,
                        solution=None)
solution = run_genetic(articles=train_articles, **genetic_settings)

average_distance_travelled = evaluate_solution_for_greedy(solution,
                                                          test_df,
                                                          layout,
                                                          batch_size=1,
                                                          verbose=True)
print('Result:', average_distance_travelled)


# Greedy tests

In [None]:
# EAR results!
# Greedy model driven by EAR rules with word-score weights.
parallel_weight = 10
gamma, delta = 1.6, 3.5
k, r = 2, 1

dw, aw, p = 5e5, 1, 0

itemsets, itemsets_size_2 = generate_itemsets(train_df, min_support=0.0001)
ar_rules = compute_ar_rules(itemsets_size_2,
                            metric='lift',
                            min_threshold=1,
                            save_path=f'../Data/ar_rules.pkl')

words = vectorizer.cv.get_feature_names_out()
word_scores, single_word_scores, word_frequencies = calculate_word_scores(
    train_unique, train_orders, words, gamma=gamma, delta=delta)

# Scores that are not positive are replaced by half the smallest positive score.
positive_scores = [score for score in word_scores if score > 0]
min_score = np.min(positive_scores)
word_scores = [score if score > 0 else min_score/2 for score in word_scores]

ear_settings = dict(unique_items=list(all_unique),
                    k=k,
                    r=r,
                    embeddings_path=f'../Data/sparse_bow.pkl',
                    save_path=f'../Data/ear_rules_pair_scores.pkl',
                    word_weights=word_scores,
                    is_sparse=True,
                    beta=r/10,
                    parallel_rules=True,
                    parallel_weight=parallel_weight)
ear_rules = get_ear_rules(ar_rules, **ear_settings)

train_articles = create_articles(train_supports,
                                 ear_rules=ear_rules,
                                 ar_weight=0,
                                 ear_weight=1,
                                 nn_weight=0)

solution = run_greedy(articles=train_articles,
                      layout=layout,
                      distance_weight=dw,
                      rule_weight=1,
                      rule_weight_for_article_scores=aw,
                      penalty_weight=p,
                      verbose=False,
                      hp_tuning=False)

average_distance_travelled = evaluate_solution_for_greedy(solution,
                                                          test_df,
                                                          layout,
                                                          batch_size=1,
                                                          verbose=True)
print('Result:', average_distance_travelled)

In [None]:
# Unweighted EAR results!
# Greedy model driven by EAR rules in which every word has weight 1.
parallel_weight = 1000

k, r = 6, 6

dw, aw, p = 1e8, 1, 0

itemsets, itemsets_size_2 = generate_itemsets(train_df, min_support=0.0001)
ar_rules = compute_ar_rules(itemsets_size_2,
                            metric='lift',
                            min_threshold=1,
                            save_path=f'../Data/ar_rules.pkl')

ear_settings = dict(unique_items=list(all_unique),
                    k=k,
                    r=r,
                    embeddings_path=f'../Data/sparse_bow.pkl',
                    save_path=f'../Data/ear_rules_unweighted.pkl',
                    word_weights=defaultdict(lambda: 1),  # every lookup yields 1
                    is_sparse=True,
                    beta=r/10,
                    parallel_rules=True,
                    parallel_weight=parallel_weight)
ear_rules = get_ear_rules(ar_rules, **ear_settings)

train_articles = create_articles(train_supports,
                                 ear_rules=ear_rules,
                                 ar_weight=0,
                                 ear_weight=1,
                                 nn_weight=0)

solution = run_greedy(articles=train_articles,
                      layout=layout,
                      distance_weight=dw,
                      rule_weight=1,
                      rule_weight_for_article_scores=aw,
                      penalty_weight=p,
                      verbose=False,
                      hp_tuning=False)

average_distance_travelled = evaluate_solution_for_greedy(solution,
                                                          test_df,
                                                          layout,
                                                          batch_size=1,
                                                          verbose=True)
print('Result:', average_distance_travelled)

In [None]:
# AR results!
# Greedy model driven by association rules only.
dw, aw, p = 5e4, 1, 0

# Not read anywhere in this cell.
scores = defaultdict(list)

itemsets, itemsets_size_2 = generate_itemsets(train_df, min_support=0.0001)
ar_rules = compute_ar_rules(itemsets_size_2,
                            metric='lift',
                            min_threshold=1,
                            save_path=f'../Data/ar_rules.pkl')

train_articles = create_articles(train_supports,
                                 ar_rules=ar_rules,
                                 ar_weight=1,
                                 ear_weight=0,
                                 nn_weight=0)

solution = run_greedy(articles=train_articles,
                      layout=layout,
                      distance_weight=dw,
                      rule_weight=1,
                      rule_weight_for_article_scores=aw,
                      penalty_weight=p,
                      verbose=False,
                      hp_tuning=False)

average_distance_travelled = evaluate_solution_for_greedy(solution,
                                                          test_df,
                                                          layout,
                                                          batch_size=1,
                                                          verbose=True)
print('Result:', average_distance_travelled)


In [None]:
# NNR
# Greedy model driven by the neural-network rules only.
dw, aw, p = 1e4, 1, 0

train_articles = create_articles(train_supports,
                                 nn_rules=nn_rules,
                                 ar_weight=0,
                                 ear_weight=0,
                                 nn_weight=1)

solution = run_greedy(articles=train_articles,
                      layout=layout,
                      distance_weight=dw,
                      rule_weight=1,
                      rule_weight_for_article_scores=aw,
                      penalty_weight=p,
                      verbose=False,
                      hp_tuning=False)

average_distance_travelled = evaluate_solution_for_greedy(solution,
                                                          test_df,
                                                          layout,
                                                          batch_size=1,
                                                          verbose=True)
print('Result:', average_distance_travelled)

In [None]:
# All rules together
# Greedy model driven by AR, EAR and NN rules combined.

parallel_weight = 10
gamma, delta = 1.6, 3.5
k, r = 2, 1

# Relative weights of the EAR and NN rules (the AR weight is 1).
ear_weight, nn_weight = 1e-4, 10

dw, aw, p = 5e5, 1, 0

itemsets, itemsets_size_2 = generate_itemsets(train_df, min_support=0.0001)
ar_rules = compute_ar_rules(itemsets_size_2,
                            metric='lift',
                            min_threshold=1,
                            save_path=f'../Data/ar_rules.pkl')

words = vectorizer.cv.get_feature_names_out()
word_scores, single_word_scores, word_frequencies = calculate_word_scores(
    train_unique, train_orders, words, gamma=gamma, delta=delta)

# Scores that are not positive are replaced by half the smallest positive score.
positive_scores = [score for score in word_scores if score > 0]
min_score = np.min(positive_scores)
word_scores = [score if score > 0 else min_score/2 for score in word_scores]

ear_settings = dict(unique_items=list(all_unique),
                    k=k,
                    r=r,
                    embeddings_path=f'../Data/sparse_bow.pkl',
                    save_path=f'../Data/ear_rules_pair_scores.pkl',
                    word_weights=word_scores,
                    is_sparse=True,
                    beta=r/10,
                    parallel_rules=True,
                    parallel_weight=parallel_weight)
ear_rules = get_ear_rules(ar_rules, **ear_settings)

train_articles = create_articles(train_supports,
                                 ar_rules=ar_rules,
                                 ear_rules=ear_rules,
                                 nn_rules=nn_rules,
                                 ar_weight=1,
                                 ear_weight=ear_weight,
                                 nn_weight=nn_weight)

solution = run_greedy(articles=train_articles,
                      layout=layout,
                      distance_weight=dw,
                      rule_weight=1,
                      rule_weight_for_article_scores=aw,
                      penalty_weight=p,
                      verbose=False,
                      hp_tuning=False)

average_distance_travelled = evaluate_solution_for_greedy(solution,
                                                          test_df,
                                                          layout,
                                                          batch_size=1,
                                                          verbose=True)
print('Result:', average_distance_travelled)