In [None]:
# parallelisation of Hybrid Group-IPF Lasso

# Upload the data

import pandas as pd
import numpy as np
import seaborn as sns
import cvxpy as cp
import time
import matplotlib.pyplot as plt

# Placeholders: the following names must be defined before the grid-search cell is run.
# X_train, X_test, Y_train, Y_test = ...
# flux_dimensions_index = (size of the flux features) - 1
# gene_dimensions_index = (size of the gene features) - 1

# Group index lists, one per feature family; both start out empty.
indices1 = list()                    #  group indices for the fluxes
indices2 = list()                    #  group indices for the genes

In [None]:
import worker_function as wf
import multiprocess
from itertools import repeat as iterreap

# Hyper-parameter grid: every (lambda1, lambda2, intercept) combination is
# laid out as three parallel flat arrays of equal length.
number_of_workers = 12               # also used as the number of points per grid axis
lambd_values = np.logspace(-0.6, 0.7, num=number_of_workers)
intercept_values = np.logspace(-1.6, 0.04, num=number_of_workers)

# lambda1 varies slowest, lambda2 next, and the intercept fastest.
lambdas1 = np.repeat(lambd_values, lambd_values.size * intercept_values.size)
lambdas2 = np.tile(np.repeat(lambd_values, intercept_values.size), lambd_values.size)
intercepts = np.tile(intercept_values, lambd_values.size ** 2)


if __name__ == '__main__':
    # Evaluate the worker once per grid point on a pool of processes. The
    # hyper-parameters differ per task; the data and group index objects
    # are passed unchanged to every task.
    start_time = time.time()
    print("Starting process...")

    # One argument tuple per grid point. The leading integer is a universal
    # task id, not the id of the process that runs it. The repeat iterators
    # are endless, so zip stops once the task ids are exhausted.
    task_ids = range(len(lambdas1))
    shared_inputs = [X_train, Y_train, X_test, Y_test, flux_dimensions_index,
                     gene_dimensions_index, indices1, indices2]
    task_arguments = zip(task_ids, lambdas1, lambdas2, intercepts,
                         *map(iterreap, shared_inputs))

    with multiprocess.Pool(20) as pool:
        results = pool.starmap(wf.optimise, task_arguments)

    print("Finished. Total time required (in minutes): ", (time.time() - start_time)/60)


In [None]:
# Collect the grid-search results into one table, print the runs whose test
# error is at or below the threshold, and save the whole table as CSV.
RESULT_COLUMNS = ["Worker", "Lambda1", "Lambda2", "Intercept", "Total time", "Train error", "Test error"]
# Positions inside each worker result that map, in order, onto RESULT_COLUMNS
# (positions 4 and 5 are not exported).
RESULT_FIELDS = (0, 1, 2, 3, 6, 7, 8)
TEST_ERROR_THRESHOLD = 0.0058        # runs at or below this test error are printed
# The original call passed an empty path, which cannot be opened for writing;
# adjust this file name as needed.
RESULTS_CSV_PATH = "hybrid_group_ipf_lasso_results.csv"

# Sanity check: one result is expected per grid point.
print(len(results), len(lambdas1))

# Build all rows first instead of growing an array with vstack inside the loop.
result_rows = [[res[field] for field in RESULT_FIELDS] for res in results]

for row in result_rows:
    if row[-1] <= TEST_ERROR_THRESHOLD:
        print("Worker {}, Lambda1: {}, Lambda2: {}, Intercept: {}, Total time: {}, Train error: {}, Test error: {}".format(*row))

# Always a 2-D table, including the zero- and single-result cases (the
# single-result case used to leave a flat list that DataFrame rejected).
clean_results = np.array(result_rows).reshape(-1, len(RESULT_COLUMNS))

pd.DataFrame(clean_results, columns=RESULT_COLUMNS).to_csv(RESULTS_CSV_PATH, encoding="utf-8")