In [8]:
import numpy as np
import pandas as pd
import lightgbm as lgb
import multiprocessing
from concurrent.futures import ThreadPoolExecutor
import time

In [37]:

# Report the CPU count seen by multiprocessing, as context for the thread
# counts passed to ModelWrapper.train() further down.
cpu_total = multiprocessing.cpu_count()
print('CPUs available:', cpu_total)

CPUs available:  16


In [6]:
# Creating the synthetic data.
# Each feature column Xi is uniform noise in [0, 1) scaled by a random integer
# in [2, 10); each target Yi is an exact linear function of its own feature:
# Yi = Xi * (i + 2).
SEED = 42          # fixed seed so Restart & Run All reproduces the same table
row_count = 500
n_pairs = 6        # number of (Xi, Yi) column pairs

# A dedicated, seeded Generator instead of the unseeded global np.random state.
rng = np.random.default_rng(SEED)

X_vars = [rng.random((row_count, 1)) * rng.integers(2, 10) for _ in range(n_pairs)]
Y_vars = [x_var * (index + 2) for index, x_var in enumerate(X_vars)]

X_cols = [f'X{i}' for i in range(len(X_vars))]
Y_cols = [f'Y{i}' for i in range(len(Y_vars))]

# Targets first, then features, matching the column order Y0..Y5, X0..X5.
all_data = pd.DataFrame(np.concatenate(Y_vars + X_vars, axis=1), columns=Y_cols + X_cols)
all_data.head(2)

Unnamed: 0,Y0,Y1,Y2,Y3,Y4,Y5,X0,X1,X2,X3,X4,X5
0,4.198349,2.425459,12.407168,3.705444,8.932568,28.119935,2.099174,0.808486,3.101792,0.741089,1.488761,4.017134
1,0.459737,0.691016,4.015973,29.323626,0.260856,19.216033,0.229868,0.230339,1.003993,5.864725,0.043476,2.745148


In [20]:
class ModelWrapper:
    """Train one LightGBM regressor per target column, serially or in a thread pool.

    All models share the same feature columns and the same data table; the
    fitted estimators are collected in ``self.models`` keyed by target name.

    NOTE(review): each LGBMRegressor may itself train with several threads
    (its ``n_jobs`` setting), so running many fits concurrently could
    oversubscribe the CPU and be slower than the serial path - confirm against
    the LightGBM documentation before relying on ``is_parallel`` for speed.
    """

    def __init__(self, targets, features, data, is_parallel) -> None:
        """Store the training configuration; nothing is fitted here.

        Args:
            targets: iterable of column names in ``data``; one model per name.
            features: column names in ``data`` used as inputs for every model.
            data: table supporting ``data[features]`` and ``data[target]``.
            is_parallel: if truthy, ``train`` dispatches the targets to a
                thread pool; otherwise they are trained one after another.
        """
        self.targets = targets
        self.features = features
        self.models = {}  # target name -> fitted LGBMRegressor
        self.data = data
        self.is_parallel = is_parallel
        self.total_time_taken = 0  # wall-clock seconds of the last completed train()

    def train_inner_model(self, target):
        """Fit a fresh LGBMRegressor for ``target`` and store it in ``self.models``.

        Args:
            target: name of the column in ``self.data`` to predict.
        """
        print(f'training inner target: {target}')
        model = lgb.LGBMRegressor(verbose=-1)
        model.fit(X=self.data[self.features], y=self.data[target])
        # Register only after a successful fit, so self.models never holds an
        # unfitted estimator if fit() raises.
        self.models[target] = model

    def train(self, n_threads=None):
        """Train a model for every target and record the elapsed wall-clock time.

        Args:
            n_threads: maximum number of worker threads for the parallel path
                (passed to ThreadPoolExecutor as ``max_workers``; ``None`` lets
                the executor choose). Ignored when ``is_parallel`` is falsy.

        Raises:
            Exception: whatever ``train_inner_model`` raises, in both the serial
                and the parallel path.
        """
        start_time = time.time()
        if self.is_parallel:
            with ThreadPoolExecutor(max_workers=n_threads) as executor:
                futures = [
                    executor.submit(self.train_inner_model, target)
                    for target in self.targets
                ]
                # result() re-raises any exception from the worker thread;
                # without it a failed fit would be dropped silently.
                for future in futures:
                    future.result()
        else:
            for target in self.targets:
                self.train_inner_model(target)
        end_time = time.time()
        self.total_time_taken = end_time - start_time

        print(f'total time taken : {np.round(self.total_time_taken,4)} seconds')

        

In [35]:
# Baseline: fit the per-target models one after another. With
# is_parallel=False the thread count given to train() is not used.
models_cluster = ModelWrapper(
    targets=Y_cols,
    features=X_cols,
    data=all_data,
    is_parallel=False,
)
models_cluster.train(n_threads=6)

training inner target: Y0
training inner target: Y1
training inner target: Y2
training inner target: Y3
training inner target: Y4
training inner target: Y5
total time taken : 0.231 seconds


In [36]:
# Same configuration as the serial run, but the targets are dispatched to a
# pool of up to 6 worker threads. Rebinds models_cluster to the new wrapper.
models_cluster = ModelWrapper(
    targets=Y_cols,
    features=X_cols,
    data=all_data,
    is_parallel=True,
)
models_cluster.train(n_threads=6)

training inner target: Y0
training inner target: Y1
training inner target: Y2
training inner target: Y3
training inner target: Y4
training inner target: Y5
total time taken : 0.3493 seconds
