In [1]:
import time
import math
import tqdm

import numpy as np
import multiprocessing

from sklearn import metrics
from itertools import repeat

from Function import svm_function

class multi_ensemble_svm():
    def __init__(self):
        self.model_array = None
        self.model_size = None
        self.hp = {
            "kernel": 'C_linear', "C":0, "logGamma":0, "degree":0, "coef0":0, "n":0.5, "max_iter":1e7
        }
    
    def train(self, data, label, ensemble_data_size=1, kernel='C_linear', C=0, logGamma=0, degree=0, coef0=0, n=0.5, max_iter=1e7):
        train_time = time.time()
        self.hp["kernel"] = kernel
        self.hp["C"] = C
        self.hp["logGamma"] = logGamma
        self.hp["degree"] = degree
        self.hp["coef0"] = coef0
        self.hp["n"] = n
        self.hp["max_iter"] = max_iter
        
        x, y = svm_function.ensemble_data(data, label, size=ensemble_data_size)
        self.model_size = len(x)
        
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
        # with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
        result = pool.starmap(self._svm_train, tqdm.tqdm(zip(x, y), total=self.model_size))
        self.model_array = result
        print("ensemble svm train: %.2fs" % (time.time() - train_time))
        
        return None

    def _svm_train(self, d, l):
        arr = np.arange(len(l))
        np.random.shuffle(arr)
        d = d[arr]
        l = l[arr]
        
        m = svm_function.svm_train_model(d, l, self.hp["kernel"], self.hp["C"], self.hp["logGamma"], self.hp["degree"], self.hp["coef0"], self.hp["n"], max_iter=self.hp["max_iter"])
        return m
    
    def test(self, x, y):
        pred_y, pred_y_score = self.predict(x)
            
        return metrics.roc_auc_score(y, pred_y_score)
    
    def _svm_predict(self, m, x):
        return m.predict(x)
    
    def predict(self, x):
        output = None
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
        output = pool.starmap(self._svm_predict, tqdm.tqdm(zip(self.model_array, repeat(x)), total=self.model_size))
        # for m in self.model_array:
        #     p_label = m.predict(x)
        #     if output is None:
        #         output = np.array([p_label])
        #     else:
        #         output = np.append(output, np.array([p_label]), axis=0)
        print(np.array(output).shape)
        
        # pred_y = []
        # pred_y_score = []
        # for o in output.T:
        #     u, c = np.unique(o, return_counts=True)
        #     pred_y.append(u[c == c.max()][0])

        #     if (u == 1).any():
        #         pred_y_score.append(c[u == 1][0] / sum(c))
        #     else:
        #         pred_y_score.append(0)

        # return pred_y, pred_y_score

X = np.load("data/merge_data/0916chiu/classifier/nuc_lncRNA__cyto_lncRNA_train.npy")
y = np.load("data/merge_data/0916chiu/classifier/nuc_lncRNA__cyto_lncRNA_train_y.npy")

In [3]:
# x_train, y_train = svm_function.ensemble_data(X, y, size=50)
clf = multi_ensemble_svm()
clf.multi_train(X, y, ensemble_data_size=10)
# clf.predict(X)

100%|██████████| 70/70 [00:00<00:00, 170.67it/s]


ensemble svm train: 1.16s


In [5]:
x_train, y_train = svm_function.ensemble_data(X, y, size=10)
clf = multi_ensemble_svm()
clf.train(x_train, y_train)

auroc: 0.6923928775053726
ensemble svm step: 1/70 | 0.12s
auroc: 0.6658699179860533
ensemble svm step: 2/70 | 0.11s
auroc: 0.6868448752247709
ensemble svm step: 3/70 | 0.11s
auroc: 0.6777115038814088
ensemble svm step: 4/70 | 0.11s
auroc: 0.6863514758124644
ensemble svm step: 5/70 | 0.12s
auroc: 0.6631945967282137
ensemble svm step: 6/70 | 0.11s
auroc: 0.6870860927152319
ensemble svm step: 7/70 | 0.13s
auroc: 0.6756392263497215
ensemble svm step: 8/70 | 0.15s
auroc: 0.6871299504407702
ensemble svm step: 9/70 | 0.11s
auroc: 0.6705517301872725
ensemble svm step: 10/70 | 0.11s
auroc: 0.6858142186746196
ensemble svm step: 11/70 | 0.11s
auroc: 0.6861541160475417
ensemble svm step: 12/70 | 0.11s
auroc: 0.6762970922327968
ensemble svm step: 13/70 | 0.13s
auroc: 0.6916582606026052
ensemble svm step: 14/70 | 0.11s
auroc: 0.6745427832112627
ensemble svm step: 15/70 | 0.11s
auroc: 0.6614293232752949
ensemble svm step: 16/70 | 0.11s
auroc: 0.6754966887417219
ensemble svm step: 17/70 | 0.11s
auroc:

In [31]:
print(multiprocessing.cpu_count())

64


In [40]:
def cube(x, y):
    time.sleep(0.5)
    return x**3 + y

pool = multiprocessing.Pool(multiprocessing.cpu_count())
start_time = time.perf_counter()
# result = pool.starmap(cube, zip(range(1,100), range(1,100)))
result = pool.starmap(cube, tqdm.tqdm(zip(range(1,1000), range(1,1000)), total=len(range(1,1000))))
finish_time = time.perf_counter()
print(f"Program finished in {finish_time-start_time} seconds")
print(result)
# start_time = time.time()
# a = []
# for i in range(1,100):
#     a.append(cube(i, i))
# finish_time = time.time()
# print(f"Program finished in {finish_time-start_time} seconds")
# print(a)

100%|██████████| 999/999 [00:06<00:00, 165.69it/s]


Program finished in 8.036148868966848 seconds
[2, 10, 30, 68, 130, 222, 350, 520, 738, 1010, 1342, 1740, 2210, 2758, 3390, 4112, 4930, 5850, 6878, 8020, 9282, 10670, 12190, 13848, 15650, 17602, 19710, 21980, 24418, 27030, 29822, 32800, 35970, 39338, 42910, 46692, 50690, 54910, 59358, 64040, 68962, 74130, 79550, 85228, 91170, 97382, 103870, 110640, 117698, 125050, 132702, 140660, 148930, 157518, 166430, 175672, 185250, 195170, 205438, 216060, 227042, 238390, 250110, 262208, 274690, 287562, 300830, 314500, 328578, 343070, 357982, 373320, 389090, 405298, 421950, 439052, 456610, 474630, 493118, 512080, 531522, 551450, 571870, 592788, 614210, 636142, 658590, 681560, 705058, 729090, 753662, 778780, 804450, 830678, 857470, 884832, 912770, 941290, 970398, 1000100, 1030402, 1061310, 1092830, 1124968, 1157730, 1191122, 1225150, 1259820, 1295138, 1331110, 1367742, 1405040, 1443010, 1481658, 1520990, 1561012, 1601730, 1643150, 1685278, 1728120, 1771682, 1815970, 1860990, 1906748, 1953250, 2000502,

Process ForkPoolWorker-1919:
Process ForkPoolWorker-1888:
Process ForkPoolWorker-1925:
Process ForkPoolWorker-1936:
Process ForkPoolWorker-1911:
Process ForkPoolWorker-1913:
Process ForkPoolWorker-1920:
Process ForkPoolWorker-1891:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/jand/.conda/envs/jand_venv/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/jand/.conda/envs/jand_venv/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/jand/.conda/envs/jand_venv/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/jand/.conda/envs/jand_venv/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/jand/.conda/envs/jand_venv/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File 