In [1]:
import os
import pandas as pd 
import numpy as np
from multiprocessing import Pool 
import multiprocessing
from data_loader import data_loader #data_loader.py 파일을 다운 받아 주셔야 합니다. 
from tqdm import tqdm
from functools import partial

In [7]:
def data_loader_all(func, path, train, nrows, processes=None, **kwargs):
    '''
    Read every file in a directory in parallel and merge the results into one DataFrame.

    Parameters:

    func: callable that reads a single csv file; called as func(file_path, nrows=..., train=..., ...)
    path: [str] directory that holds the train or test csv files
    train: [boolean] True when loading the training files, False otherwise
    nrows: [int] number of leading rows to load from each csv file
    processes: [int or None] number of worker processes; None uses
               multiprocessing.cpu_count(), capped at the number of files.
               Pass a smaller value to lower peak memory use.
    lookup_table: [pd.DataFrame] contents of train_label.csv (required when train is True)
    event_time: [int] time at which state_B occurs (required when train is True)
    normal: [int] label of state_A (required when train is True)

    Return:

    combined_df: the merged train or test data

    Raises:

    ValueError: if `path` contains no files
    KeyError: if train is True and lookup_table, event_time or normal is missing

    Note: on platforms that spawn worker processes (e.g. Windows), a script that
    calls this function must guard the *call site* with
    `if __name__ == '__main__':`; a guard inside this function cannot help and
    would leave the result undefined when the function is imported from a module.
    '''

    # Collect the files to read from the requested directory (sorted so that the
    # row order of the merged frame does not depend on the OS listing order).
    file_names = sorted(
        name for name in os.listdir(path)
        if os.path.isfile(os.path.join(path, name))
    )
    files_path = [os.path.join(path, name) for name in file_names]

    if not files_path:
        raise ValueError('No files to load in directory: {!r}'.format(path))

    # Bind the per-call options so that workers only receive the file path.
    if train:
        func_fixed = partial(
            func,
            nrows=nrows,
            train=True,
            lookup_table=kwargs['lookup_table'],
            event_time=kwargs['event_time'],
            normal=kwargs['normal'],
        )
    else:
        func_fixed = partial(func, nrows=nrows, train=False)

    if processes is None:
        # No point in starting more workers than there are files.
        processes = min(multiprocessing.cpu_count(), len(files_path))

    # Read the files on several cores. The context manager tears the pool down
    # even when a worker raises, so no worker processes are leaked; all results
    # are collected before the block exits.
    with Pool(processes=processes) as pool:
        df_list = list(tqdm(pool.imap(func_fixed, files_path), total=len(files_path)))

    # Merge the per-file frames.
    combined_df = pd.concat(df_list, ignore_index=True)

    return combined_df
    

In [10]:
# Directories with the per-file csv data, passed to data_loader_all as `path`.
train_path = 'train'
test_path = 'test'
# Forward slashes only: '\/' is not a valid Python escape sequence and triggers
# a warning on recent interpreters; Windows accepts '/' as a separator.
label = pd.read_csv('C:/data/train_label.csv')
# Load the first 100 rows of every training file; normal=999 is the label of
# state_A and event_time=10 is the time at which state_B occurs.
train = data_loader_all(data_loader, path = train_path, train = True, nrows = 100, normal = 999, event_time = 10, lookup_table = label)




  0%|                                                                                          | 0/828 [00:00<?, ?it/s]


  0%|                                                                                | 1/828 [00:11<2:41:49, 11.74s/it]


  0%|▏                                                                               | 2/828 [00:17<2:15:11,  9.82s/it]


  0%|▎                                                                               | 3/828 [00:19<1:42:39,  7.47s/it]


  1%|▋                                                                               | 7/828 [00:20<1:13:06,  5.34s/it]


  1%|▊                                                                                 | 8/828 [00:22<57:15,  4.19s/it]


  1%|▉                                                                                 | 9/828 [00:22<42:12,  3.09s/it]


  1%|█                                                                                | 11/828 [00:24<32:55,  2.42s/it]


  1%|█▏              

 11%|█████████                                                                        | 92/828 [01:44<14:14,  1.16s/it]


 11%|█████████▏                                                                       | 94/828 [01:44<10:24,  1.18it/s]


 11%|█████████▎                                                                       | 95/828 [01:47<16:52,  1.38s/it]


 12%|█████████▍                                                                       | 96/828 [01:48<16:20,  1.34s/it]


 12%|█████████▋                                                                       | 99/828 [01:51<14:23,  1.18s/it]


 12%|█████████▋                                                                      | 100/828 [01:52<14:09,  1.17s/it]


 12%|█████████▊                                                                      | 101/828 [01:52<10:28,  1.16it/s]


 12%|█████████▉                                                                      | 103/828 [01:54<11:42,  1.03it/s]


 13%|██████████         

 21%|████████████████▋                                                               | 173/828 [02:59<10:14,  1.07it/s]


 21%|████████████████▉                                                               | 175/828 [03:00<09:40,  1.13it/s]


 21%|█████████████████                                                               | 176/828 [03:02<11:17,  1.04s/it]


 21%|█████████████████                                                               | 177/828 [03:02<10:00,  1.08it/s]


 22%|█████████████████▎                                                              | 179/828 [03:04<09:29,  1.14it/s]


 22%|█████████████████▍                                                              | 180/828 [03:05<11:08,  1.03s/it]


 22%|█████████████████▍                                                              | 181/828 [03:06<09:38,  1.12it/s]


 22%|█████████████████▋                                                              | 183/828 [03:08<09:36,  1.12it/s]


 22%|█████████████████▊ 

 30%|███████████████████████▊                                                        | 247/828 [04:06<08:33,  1.13it/s]


 30%|███████████████████████▉                                                        | 248/828 [04:08<10:32,  1.09s/it]


 30%|████████████████████████                                                        | 249/828 [04:09<11:58,  1.24s/it]


 30%|████████████████████████▎                                                       | 251/828 [04:10<08:41,  1.11it/s]


 30%|████████████████████████▎                                                       | 252/828 [04:11<10:15,  1.07s/it]


 31%|████████████████████████▍                                                       | 253/828 [04:13<12:12,  1.27s/it]


 31%|████████████████████████▋                                                       | 255/828 [04:13<08:51,  1.08it/s]


 31%|████████████████████████▋                                                       | 256/828 [04:14<09:48,  1.03s/it]


 31%|███████████████████

 39%|███████████████████████████████                                                 | 321/828 [05:13<10:48,  1.28s/it]


 39%|███████████████████████████████                                                 | 322/828 [05:13<07:53,  1.07it/s]


 39%|███████████████████████████████▎                                                | 324/828 [05:14<06:29,  1.29it/s]


 39%|███████████████████████████████▍                                                | 325/828 [05:16<10:02,  1.20s/it]


 39%|███████████████████████████████▍                                                | 326/828 [05:16<08:10,  1.02it/s]


 40%|███████████████████████████████▋                                                | 328/828 [05:17<06:41,  1.25it/s]


 40%|███████████████████████████████▊                                                | 329/828 [05:19<09:52,  1.19s/it]


 40%|███████████████████████████████▉                                                | 330/828 [05:20<08:02,  1.03it/s]


 40%|███████████████████

ParserError: Error tokenizing data. C error: out of memory

In [3]:
# Directories with the per-file csv data, passed to data_loader_all as `path`.
train_path = 'train'
test_path = 'test'
# Forward slashes only: '\/' is not a valid Python escape sequence and triggers
# a warning on recent interpreters; Windows accepts '/' as a separator.
# The label table is loaded here but is not passed to the test load below.
label = pd.read_csv('C:/data/train_label.csv')
# Load the first 60 rows of every test file (no label lookup in test mode).
test = data_loader_all(data_loader, path = test_path, train = False, nrows = 60)

100%|████████████████████████████████████████████████████████████████████████████████| 720/720 [04:16<00:00,  2.29it/s]


In [5]:
# Preview the first 40 rows of the merged test data.
test.iloc[:40]

Unnamed: 0,id,time,V0000,V0001,V0002,V0003,V0004,V0005,V0006,V0007,...,V5111,V5112,V5113,V5114,V5115,V5116,V5117,V5118,V5119,V5120
0,1000,0,30.465579,8.679198,8.706214,8.698763,8.681921,175.895526,225.599651,3.4123179999999996e-20,...,1,1,1,1,60,127.58,112.263,-0.2647287,85.4,-0.004474
1,1000,1,30.467312,8.713533,8.683039,8.705332,8.758377,169.915262,245.457733,6.356187999999999e-19,...,1,1,1,1,60,0.0,0.0,5.22957e-06,85.4,0.0
2,1000,2,30.469018,8.846273,8.723076,8.706312,8.71243,195.202734,157.454041,1.994417e-19,...,1,1,1,1,60,0.0,0.0,-1.577439e-05,85.4,0.0
3,1000,3,30.462412,8.836801,8.717034,8.699487,8.689871,166.871753,196.083128,5.689101e-19,...,1,1,1,1,60,0.0,0.0,9.626164e-06,85.4,0.0
4,1000,4,30.467925,8.813212,8.703835,8.720919,8.703865,186.028985,158.724162,-4.507803e-19,...,1,1,1,1,60,0.0,0.0,2.624449e-05,85.4,0.0
5,1000,5,30.471406,8.702561,8.725561,8.777424,8.743822,181.753252,247.075191,4.484936e-19,...,1,1,1,1,60,0.0,0.0,1.623289e-05,85.4,0.0
6,1000,6,30.465731,8.850617,8.731008,8.664739,8.719825,180.070609,220.626422,-3.494443e-19,...,1,1,1,1,60,0.0,0.0,1.683652e-05,85.4,0.0
7,1000,7,30.485849,8.715682,8.703027,8.734264,8.735284,160.84042,140.993982,8.991803e-20,...,1,1,1,1,60,0.0,0.0,1.603708e-05,85.4,0.0
8,1000,8,30.48081,8.866624,8.693112,8.689413,8.717165,198.775454,208.841439,2.794357e-19,...,1,1,1,1,60,0.0,0.0,2.10621e-05,85.4,0.0
9,1000,9,30.4759,8.8561,8.712908,8.678796,8.716222,197.049888,169.094527,-1.6841459999999998e-19,...,1,1,1,1,60,0.0,0.0,2.862594e-06,85.4,0.0
