In [6]:
import sys
sys.path.append('../')

from baselines.FixTimeRegressor import FixTimeRegressor

from datetime import datetime
import pandas as pd
import tqdm
import numpy as np

In [7]:
# Load the raw training series from parquet. The path is relative, so it
# resolves against the notebook's working directory.
train_series = pd.read_parquet('../data/raw/train_series.parquet')

In [8]:
def reduce_memory(df):
    """Downcast columns of ``df`` in place to shrink its memory footprint.

    * ``uint32`` columns become ``uint8`` or ``uint16`` when every value fits
      (bounds are inclusive, so columns containing 0 qualify).
    * ``float32`` columns become ``float16`` when every value lies inside the
      float16 range. This check is range-only: float16 keeps roughly three
      significant decimal digits, so the cast is lossy.
    * ``series_id``, when present, is converted to ``category``.

    Columns of any other dtype are left untouched. Progress and the
    before/after memory usage (in MB) are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. It is MODIFIED IN PLACE.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned for convenience.
    """
    start_memory = df.memory_usage().sum() / 1024**2
    print(f'Memory usage before cleanup is {start_memory:.2f} MB')

    for col in tqdm.tqdm(df.columns):
        dtype_name = str(df[col].dtype)
        # Only these two dtypes are ever downcast; skipping everything else
        # avoids needless min/max scans and avoids calling max() on dtypes
        # that do not support it (e.g. unordered categoricals).
        if dtype_name not in ('uint32', 'float32'):
            continue

        min_value = df[col].min()
        max_value = df[col].max()

        if dtype_name == 'uint32':
            # Try the narrowest unsigned type first.
            for candidate in (np.uint8, np.uint16):
                bounds = np.iinfo(candidate)
                if bounds.min <= min_value and max_value <= bounds.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            bounds = np.finfo(np.float16)
            # Range check only; inf/NaN extremes fail the comparison and the
            # column is left as float32.
            if bounds.min <= min_value and max_value <= bounds.max:
                df[col] = df[col].astype(np.float16)

    # Done once, after the loop, so the loop never sees a categorical column.
    if 'series_id' in df.columns:
        df['series_id'] = df['series_id'].astype('category')

    end_memory = df.memory_usage().sum() / 1024**2
    print(f'Memory usage after cleanup is {end_memory:.2f} MB')

    # Guard against a zero-byte starting frame.
    improvement = (start_memory - end_memory) / start_memory * 100 if start_memory else 0.0
    print(f'Memory usage improved by {improvement:.2f}%')

    return df

In [9]:
# reduce_memory assigns the downcast columns back onto its argument and
# returns that same object, so `dataframe` and `train_series` are two names
# for one (already modified) frame, not independent copies.
dataframe = reduce_memory(train_series)

Memory usage before cleanup is 3416.54 MB


100%|██████████| 5/5 [00:12<00:00,  2.52s/it]

Memory usage after cleanup is 2196.35 MB
Memory usage improved by 35.71%





In [10]:
# Baseline model configured with two fixed clock times.
# NOTE(review): presumably these are the wake-up and sleep-onset times, in
# that order; confirm against baselines.FixTimeRegressor.
regressor = FixTimeRegressor('06:00:00', '22:00:00')

In [11]:
# Predict on the first 2,000,000 rows only, not the full frame; the captured
# progress bar for this cell shows a run of a little over two minutes.
results = regressor.predict(dataframe.head(2_000_000))

  0%|          | 0/2000000 [00:00<?, ?it/s]

100%|██████████| 2000000/2000000 [02:23<00:00, 13935.99it/s]


In [13]:
# Write the predictions to submission.csv in the current working directory,
# comma-separated and without the index column.
# NOTE(review): assumes `results` is a pandas DataFrame; confirm against
# FixTimeRegressor.predict.
results.to_csv('submission.csv', sep=',', index=False)