In [1]:
import sys
sys.path.append('../')

from baselines.FixTimeRegressor import FixTimeRegressor

import pandas as pd
import tqdm
import numpy as np

In [2]:
# Load the raw training series from its parquet file.
train_series = pd.read_parquet('../data/raw/train_series.parquet')

In [3]:
# series_id value used further down to select one participant's rows for prediction.
participant_series_id = '038441c925bb'

In [4]:
def reduce_memory(df):
    """Downcast columns of ``df`` in place to shrink its memory footprint.

    * ``uint32`` columns become ``uint8`` or ``uint16`` when every value fits
      (bounds are inclusive, so 0 and the type maximum are accepted).
    * ``float32`` columns become ``float16`` when their range fits.  This keeps
      the range but reduces precision.
    * ``series_id``, when present, is converted to ``category``.

    Columns of any other dtype are left untouched.  Memory usage before and
    after, and the relative improvement, are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink.  It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    start_memory = df.memory_usage().sum() / 1024**2
    print(f'Memory usage before cleanup is {start_memory:.2f} MB')

    for col in tqdm.tqdm(df.columns):
        column_type = str(df[col].dtype)
        # min/max are only computed for dtypes that have a downcast rule; this
        # avoids wasted scans and avoids calling .max() on categorical columns.
        if column_type == 'uint32':
            # Unsigned values cannot be negative, so only the maximum matters.
            max_value = df[col].max()
            if max_value <= np.iinfo(np.uint8).max:
                df[col] = df[col].astype(np.uint8)
            elif max_value <= np.iinfo(np.uint16).max:
                df[col] = df[col].astype(np.uint16)
        elif column_type == 'float32':
            min_value = df[col].min()
            max_value = df[col].max()
            # NaN or infinite extremes fail these comparisons, leaving the
            # column as float32.
            if min_value >= np.finfo(np.float16).min and max_value <= np.finfo(np.float16).max:
                df[col] = df[col].astype(np.float16)

    # Convert once, after the loop, so the loop never sees a categorical
    # series_id and the function can be re-run on its own output.
    if 'series_id' in df.columns:
        df['series_id'] = df['series_id'].astype('category')

    end_memory = df.memory_usage().sum() / 1024**2
    print(f'Memory usage after cleanup is {end_memory:.2f} MB')

    # Guard the ratio so a frame reporting zero bytes does not divide by zero.
    improvement = (start_memory - end_memory) / start_memory * 100 if start_memory else 0.0
    print(f'Memory usage improved by {improvement:.2f}%')

    return df

In [5]:
# reduce_memory modifies the frame it is given and returns that same object,
# so `dataframe` and `train_series` refer to the same (downcast) data.
dataframe = reduce_memory(train_series)

Memory usage before cleanup is 3416.54 MB


100%|██████████| 5/5 [00:41<00:00,  8.38s/it]

Memory usage after cleanup is 2196.35 MB
Memory usage improved by 35.71%





In [6]:
# Baseline model configured with two fixed clock-time strings.
# NOTE(review): presumably the wake-up (06:00) and sleep-onset (22:00) times —
# confirm the argument order against baselines.FixTimeRegressor.
regressor = FixTimeRegressor('06:00:00', '22:00:00')

In [7]:
# Keep only the selected participant's rows and run the baseline on them.
results = regressor.predict(
    dataframe.loc[dataframe['series_id'] == participant_series_id]
)

100%|██████████| 389880/389880 [01:21<00:00, 4779.38it/s]


In [8]:
# Save the predictions as a comma-separated file, leaving out the index.
results.to_csv('../results/baseline_submission.csv', sep=',', index=False)