# 👻 Ghost on Local
---
[![Phantom](https://img.shields.io/badge/Version-2.0-teal.svg)]()
[![Phantom](https://img.shields.io/badge/Github-👻-black.svg)](https://github.com/Ming-doan/timeseries-imputation.git)

In [None]:
import pandas as pd
from modules.alias import get_by_aliases
from modules.utils.preprocessing import Preprocessing
from modules.utils.missing import CreateMissingDataFrame
from modules.train import Trainer
from modules.models import *
from modules.utils.callbacks import SavePlot, Combined
from modules.utils.generator import WindowGenerator
from modules.utils.cache import Cache

In [None]:
# Experiment configuration read by the cells below.
# Datasets
DATASET = 'PhuLien'  # base name of the input file; loaded as f'{DATASET}.csv'
SELECTED_COLUMNS = ['WaterLevel']  # columns kept from the loaded CSV
# Missing values (all four are handed to CreateMissingDataFrame)
MISSING_PERCENTAGE = 10  # presumably the percentage of rows to mask -- confirm against CreateMissingDataFrame
MISSING_GAPS = 1  # presumably the number of separate gaps to create -- confirm
SEED = 123  # passed as seed=
MODE = 'Linear'  # passed as split_mode=; options noted by the author: 'Random', 'Linear'
# Models
WINDOW_SIZE = 200  # passed to WindowGenerator; the notebook requires the gap to be longer than this
BATCH_SIZE = 10  # passed to WindowGenerator; the notebook requires the gap to be longer than this
MODELS = get_by_aliases(['cnn1d'])  # models resolved from their aliases; given to Trainer, len() used as the model count

### Preprocessing

In [None]:
import os

# Make sure the output directory exists before any cell writes into it.
# exist_ok=True keeps the call idempotent and removes the check-then-create
# race of testing os.path.exists() first.
os.makedirs('results', exist_ok=True)

In [None]:
# Load '<DATASET>.csv' and keep only the columns selected for the experiment.
df = pd.read_csv(f'{DATASET}.csv')[SELECTED_COLUMNS]

In [None]:
# Preprocessing data
# Run the project's Preprocessing pipeline on the selected columns and rebind
# `df` to whatever `flow` returns. The concrete transformations live in
# modules.utils.preprocessing and are not visible from this notebook.
preprocessing = Preprocessing()
df = preprocessing.flow(df)

In [None]:
# Build the missing-data generator from the preprocessed frame, using the
# percentage, gap count, split mode and seed configured for this experiment.
creator = CreateMissingDataFrame(
    df,
    MISSING_PERCENTAGE,
    MISSING_GAPS,
    split_mode=MODE,
    seed=SEED,
)

In [None]:
# Plot the generated missing-data layout; save_path points into results/.
# NOTE(review): presumably the figure is written to that file -- confirm in
# CreateMissingDataFrame.plot.
creator.plot(save_path='results/missing_data.png')

In [None]:
# Sanity-check the configuration before training: the gap must be longer than
# both the window size and the batch size.
# The check covers every generated gap (the shortest one is the binding
# constraint) instead of only the first, so MISSING_GAPS > 1 is validated too.
# NOTE(review): assumes each entry of creator.missing_indexs is a
# (start, end) pair, as the original [0][0] / [0][1] indexing implied -- confirm.
gap_lengths = [gap[1] - gap[0] for gap in creator.missing_indexs]
if not gap_lengths:
    raise ValueError('No missing gaps were generated.')
missing_length = min(gap_lengths)
# Explicit raises instead of `assert`, which is stripped when Python runs with -O.
if missing_length <= WINDOW_SIZE:
    raise ValueError(f'Window size is too large. Expect smaller than {missing_length}, found {WINDOW_SIZE}.')
if missing_length <= BATCH_SIZE:
    raise ValueError(f'Batch size is too large. Expect smaller than {missing_length}, found {BATCH_SIZE}.')

### Training

In [None]:
# Training
# Set up the trainer, the two result callbacks (both writing to "results")
# and the cache shared by every training run.
n_models = len(MODELS)
trainer = Trainer(model=MODELS)
combined_callback = Combined(n_models=n_models, save_directory="results")
sp = SavePlot(n_models=n_models, save_directory="results")
cache = Cache(live_cache=True)

for train_df, test_df in creator:
    # Wrap both splits with the same window and batch settings.
    train_gen, test_gen = (
        WindowGenerator(split, WINDOW_SIZE, BATCH_SIZE)
        for split in (train_df, test_df)
    )

    trainer.train(
        train_gen,
        test_gen,
        callbacks=[sp, combined_callback],
        cache=cache,
    )
    # Reset the trainer before moving on to the next split.
    trainer.reset()

In [None]:
# Save the metrics collected by the combined callback, then leave the table
# as the cell's last expression so the notebook displays it.
metrics_table = combined_callback.metrics.metrics
metrics_table.to_csv('results/metrics.csv', index=False)
metrics_table