# Template

This is a template notebook for working with the _SeqRep_ package.

You can [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/MIR-MU/seqrep/blob/main/examples/TEMPLATE.ipynb)
or
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/MIR-MU/seqrep/main?labpath=examples%2FTEMPLATE.ipynb).

## Install _SeqRep_ Package

In [None]:
!python -m pip install git+https://github.com/MIR-MU/seqrep

## Import Needed Packages

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.neural_network import MLPClassifier

from seqrep import *
from seqrep.feature_engineering import *
from seqrep.labeling import *
from seqrep.splitting import *
from seqrep.scaling import *
from seqrep.feature_reduction import *
from seqrep.evaluation import *
from seqrep.pipeline_evaluation import *

## Load or Download Data

In [None]:
# # You can for example download the price data from Yahoo Finance.

# !pip install yfinance
# import yfinance as yf

# ticker = "BTC-USD"

# df = yf.download(tickers=ticker, period="2500d", interval="1d")
# df

In [None]:
# # TODO: specify your own data source, e.g. if you want to load the data from a file.
# df = pd.read_csv(
#     "<file_name>",
#     names=['date', 'time', 'open', 'high', 'low', 'close', 'volume'],
#     parse_dates=[['date', 'time']],
#     index_col=0,
#     )
# df

In [None]:
# Column names have to be lowercase.
# `df` is rebound to a new frame rather than mutated in place, so the frame
# produced by the loading cell is never silently altered.
df = df.rename(columns=str.lower)

# You may want to delete rows that contain NaN.
df = df.dropna()

# # Or you may want to omit rows with a zero in some column (e.g. volume).
# df = df[df.volume != 0]

df

## Run Pipeline Evaluation

In [None]:
# `results_for_comparison` accumulates the results of individual runs so that
# they can be compared. It is created only once; re-running this cell keeps
# whatever has been collected so far.

# To start again with an empty table, uncomment the following line.
# del results_for_comparison

if "results_for_comparison" in globals():
    print("DataFrame already exist!")
else:
    print("Create new empty DataFrame.")
    results_for_comparison = pd.DataFrame()

In [None]:
%%capture --no-stdout --no-display

# Name under which the result of this run is stored.
run_identification = "Short identification of this run"

# Step 1: list the pipeline stages (feature extraction followed by scaling).
pipeline_steps = [
    # TODO: add or remove arbitrary Feature Extractor
    ("fext_prev", PreviousValuesExtractor()),
    ("fext_time", TimeFeaturesExtractor()),
    # TODO: Select which scaler you want to use
    # ("scale_u", UniversalScaler(scaler=MinMaxScaler())),
    ("scale_u", UniversalScaler(scaler=StandardScaler())),
]
pipe = Pipeline(steps=pipeline_steps)

# Step 2: choose the components of the workflow.
workflow_components = dict(
    # TODO: Select the desired labeler
    labeler=NextColorLabeler(),
    splitter=TrainTestSplitter(),
    pipeline=pipe,
    # TODO: Select the desired feature reductor, if you want to use it
    feature_reductor=UnivariateFeatureSelector(number=0.25),
    # TODO: Select the desired model, if you want to use it
    model=MLPClassifier(shuffle=False),
    # TODO: Select the desired evaluator, if you want to use it
    evaluator=ClassificationEvaluator(),
)
pipe_eval = PipelineEvaluator(**workflow_components)

# Step 3: run the workflow on the prepared data.
result = pipe_eval.run(data=df)

# Add the result of this run as one row labelled with `run_identification`.
# `DataFrame.append` was removed in pandas 2.0, so the row is built as a
# one-row DataFrame and joined with `pd.concat` instead.
run_row = pd.DataFrame([pd.Series(result, name=run_identification)])
results_for_comparison = pd.concat([results_for_comparison, run_row])

In [None]:
# Display the collected results; each run is stored as a row named by its run identification.
results_for_comparison