# Simple Classification Example

## Install _SeqRep_ package

In [1]:
!python -m pip install git+https://github.com/MIR-MU/seqrep

Collecting git+https://github.com/MIR-MU/seqrep
  Cloning https://github.com/MIR-MU/seqrep to /tmp/pip-req-build-1qjc3l5i
  Running command git clone -q https://github.com/MIR-MU/seqrep /tmp/pip-req-build-1qjc3l5i


## Import Needed Parts

In [2]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

# Required packages
!pip install pandas_ta
!pip install ta
!pip install hrv-analysis

from seqrep.feature_engineering import PreviousValuesExtractor, TimeFeaturesExtractor
from seqrep.labeling import NextColorLabeler
from seqrep.splitting import TrainTestSplitter
from seqrep.scaling import UniversalScaler
from seqrep.evaluation import ClassificationEvaluator
from seqrep.pipeline_evaluation import PipelineEvaluator

# Data Source
!pip install yfinance
import yfinance as yf



## Load Data
In this example, we will use the price data of *Apple shares* from *Yahoo-Finance*.

In [3]:
# Download daily AAPL quotes from Yahoo Finance.
data = yf.download(tickers="AAPL", period="10000d", interval="1d")
# Recent yfinance releases can return two-level (price, ticker) columns even for
# a single ticker; keep only the first level so the `.str` accessor below works.
# With flat columns (as in the output recorded below) this is a no-op.
if data.columns.nlevels > 1:
    data.columns = data.columns.get_level_values(0)
# column names have to be lowercase
data.columns = data.columns.str.lower()
data

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,open,high,low,close,adj close,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1982-02-26,0.081473,0.082031,0.081473,0.081473,0.063859,17427200
1982-03-01,0.082031,0.082589,0.082031,0.082031,0.064296,35302400
1982-03-02,0.082031,0.082589,0.082031,0.082031,0.064296,34809600
1982-03-03,0.082031,0.082589,0.082031,0.082031,0.064296,23654400
1982-03-04,0.080915,0.080915,0.080357,0.080357,0.062984,38371200
...,...,...,...,...,...,...
2021-10-18,143.449997,146.839996,143.160004,146.550003,146.550003,85589200
2021-10-19,147.009995,149.169998,146.550003,148.759995,148.759995,76378900
2021-10-20,148.699997,149.750000,148.119995,149.259995,149.259995,58418800
2021-10-21,148.809998,149.639999,147.869995,149.479996,149.479996,61421000


## Run Pipeline with Evaluation
This is the simplest way to use this framework. The pipeline transformations are performed, and then the selected model is evaluated on the split data.

In [4]:
%%capture --no-stdout --no-display
# 1. step
pipe = Pipeline(
    [
        ("fext_prev", PreviousValuesExtractor()),
        ("fext_time", TimeFeaturesExtractor()),
        ("scale_u", UniversalScaler(scaler=MinMaxScaler())),
    ]
)

# 2. step
pipe_eval = PipelineEvaluator(
    labeler=NextColorLabeler(),
    splitter=TrainTestSplitter(),
    pipeline=pipe,
    model=SVC(),
    evaluator=ClassificationEvaluator(),
)
# 3. step
result = pipe_eval.run(data=data)

09:41:24.034 Labeling data
09:41:24.036 Splitting data
09:41:24.039 Fitting pipeline
09:41:24.094 Applying pipeline transformations
09:41:24.153 	Original shape:		(7500, 19); 
		shape after removing NaNs: (7499, 19).
09:41:24.160 	Original shape:		(2500, 19); 
		shape after removing NaNs: (2499, 19).
09:41:24.161 Fitting model
09:41:27.745 Predicting
09:41:28.564 Evaluating predictions
[[1173   29]
 [1264   33]] 
 48.25930372148859 % accuracy
 53.2258064516129 % precision of 1 classes
 2.5443330763299925 % recall of 1 classes

              precision    recall  f1-score   support

           0       0.48      0.98      0.64      1202
           1       0.53      0.03      0.05      1297

    accuracy                           0.48      2499
   macro avg       0.51      0.50      0.35      2499
weighted avg       0.51      0.48      0.34      2499



## Run Pipeline with Evaluation and Feature Reduction
In this example, we use _feature selection_ to reduce the number of features. Half of the features remain (because of `number=0.5`).

For the evaluation, we use the _UniversalEvaluator_ in this case.

In [5]:
%%capture --no-stdout --no-display
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score

from seqrep.feature_engineering import TAExtractor
from seqrep.feature_reduction import UnivariateFeatureSelector
from seqrep.evaluation import UniversalEvaluator

# 1. step
pipe = Pipeline(
    [
        ("fext_prev", PreviousValuesExtractor()),
        ("fext_time", TimeFeaturesExtractor()),
        ("fext_ta", TAExtractor(all_features=True)),
        ("scale_u", UniversalScaler(scaler=MinMaxScaler())),
    ]
)
evaluator = UniversalEvaluator(
    metrics=[accuracy_score, roc_auc_score, precision_score, recall_score]
)
# 2. step
pipe_eval = PipelineEvaluator(
    labeler=NextColorLabeler(),
    splitter=TrainTestSplitter(),
    pipeline=pipe,
    feature_reductor=UnivariateFeatureSelector(number=0.5),
    model=SVC(),
    evaluator=evaluator,
)
# 3. step
result = pipe_eval.run(data=data)

09:41:28.610 Labeling data
09:41:28.612 Splitting data
09:41:28.614 Fitting pipeline
09:41:31.658 Applying pipeline transformations
09:41:32.814 	Original shape:		(7500, 102); 
		shape after removing NaNs: (7425, 100).
09:41:32.820 	Original shape:		(2500, 102); 
		shape after removing NaNs: (2425, 100).
09:41:32.821 Applying feature reduction
09:41:32.838 Fitting model
09:41:38.628 Predicting
09:41:40.170 Evaluating predictions
accuracy_score:
	0.48206185567010307
roc_auc_score:
	0.5
precision_score:
	0.0
recall_score:
	0.0
