## Libraries

In [22]:
import matplotlib
matplotlib.use('TkAgg')
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
from new_datasets_py import create_subsets

## Data processing

In [23]:
# Load the data
data = pd.read_csv('crypto-markets.csv')
btc_data = data[data['symbol'] == 'BTC'].copy()
doge_data = data[data['symbol'] == 'DOGE'].copy()
ltc_data = data[data['symbol'] == 'LTC'].copy()
btc_data.loc[:, 'date'] = pd.to_datetime(btc_data['date'])
doge_data.loc[:, 'date'] = pd.to_datetime(doge_data['date'])
ltc_data.loc[:, 'date'] = pd.to_datetime(ltc_data['date'])
btc_data.set_index('date', inplace=True)
doge_data.set_index('date', inplace=True)
ltc_data.set_index('date', inplace=True)
#missing_values = btc_data.isnull().sum() + doge_data.isnull().sum() + ltc_data.isnull().sum()
#print("Missing values:\n", missing_values)
cr_subsets = create_subsets()

  return Index(sequences[0], name=names)


## Creating subsets

In [24]:
btc_subsets = cr_subsets.create_random_subsets(btc_data)
doge_subsets = cr_subsets.create_random_subsets(doge_data)
ltc_subsets = cr_subsets.create_random_subsets(ltc_data)

In [25]:
btc_table = cr_subsets.create_dataset_table(btc_subsets)
doge_table = cr_subsets.create_dataset_table(doge_subsets)
ltc_table = cr_subsets.create_dataset_table(ltc_subsets)

In [26]:
btc_table_with_label = cr_subsets.add_label_column(btc_table)
ltc_table_with_label = cr_subsets.add_label_column(ltc_table)
doge_table_with_label = cr_subsets.add_label_column(doge_table)

print(doge_table_with_label)

    dataset_id                                              dates  \
0            1  [2018-03-18, 2018-03-19, 2018-03-20, 2018-03-2...   
1            2  [2017-01-25, 2017-01-26, 2017-01-27, 2017-01-2...   
2            3  [2018-03-04, 2018-03-05, 2018-03-06, 2018-03-0...   
3            4  [2015-06-08, 2015-06-09, 2015-06-10, 2015-06-1...   
4            5  [2017-02-21, 2017-02-22, 2017-02-23, 2017-02-2...   
5            6  [2017-09-13, 2017-09-14, 2017-09-15, 2017-09-1...   
6            7  [2018-02-26, 2018-02-27, 2018-02-28, 2018-03-0...   
7            8  [2013-12-17, 2013-12-18, 2013-12-19, 2013-12-2...   
8            9  [2017-06-17, 2017-06-18, 2017-06-19, 2017-06-2...   
9           10  [2017-07-16, 2017-07-17, 2017-07-18, 2017-07-1...   
10          11  [2014-03-21, 2014-03-22, 2014-03-23, 2014-03-2...   
11          12  [2013-12-26, 2013-12-27, 2013-12-28, 2013-12-2...   
12          13  [2017-03-16, 2017-03-17, 2017-03-18, 2017-03-1...   
13          14  [2018-03-18, 2018-

## Arima init

In [124]:
from statsmodels.tsa.arima.model import ARIMA

rise_result = []
combined_table = pd.concat([doge_table_with_label, ltc_table_with_label, btc_table_with_label])

TP, TN, FP, FN = 0, 0, 0, 0
# 4,3,5
# 3,3,5
# 2,3,5
for index, row in combined_table.iterrows():
    training_data = row['close_values'][:7]
    model = ARIMA(training_data, order=(2,3,5))
    model_fit = model.fit()

    forecast_test = model_fit.forecast(3)

    predicted_rise = 1 if forecast_test[2] > row['close_values'][9] else 0
    actual_rise = row['rise']

    if predicted_rise == 1 and actual_rise == 1:
        TP += 1
    elif predicted_rise == 0 and actual_rise == 0:
        TN += 1
    elif predicted_rise == 1 and actual_rise == 0:
        FP += 1
    elif predicted_rise == 0 and actual_rise == 1:
        FN += 1

print("True Positives (TP):", TP)
print("True Negatives (TN):", TN)
print("False Positives (FP):", FP)
print("False Negatives (FN):", FN)



True Positives (TP): 23
True Negatives (TN): 23
False Positives (FP): 44
False Negatives (FN): 30




### Summary

In [125]:
sensitivity = TP / (TP + FN)
specificity = TN / (FP + TN)
precision = TP / (TP + FP)
accuracy = (TP + TN) / (TP + TN + FP + FN)

print("Sensitivity:", sensitivity)
print("Specificity:", specificity)
print("Precision", precision)
print("Accuracy", accuracy)

Sensitivity: 0.4339622641509434
Specificity: 0.34328358208955223
Precision 0.34328358208955223
Accuracy 0.38333333333333336
