In [4]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [5]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA

# Load cryptocurrency data from CSV file
crypto_data = pd.read_csv('/content/drive/MyDrive/time_series/bitstampUSD_1-min_data_2012-01-01_to_2017-10-20.csv')

crypto_data =crypto_data[:15000]
crypto_data

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1325317920,4.39,4.39,4.39,4.39,0.455581,2.000000,4.39
1,1325317980,4.39,4.39,4.39,4.39,0.455581,2.000000,4.39
2,1325318040,4.39,4.39,4.39,4.39,0.455581,2.000000,4.39
3,1325318100,4.39,4.39,4.39,4.39,0.455581,2.000000,4.39
4,1325318160,4.39,4.39,4.39,4.39,0.455581,2.000000,4.39
...,...,...,...,...,...,...,...,...
14995,1326217620,6.55,6.55,6.55,6.55,1.531394,10.030628,6.55
14996,1326217680,6.55,6.55,6.55,6.55,1.531394,10.030628,6.55
14997,1326217740,6.55,6.55,6.55,6.55,1.531394,10.030628,6.55
14998,1326217800,6.55,6.55,6.55,6.55,1.531394,10.030628,6.55


In [6]:
import datetime
crypto_data["Timestamp"] = pd.to_datetime(crypto_data["Timestamp"], unit='s')
crypto_data = crypto_data

In [7]:
crypto_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15000 entries, 0 to 14999
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Timestamp          15000 non-null  datetime64[ns]
 1   Open               15000 non-null  float64       
 2   High               15000 non-null  float64       
 3   Low                15000 non-null  float64       
 4   Close              15000 non-null  float64       
 5   Volume_(BTC)       15000 non-null  float64       
 6   Volume_(Currency)  15000 non-null  float64       
 7   Weighted_Price     15000 non-null  float64       
dtypes: datetime64[ns](1), float64(7)
memory usage: 937.6 KB


In [8]:
crypto_data.isnull().sum()

Timestamp            0
Open                 0
High                 0
Low                  0
Close                0
Volume_(BTC)         0
Volume_(Currency)    0
Weighted_Price       0
dtype: int64

In [9]:
# Split the data into training and test sets
train_data = crypto_data.iloc[:10500]
test_data = crypto_data.iloc[10500:]
train_data.shape, test_data.shape

((10500, 8), (4500, 8))

In [19]:

# Fit an ARIMA model to the training data
arima_model = ARIMA(train_data['Weighted_Price'], order=(2,1,2))
arima_fit = arima_model.fit()

In [20]:



# Make predictions on the test data using the fitted model
arima_preds = arima_fit.predict(start=test_data.index[0], end=test_data.index[-1])

# Evaluate the model's performance using mean squared error (MSE)
mse = ((arima_preds - test_data['Weighted_Price'])**2).mean()
print('ARIMA MSE:', mse)

ARIMA MSE: 0.49728573324827036


# SARIMA

In [21]:
from statsmodels.tsa.statespace.sarimax import SARIMAX


# Fit a SARIMA model to the training data
sarima_model = SARIMAX(train_data['Weighted_Price'], order=(2,1,2), seasonal_order=(1,1,1,12))
sarima_fit = sarima_model.fit()

# Make predictions on the test data using the fitted model
sarima_preds = sarima_fit.predict(start=test_data.index[0], end=test_data.index[-1])

# Evaluate the model's performance using mean squared error (MSE)
mse = ((sarima_preds - test_data['Weighted_Price'])**2).mean()
print('SARIMA MSE:', mse)




SARIMA MSE: 0.7598333717908106


# ExponentialSmoothing

In [22]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Fit an exponential smoothing model to the training data
es_model = ExponentialSmoothing(train_data['Weighted_Price'], seasonal_periods=12, trend='add', seasonal='add')
es_fit = es_model.fit()

# Make predictions on the test data using the fitted model
es_preds = es_fit.predict(start=test_data.index[0], end=test_data.index[-1])

# Evaluate the model's performance using mean squared error (MSE)
mse = ((es_preds - test_data['Weighted_Price'])**2).mean()
print('Exponential Smoothing MSE:', mse)




Exponential Smoothing MSE: 0.2810339317028149


# LSTM

In [10]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
import tensorflow as tf
import numpy as np

# Preprocess the data for use in an LSTM model
def preprocess_lstm_data(data, look_back=1):
    X, y = [], []
    for i in range(len(data)-look_back-1):
        X.append(data[i:(i+look_back), 0])
        y.append(data[(i+look_back), 0])
    return np.array(X), np.array(y)

look_back = 10
train_X, train_y = preprocess_lstm_data(train_data[['Weighted_Price']].values, look_back)
test_X, test_y = preprocess_lstm_data(test_data[['Weighted_Price']].values, look_back)

# Define and train an LSTM model
model = Sequential()
model.add(LSTM(4, input_shape=(look_back, 1)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(train_X, train_y, epochs=10, batch_size=1, verbose=2)

# Make predictions on the test data using the trained model
lstm_preds = model.predict(test_X)

# Evaluate the model's performance using mean squared error (MSE)
mse = ((lstm_preds - test_y)**2).mean()

Epoch 1/10
10489/10489 - 39s - loss: 0.4767 - 39s/epoch - 4ms/step
Epoch 2/10
10489/10489 - 33s - loss: 6.8174e-04 - 33s/epoch - 3ms/step
Epoch 3/10
10489/10489 - 32s - loss: 4.7574e-04 - 32s/epoch - 3ms/step
Epoch 4/10
10489/10489 - 31s - loss: 4.4321e-04 - 31s/epoch - 3ms/step
Epoch 5/10
10489/10489 - 32s - loss: 3.4788e-04 - 32s/epoch - 3ms/step
Epoch 6/10
10489/10489 - 32s - loss: 2.6562e-04 - 32s/epoch - 3ms/step
Epoch 7/10
10489/10489 - 31s - loss: 2.4990e-04 - 31s/epoch - 3ms/step
Epoch 8/10
10489/10489 - 33s - loss: 2.3718e-04 - 33s/epoch - 3ms/step
Epoch 9/10
10489/10489 - 31s - loss: 2.2239e-04 - 31s/epoch - 3ms/step
Epoch 10/10
10489/10489 - 31s - loss: 2.2753e-04 - 31s/epoch - 3ms/step


In [None]:
from pandas.tseries.offsets import DateOffset
# Train individual models on the training data and make predictions on the test data
es_model = ExponentialSmoothing(train_data['Weighted_Price'], seasonal_periods=12, trend='add', seasonal='add')
es_fit = es_model.fit()
es_preds = es_fit.predict(start=test_data.index[0], end=test_data.index[-1])

sarima_model = SARIMAX(train_data['Weighted_Price'], order=(2,1,2), seasonal_order=(1,1,1,12))
sarima_fit = sarima_model.fit()
sarima_start = test_data.index[0]


In [None]:
from sklearn.linear_model import LinearRegression

look_back = 10
train_X, train_y = preprocess_lstm_data(train_data[['Weighted_Price']].values, look_back)
test_X, test_y = preprocess_lstm_data(test_data[['Weighted_Price']].values, look_back)

lstm_model = Sequential()
lstm_model.add(LSTM(4, input_shape=(look_back, 1)))
lstm_model.add(Dense(1))
lstm_model.compile(loss='mean_squared_error', optimizer='adam')
lstm_model.fit(train_X, train_y, epochs=10, batch_size=1, verbose=2)
lstm_preds = lstm_model.predict(test_X)


In [None]:
# Combine the predictions of the individual models into a single dataframe
preds_df = pd.DataFrame({'es_preds': es_preds, 'sarima_preds': sarima_preds}, index=test_data.index)

# Use the individual model predictions as features and fit a final model
final_model = LinearRegression()
final_model.fit(preds_df.loc[test_data.index], test_data['Weighted_Price'])


In [None]:
# Make predictions on future data using the ensemble model
future_data = crypto_data[2000:3000] # hypothetical future data
future_es_preds = es_fit.forecast(steps=len(future_data))
future_sarima_preds = sarima_fit.forecast(start=future_data.index[0], end=future_data.index[-1])
future_lstm_preds = lstm_model.predict(preprocess_lstm_data(future_data[['price']].values, look_back)[0])
future_preds_df = pd.DataFrame({'es_preds': future_es_preds, 'sarima_preds': future_sarima_preds, 'lstm_preds': future_lstm_preds.flatten()}, index=future_data.index)
ensemble_preds = final_model.predict(future_preds_df)