In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import boto3
import joblib
import os

In [2]:
# Load the raw Bitcoin candle data from a CSV file into a DataFrame.
csv_file = "bitcoin_2017_to_2023.csv"  # Replace with actual file path
df = pd.read_csv(csv_file)

# Preview the first five rows to sanity-check the columns and dtypes.
df.head()

Unnamed: 0,timestamp,open,high,low,close,volume,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume
0,2023-08-01 13:19:00,28902.48,28902.49,28902.48,28902.49,4.68658,135453.8,258,0.89391,25836.224836
1,2023-08-01 13:18:00,28902.48,28902.49,28902.48,28902.49,4.77589,138035.1,317,2.24546,64899.385195
2,2023-08-01 13:17:00,28908.52,28908.53,28902.48,28902.49,11.52263,333053.2,451,2.70873,78290.170121
3,2023-08-01 13:16:00,28907.41,28912.74,28907.41,28908.53,15.8961,459555.6,483,10.22981,295738.166916
4,2023-08-01 13:15:00,28896.0,28907.42,28893.03,28907.41,37.74657,1090761.0,686,16.50452,476955.246611


### Preprocess the Dataset

In [3]:
# Drop the timestamp column (not used as a model feature).
# Re-assign instead of mutating in place, and tolerate an already-missing
# column, so re-running this cell is a harmless no-op rather than a KeyError.
df = df.drop(columns=["timestamp"], errors="ignore")

In [4]:
# Target: the candle's closing price. Features: every remaining column.
y = df["close"]
X = df.drop("close", axis=1)

In [5]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): train_test_split shuffles rows by default, so train and test
# candles end up interleaved in time — confirm this is acceptable for price data.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Standardise the features: learn the scaling statistics from the training
# rows only, then apply that same transform to both splits.
# NOTE(review): X_train / X_test are overwritten here, so re-running this cell
# without re-running the split would refit the scaler on already-scaled data.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

### Train a Machine Learning Model

In [7]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

In [None]:
# Train a 100-tree random forest on the scaled training features.
# n_jobs=-1 builds the trees on all available CPU cores; with the fixed
# random_state the fitted forest does not depend on the number of workers.
model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
model.fit(X_train, y_train)

In [None]:
# Score the model on the held-out rows: predict from the scaled test features
# and report the mean absolute error against the true close prices.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Absolute Error: {mae}")

In [None]:
### Save and Package the Model

In [None]:
# Save model and scaler
# Both artifacts are written side by side under ./model; the directory is
# created on demand and an existing one is reused (exist_ok=True).
os.makedirs("model", exist_ok=True)
joblib.dump(model, "model/model.pkl")
# The fitted scaler is persisted as well, alongside the model that was trained
# on its output.
joblib.dump(scaler, "model/scaler.pkl")

### New Model: ARIMA / SARIMAX Forecast of the Close Price

In [1]:
import pandas as pd
import datetime
from datetime import date, timedelta

In [15]:
# Load the 15-minute BTC candles. "Open time" is parsed into datetimes at load
# time so that plots against it get a real date axis instead of treating every
# timestamp string as a separate category.
data = pd.read_csv(
    '/home/jupyteruser/work/data/btc_15m_data_2018_to_2025.csv',
    parse_dates=["Open time"],
)

In [3]:
# Preview the first five rows (head() defaults to 5).
data.head()

Unnamed: 0,Open time,Open,High,Low,Close,Volume,Close time,Quote asset volume,Number of trades,Taker buy base asset volume,Taker buy quote asset volume,Ignore
0,2018-01-01 00:00:00,13715.65,13715.65,13400.01,13556.15,123.616013,2018-01-01 00:14:59.999,1675545.0,1572,63.227133,857610.8,0
1,2018-01-01 00:15:00,13533.75,13550.87,13402.0,13521.12,98.13643,2018-01-01 00:29:59.999,1321757.0,1461,47.686389,642281.2,0
2,2018-01-01 00:30:00,13500.0,13545.37,13450.0,13470.41,79.904037,2018-01-01 00:44:59.999,1078825.0,1000,43.710406,590034.7,0
3,2018-01-01 00:45:00,13494.65,13690.87,13450.0,13529.01,141.699719,2018-01-01 00:59:59.999,1917783.0,1195,73.897993,1000614.0,0
4,2018-01-01 01:00:00,13528.99,13571.74,13402.28,13445.63,72.537533,2018-01-01 01:14:59.999,977819.8,898,34.257652,461836.9,0


In [16]:
# Keep only the candle timestamp, the OHLC prices and the traded volume.
ohlcv_columns = ["Open time", "Open", "High", "Low", "Close", "Volume"]
data = data[ohlcv_columns]

In [17]:
# Narrow the frame to the timestamp and the close price used for forecasting.
data = data.loc[:, ["Open time", "Close"]]

In [18]:
# Preview the first ten rows of the reduced frame.
data.head(n=10)

Unnamed: 0,Open time,Close
0,2018-01-01 00:00:00,13556.15
1,2018-01-01 00:15:00,13521.12
2,2018-01-01 00:30:00,13470.41
3,2018-01-01 00:45:00,13529.01
4,2018-01-01 01:00:00,13445.63
5,2018-01-01 01:15:00,13560.0
6,2018-01-01 01:30:00,13440.01
7,2018-01-01 01:45:00,13203.06
8,2018-01-01 02:00:00,13387.0
9,2018-01-01 02:15:00,13225.0


In [None]:
import matplotlib.pyplot as plt

# Line chart of the close price over the full history, drawn through the
# explicit figure/axes interface.
plt.style.use('fivethirtyeight')
fig, ax = plt.subplots(figsize=(15, 10))
ax.plot(data["Open time"], data["Close"])

[<matplotlib.lines.Line2D at 0x7f6fcd5cc940>]

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Split the close price into trend, seasonal and residual components using a
# multiplicative model with a 30-observation cycle.
# `period` replaces the former `freq` keyword, which current statsmodels
# releases no longer accept.
result = seasonal_decompose(data["Close"], model='multiplicative', period=30)
# result.plot() creates its own figure, so no separate plt.figure() call is
# needed (it only left an extra empty figure behind).
fig = result.plot()
fig.set_size_inches(15, 10)

In [None]:
# Autocorrelation of the close price across all lags.
pd.plotting.autocorrelation_plot(series=data["Close"])

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf
# Partial autocorrelation of the close price for the first 100 lags.
plot_pacf(data["Close"], lags = 100)

In [None]:
# for stationary data
# ARIMA(p, d, q): p autoregressive lags, d differencing steps, q moving-average lags.
p, d, q = 5, 1, 2
# statsmodels.tsa.arima_model.ARIMA has been removed from statsmodels; the
# supported implementation lives in statsmodels.tsa.arima.model, and its
# fit() does not take the old `disp` argument.
from statsmodels.tsa.arima.model import ARIMA
model = ARIMA(data["Close"], order=(p, d, q))
fitted = model.fit()
print(fitted.summary())

In [None]:
# Predictions from the fitted ARIMA results; no start/end is passed, so the
# library's default prediction range is used.
predictions = fitted.predict()
print(predictions)

In [None]:
# Seasonal data
import statsmodels.api as sm
import warnings

# Seasonal ARIMA on the close price: the (p, d, q) orders defined earlier are
# reused for both the regular and the seasonal part, with a 12-step season.
# NOTE(review): confirm that 12 observations is the intended season length
# for this series.
sarimax_spec = sm.tsa.statespace.SARIMAX(
    data['Close'],
    order=(p, d, q),
    seasonal_order=(p, d, q, 12),
)
# `model` ends up holding the fitted results object, not the specification.
model = sarimax_spec.fit()
print(model.summary())

In [None]:
# Forecast the 11 steps that follow the last observed row
# (positions len(data) through len(data) + 10, inclusive).
forecast_start = len(data)
predictions = model.predict(start=forecast_start, end=forecast_start + 10)
print(predictions)

In [None]:
# Overlay the forecast on the historical close prices, on one shared set of axes.
ax = data["Close"].plot(legend=True, label="Training Data", figsize=(15, 10))
predictions.plot(ax=ax, legend=True, label="Predictions")