# Bitcoin Price Prediction (Notebook)

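This notebook trains classifiers to predict whether Bitcoin's next closing price will be higher than the current one. It walks through these steps:
- Load the data
- Exploratory data analysis (EDA)
- Feature engineering
- Modeling (logistic regression, SVM, XGBoost)
- Evaluation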


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn import metrics
from sklearn.metrics import ConfusionMatrixDisplay

import warnings
warnings.filterwarnings('ignore')

%matplotlib inline


In [None]:
import os

# The dataset is expected under <parent of the working directory>/data/.
ROOT = os.path.abspath('..')
DATA_PATH = os.path.join(os.path.join(ROOT, 'data'), 'bitcoin.csv')

# Read the CSV into the notebook-wide DataFrame and preview its first rows.
df = pd.read_csv(filepath_or_buffer=DATA_PATH)
df.head()

In [None]:
# Exploratory plots: the closing-price series (against the DataFrame index),
# followed by one distribution plot per price column.
plt.figure(figsize=(15,5))
plt.plot(df['Close'])
plt.title('Bitcoin Close price.')
plt.ylabel('Price in dollars.')
plt.show()

features = ['Open','High','Low','Close']
for col in features:
    plt.figure(figsize=(6,3))
    # `distplot` is deprecated in seaborn; a density-normalised histogram with
    # a KDE overlay is its documented replacement (cut=3 matches the old KDE
    # extent).
    sn.histplot(df[col], kde=True, stat='density', kde_kws=dict(cut=3))
    plt.title(f"Distribution - {col}")
    plt.show()

In [None]:
# Parse dates, drop incomplete rows up front and order the rows
# chronologically, so that "the next row" below really means "the next
# available observation" and the integer casts never see missing values.
df['Date'] = pd.to_datetime(df['Date'])
df = df.dropna().sort_values('Date', kind='stable').reset_index(drop=True)

# Calendar components of the date.
df['year'] = df['Date'].dt.year.astype(int)
df['month'] = df['Date'].dt.month.astype(int)
df['day'] = df['Date'].dt.day.astype(int)

# NOTE: this flags every row whose month is a multiple of 3 (Mar/Jun/Sep/Dec),
# not only the last day of the quarter.
df['is_quarter_end'] = np.where(df['month']%3==0,1,0)

# Intraday price spreads.
df['open-close']  = df['Open'] - df['Close']
df['low-high']  = df['Low'] - df['High']

# Label: 1 when the following row's close is higher than this row's close.
next_close = df['Close'].shift(-1)
df['target'] = np.where(next_close > df['Close'], 1, 0)

# The final row has no following close; comparing against NaN would silently
# label it 0, so drop it instead of keeping a fabricated label.
df = df[next_close.notna()].reset_index(drop=True)
df.head()

In [None]:
# Model inputs (engineered columns) and the binary label.
features = df[['open-close','low-high','is_quarter_end']]
target = df['target']

# Split BEFORE scaling: fitting the scaler on the full dataset would leak the
# validation rows' mean/variance into the training features.
X_train_raw, X_valid_raw, Y_train, Y_valid = train_test_split(
    features, target, test_size=0.3, random_state=42, stratify=target
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # statistics learned from training rows only
X_valid = scaler.transform(X_valid_raw)      # validation rows reuse the training statistics
# Full feature matrix scaled with the training statistics (kept so that any
# later cell referring to `X` keeps working).
X = scaler.transform(features)

# NOTE(review): the split is a random stratified shuffle; for time-ordered
# data a chronological hold-out may be more appropriate -- confirm intent.
models = [LogisticRegression(max_iter=1000), SVC(kernel='poly', probability=True), XGBClassifier(eval_metric='logloss', use_label_encoder=False)]
for m in models:
    m.fit(X_train, Y_train)
    print(m, ':')
    # AUC is computed from the predicted probability of the positive class.
    print('Training AUC:', metrics.roc_auc_score(Y_train, m.predict_proba(X_train)[:,1]))
    print('Validation AUC:', metrics.roc_auc_score(Y_valid, m.predict_proba(X_valid)[:,1]))
    print()

# Confusion matrix of the first model (logistic regression) on the validation split.
ConfusionMatrixDisplay.from_estimator(models[0], X_valid, Y_valid, cmap='Blues')
plt.show()