In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report


In [None]:
# Load the raw CSV from the working directory and preview its first ten rows.
csv_path = "bitcoin_data.csv"
df = pd.read_csv(csv_path)
df.head(n=10)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Print the column names, non-null counts, dtypes and memory usage of the frame.
df.info()

In [None]:
# Inspect the Timestamp column before parsing it: first its dtype, then its
# first ten values, to see whether it holds numbers or formatted date strings.
timestamps = df['Timestamp']
print(timestamps.dtype)
print(timestamps.head(10))


In [None]:
# Build a datetime index named 'date' from the Timestamp column.
#
# pd.to_datetime interprets bare numbers as nanoseconds since the Unix epoch,
# so a numeric Timestamp column would silently collapse onto 1970-01-01.
# Numeric input is therefore parsed with an explicit unit; string input is
# parsed exactly as before.
if pd.api.types.is_numeric_dtype(df['Timestamp']):
    # NOTE(review): assumes numeric timestamps are Unix *seconds* -- confirm
    # against the data source (use unit='ms' if they are milliseconds).
    df['date'] = pd.to_datetime(df['Timestamp'], unit='s')
else:
    df['date'] = pd.to_datetime(df['Timestamp'])
df.set_index('date', inplace=True)
              

In [None]:
# Keep only the close price and the volume, lower-case both column names,
# and sort the rows by the index.
df = (
    df.loc[:, ['Close', 'Volume']]
    .rename(columns={'Close': 'close', 'Volume': 'volume'})
    .sort_index()
)


In [None]:
# Line charts: one figure for the close price, then one for the volume.
for column, chart_title in (("close", "BTC Price"), ("volume", "BTC Trading Volume")):
    plt.plot(df[column])
    plt.title(chart_title)
    plt.show()

# Histogram of the close price (50 bins) with a kernel-density overlay.
sns.histplot(df['close'], bins=50, kde=True)
plt.title("BTC Price Distribution")
plt.show()

In [None]:
# Features derived from the close series:
#   ret1   - one-step percentage change
#   sma5   - 5-row rolling mean
#   sma10  - 10-row rolling mean
#   mom5   - close minus the close five rows earlier
# Target: 1 when the next row's close is strictly higher than this row's, else 0.
close = df['close']
next_close = close.shift(-1)

df['ret1'] = close.pct_change()
df['sma5'] = close.rolling(5).mean()
df['sma10'] = close.rolling(10).mean()
df['mom5'] = close - close.shift(5)
df['target'] = (next_close > close).astype(int)

# A comparison against NaN is False, so a row whose next close is unknown
# (the final row, or a row followed by a missing close) would be labelled 0
# and survive dropna() because the label itself is never NaN. Remove those
# rows explicitly, then drop the warm-up rows that lack a full feature window.
df = df[next_close.notna()].dropna()

In [None]:
# Model inputs are the four engineered columns; the label is the 'target' column.
features = ['ret1','sma5','sma10','mom5']
X, y = df.loc[:, features], df['target']

# Ordered 80/20 split without shuffling: the leading rows train, the trailing rows test.
split_idx = int(len(df)*0.8)
X_train, y_train = X.iloc[:split_idx], y.iloc[:split_idx]
X_test, y_test = X.iloc[split_idx:], y.iloc[split_idx:]

# The scaler is fitted on the training rows only and then applied to both subsets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Candidate classifiers, keyed by the display name used when reporting results.
models = dict(
    LogisticRegression=LogisticRegression(max_iter=1000),
    RandomForest=RandomForestClassifier(n_estimators=100, random_state=42),
)

In [None]:
# Fit each model on the training rows, score it on the test rows, and collect
# accuracy / F1 per model for the summary table printed at the end.

results = {}
for name, model in models.items():
    # The logistic regression is trained and evaluated on the standardized
    # matrices; every other model uses the unscaled feature frames.
    use_scaled = name == "LogisticRegression"
    fit_inputs, eval_inputs = (
        (X_train_scaled, X_test_scaled) if use_scaled else (X_train, X_test)
    )
    model.fit(fit_inputs, y_train)
    pred = model.predict(eval_inputs)

    acc, f1 = accuracy_score(y_test, pred), f1_score(y_test, pred)
    results[name] = dict(accuracy=acc, f1_score=f1)

    # Text report: headline metrics followed by the per-class breakdown.
    print(name)
    print("Accuracy:", acc)
    print("F1 Score:", f1)
    print(classification_report(y_test, pred))

    # Confusion matrix rendered as an annotated heatmap, one figure per model.
    cm = confusion_matrix(y_test, pred)
    sns.heatmap(cm, annot=True, fmt='d')
    plt.title(f"{name} Confusion Matrix")
    plt.show()

print(pd.DataFrame(results).T)