In [4]:
# ================================
# Crypto Price Direction Prediction
# Logistic Regression
# ================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# -------------------------------
# 1. Load Dataset (LOCAL CSV)
# -------------------------------
data = pd.read_csv("https:\Users\gaura\Downloads\crypto_ohlc.csv")

# Check column names
print(data.columns)

# Rename columns if needed (adjust if names differ)
data.rename(columns={
    'date': 'Date',
    'open': 'Open',
    'high': 'High',
    'low': 'Low',
    'close': 'Close',
    'volume': 'Volume'
}, inplace=True)

# Convert Date to datetime
data['Date'] = pd.to_datetime(data['Date'])

# Sort by date
data.sort_values('Date', inplace=True)

# Set Date as index
data.set_index('Date', inplace=True)

# -------------------------------
# 2. Feature Engineering
# -------------------------------

# Daily returns
data['Return'] = data['Close'].pct_change()

# Moving averages
data['MA_5'] = data['Close'].rolling(5).mean()
data['MA_10'] = data['Close'].rolling(10).mean()

# Volatility
data['Volatility'] = data['Return'].rolling(5).std()

# Volume change
data['Volume_Change'] = data['Volume'].pct_change()

# Target variable (Next day direction)
data['Direction'] = np.where(data['Return'].shift(-1) > 0, 1, 0)

# Drop NaN values
data.dropna(inplace=True)

# -------------------------------
# 3. Features & Target
# -------------------------------
features = ['Return', 'MA_5', 'MA_10', 'Volatility', 'Volume_Change']
X = data[features]
y = data['Direction']

# -------------------------------
# 4. Train-Test Split (TIME-BASED)
# -------------------------------
split = int(len(data) * 0.8)

X_train, X_test = X.iloc[:split], X.iloc[split:]
y_train, y_test = y.iloc[:split], y.iloc[split:]

# -------------------------------
# 5. Train Model
# -------------------------------
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# -------------------------------
# 6. Predictions
# -------------------------------
y_pred = model.predict(X_test)

# -------------------------------
# 7. Evaluation
# -------------------------------
print("Accuracy:", round(accuracy_score(y_test, y_pred) * 100, 2), "%\n")
print("Classification Report:\n", classification_report(y_test, y_pred))

# -------------------------------
# 8. Confusion Matrix
# -------------------------------
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(6,4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=["Down", "Up"],
            yticklabels=["Down", "Up"])
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix - Crypto Direction Prediction")
plt.show()


SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 6-7: truncated \UXXXXXXXX escape (2858958097.py, line 17)