In [47]:
!pip install pandas_ta



In [48]:
!pip install numpy==1.23.5



In [49]:
!pip install pandas



In [50]:
import pandas as pd
import pandas_ta as ta
import csv
import numpy as np

In [51]:
# Load the CSV file
btc_data = pd.read_csv('Bitcoin Historical Data.csv')
btc_data.tail()

Unnamed: 0,Date,Price,Open,High,Low,Vol. ('000),Change %
4107,1/5/2014,1014.7,924.7,1029.9,911.4,21.37,9.74%
4108,1/4/2014,924.7,884.3,932.2,848.3,14.24,4.57%
4109,1/3/2014,884.3,856.9,888.2,839.4,9.71,3.19%
4110,1/2/2014,856.9,815.9,886.2,810.5,12.81,5.02%
4111,1/1/2014,815.9,805.9,829.9,771.0,10.76,1.24%


In [52]:
# Convert the dictionary to a DataFrame
btc_data = pd.DataFrame(btc_data)

# 1. Parse the "Date" column into datetime format
btc_data["Date"] = pd.to_datetime(btc_data["Date"], format="%m/%d/%Y")

# 2. Sort the data by date in ascending order
btc_data.sort_values(by="Date", ascending=True, inplace=True)

In [53]:
# Calculate RSI, EMA, SMA, and MACD
btc_data["RSI"] = ta.rsi(btc_data["Price"], length=14)  # Relative Strength Index
btc_data["EMA"] = ta.ema(btc_data["Price"], length=14)  # Exponential Moving Average
btc_data["SMA"] = ta.sma(btc_data["Price"], length=14)  # Simple Moving Average

# Calculate MACD
macd = ta.macd(btc_data["Price"], fast=12, slow=26, signal=9)
btc_data["MACD"] = macd["MACD_12_26_9"]
btc_data["MACD_Signal"] = macd["MACDs_12_26_9"]
btc_data["MACD_Hist"] = macd["MACDh_12_26_9"]

# Display the first few rows of the data with calculated indicators
btc_data.tail()

TypeError: unsupported operand type(s) for -: 'str' and 'str'

In [None]:
# Calculate Bollinger Bands
bollinger_bands = ta.bbands(btc_data["Price"], length=20, std=2)
btc_data["BB_Upper"] = bollinger_bands["BBU_20_2.0"]
btc_data["BB_Middle"] = bollinger_bands["BBM_20_2.0"]
btc_data["BB_Lower"] = bollinger_bands["BBL_20_2.0"]

# Percentage difference between Bollinger Bands
btc_data["BB_Percentage"] = (
    (btc_data["BB_Upper"] - btc_data["BB_Lower"]) / btc_data["BB_Middle"])

In [None]:
# Function to clean and convert volume data
def clean_volume(volume):
    if isinstance(volume, str):  # Check if the value is a string
        volume = volume.replace(',', '')  # Remove commas
        if 'B' in volume:  # If the value contains 'B' (billions)
            return float(volume.replace('B', '')) * 1_000_000_000
        elif 'M' in volume:  # If the value contains 'M' (millions)
            return float(volume.replace('M', '')) * 1_000_000
        elif 'K' in volume:  # If the value contains 'K' (thousands)
            return float(volume.replace('K', '')) * 1_000
        else:  # If no suffix is present, convert to float directly
            return float(volume)
    return np.nan  # Handle unexpected cases

# Apply the cleaning function to the volume column
btc_data["Volume"] = btc_data["Vol. ('000)"].apply(clean_volume)

In [None]:
# Add On-Balance Volume (OBV)
btc_data["OBV"] = ta.obv(btc_data["Price"], btc_data["Volume"])

In [None]:
# Day of the week (0=Monday, 6=Sunday)
btc_data["Day_of_Week"] = btc_data["Date"].dt.dayofweek

# Week of the year
btc_data["Week_of_Year"] = btc_data["Date"].dt.isocalendar().week

# Month of the year
btc_data["Month"] = btc_data["Date"].dt.month

# Quarter of the year
btc_data["Quarter"] = btc_data["Date"].dt.quarter

# Year
btc_data["Year"] = btc_data["Date"].dt.year

In [None]:
# Support and Resistance Levels
btc_data["Support"] = btc_data["Low"].rolling(window=20).min()  # Lowest low in the past 20 days
btc_data["Resistance"] = btc_data["High"].rolling(window=20).max()  # Highest high in the past 20 days

In [None]:
# Calculate the percentage price change over the next 7 days (Target Variable for Regression)
btc_data["Pct_Change"] = ((btc_data["Price"].shift(-7) - btc_data["Price"]) / btc_data["Price"]) * 100

In [None]:
# Drop the original "Vol. ('000)" column
btc_data.drop(columns=["Vol. ('000)"], inplace=True)

# Drop rows with NaN values (resulting from rolling calculations)
btc_data.dropna(inplace=True)

# Reset index
btc_data.reset_index(drop=True, inplace=True)

In [None]:
# Drop rows with NaN values in the target column
btc_data = btc_data.dropna(subset=["Pct_Change"]).reset_index(drop=True)

In [None]:
btc_data.info()

In [None]:
btc_data.head()

---

In [None]:
import numpy as np
import pandas as pd

def detect_single_cup_and_handle(data, price_col="Price", window=30, handle_ratio=0.5, min_depth=0.02):
    """
    Detects distinct Cup and Handle patterns, marking only one occurrence of each pattern.

    Parameters:
    - data (pd.DataFrame): DataFrame containing price data.
    - price_col (str): Column name for price data.
    - window (int): Lookback window for detecting the pattern.
    - handle_ratio (float): Maximum size of the handle as a fraction of the cup's depth.
    - min_depth (float): Minimum depth of the cup as a fraction of the maximum price.

    Returns:
    - pd.Series: Boolean Series indicating rows where the pattern is detected.
    """
    cup_handle = [0] * len(data)  # Initialize with 0 (not detected)
    skip_until = 0  # To skip overlapping patterns

    for i in range(window, len(data) - window):
        if i < skip_until:  # Skip indices within the buffer zone
            continue

        # Define the potential pattern window
        window_data = data.iloc[i - window : i + window]
        prices = window_data[price_col].values

        # Find the local minimum (cup bottom)
        cup_bottom_idx = np.argmin(prices)
        cup_bottom = prices[cup_bottom_idx]

        # Split the prices into left and right sides of the cup
        left_prices = prices[:cup_bottom_idx]
        right_prices = prices[cup_bottom_idx + 1:]

        # Ensure we have enough data on both sides
        if len(left_prices) == 0 or len(right_prices) == 0:
            continue

        # Validate the cup's depth
        left_peak = max(left_prices)
        right_peak = max(right_prices)
        cup_depth = min(left_peak, right_peak) - cup_bottom
        if cup_depth < min_depth * max(prices):  # Minimum depth as a fraction of max price
            continue

        # Validate symmetry: left and right peaks should be similar
        if abs(left_peak - right_peak) > 0.02 * max(prices):  # 2% tolerance for symmetry
            continue

        # Handle validation: Should be smaller than the depth of the cup and near the right peak
        handle_start = cup_bottom_idx + int(len(right_prices) * handle_ratio)
        handle_prices = prices[handle_start:]
        if len(handle_prices) == 0 or max(handle_prices) > right_peak:
            continue

        # If all conditions are met, mark this index as a Cup and Handle
        cup_handle[i] = 1
        skip_until = i + window  # Skip overlapping patterns within this window

    return pd.Series(cup_handle, index=data.index)

In [None]:
def detect_ascending_triangle(data, price_col="Price", window=30):
    """
    Detects potential Ascending Triangle patterns in price data.

    Parameters:
    - data (pd.DataFrame): DataFrame containing price data.
    - price_col (str): Column name for price data.
    - window (int): Lookback window for detecting the pattern.

    Returns:
    - pd.Series: Boolean Series indicating rows where the pattern is detected.
    """
    ascending_triangle = [0] * len(data)  # Initialize with 0 (not detected)

    for i in range(window, len(data) - window):
        # Define the potential pattern window
        window_data = data.iloc[i - window : i + window]
        prices = window_data[price_col].values

        # Identify the resistance level (maximum price in the window)
        resistance = max(prices)

        # Identify the support level (minimum price increases over time)
        support = np.polyfit(range(len(prices)), prices, 1)  # Fit a linear trendline
        slope = support[0]

        # Validate the pattern
        if slope > 0 and max(prices[-5:]) > resistance:  # Breakout above resistance
            ascending_triangle[i] = 1

    return pd.Series(ascending_triangle, index=data.index)

In [None]:
def detect_inverse_head_and_shoulders(data, price_col="Price", window=30):
    """
    Detects potential Inverse Head and Shoulders patterns in price data.

    Parameters:
    - data (pd.DataFrame): DataFrame containing price data.
    - price_col (str): Column name for price data.
    - window (int): Lookback window for detecting the pattern.

    Returns:
    - pd.Series: Boolean Series indicating rows where the pattern is detected.
    """
    inverse_hs = [0] * len(data)  # Initialize with 0 (not detected)

    for i in range(window, len(data) - window):
        # Define the potential pattern window
        window_data = data.iloc[i - window : i + window]
        prices = window_data[price_col].values

        # Ensure we have enough data to form the pattern
        if len(prices) < 7:
            continue

        # Identify the three lows (shoulders and head)
        left_shoulder = min(prices[: len(prices) // 3])
        head = min(prices[len(prices) // 3 : 2 * len(prices) // 3])
        right_shoulder = min(prices[2 * len(prices) // 3 :])

        # Validate the pattern: Shoulders should be higher than the head
        if left_shoulder > head and right_shoulder > head and abs(left_shoulder - right_shoulder) <= 0.05 * head:
            # Validate neckline breakout
            neckline = max(prices[: len(prices) // 3] + prices[2 * len(prices) // 3 :])
            if prices[-1] > neckline:  # Breakout above neckline
                inverse_hs[i] = 1

    return pd.Series(inverse_hs, index=data.index)

In [None]:
btc_data['Cup_And_Handle'] = detect_single_cup_and_handle(btc_data, price_col='Price', window=30)
btc_data['Ascending_Triangle'] = detect_ascending_triangle(btc_data, price_col='Price', window=30)
btc_data['Inverse_Head_And_Shoulders'] = detect_inverse_head_and_shoulders(btc_data, price_col='Price', window=30)

# Combine all patterns into a single column for classification
btc_data['Pattern'] = 0  # Default: No pattern
btc_data.loc[btc_data['Cup_And_Handle'] == 1, 'Pattern'] = 1
btc_data.loc[btc_data['Ascending_Triangle'] == 1, 'Pattern'] = 2
btc_data.loc[btc_data['Inverse_Head_And_Shoulders'] == 1, 'Pattern'] = 3

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,
    mean_squared_error, r2_score, classification_report
)
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [None]:
# Features: Include technical indicators
features = ['RSI', 'EMA', 'SMA', 'MACD', 'MACD_Signal', 'Volume', 'BB_Percentage']

# Targets
classification_target = 'Pattern'
regression_target = 'Pct_Change'

# Split data into training and testing sets
X = btc_data[features]
y_classification = btc_data[classification_target]
y_regression = btc_data[regression_target]

X_train, X_test, y_train_class, y_test_class = train_test_split(X, y_classification, test_size=0.2, random_state=42)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X, y_regression, test_size=0.2, random_state=42)

# Scale the features for models that require scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
X_train_reg_scaled = scaler.fit_transform(X_train_reg)
X_test_reg_scaled = scaler.transform(X_test_reg)

In [None]:
from sklearn.svm import SVC

# Initialize and train SVM
svm_model = SVC(kernel='rbf', probability=True, random_state=42)
svm_model.fit(X_train_scaled, y_train_class)

# Predictions
y_pred_svm = svm_model.predict(X_test_scaled)
y_pred_svm_proba = svm_model.predict_proba(X_test_scaled)

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Initialize and train Decision Tree
tree_model = DecisionTreeClassifier(random_state=42)
tree_model.fit(X_train, y_train_class)

# Predictions
y_pred_tree = tree_model.predict(X_test)

In [None]:
from xgboost import XGBClassifier

# Initialize and train XGBoost
xgb_model = XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='mlogloss')
xgb_model.fit(X_train, y_train_class)

# Predictions
y_pred_xgb = xgb_model.predict(X_test)
y_pred_xgb_proba = xgb_model.predict_proba(X_test)

In [None]:
'''import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Define and train ANN
ann_model = Sequential([
    Dense(32, activation='relu', input_dim=len(features)),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dense(len(y_train_class.unique()), activation='softmax')  # Output layer for multi-class classification
])
ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
ann_model.fit(X_train_scaled, y_train_class, epochs=50, batch_size=32, verbose=1)

# Predictions
y_pred_ann_proba = ann_model.predict(X_test_scaled)
y_pred_ann = np.argmax(y_pred_ann_proba, axis=1)'''

In [None]:
from sklearn.linear_model import LogisticRegression

# Initialize and train Logistic Regression
logistic_model = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=42)
logistic_model.fit(X_train_scaled, y_train_class)

# Predictions
y_pred_logistic = logistic_model.predict(X_test_scaled)
y_pred_logistic_proba = logistic_model.predict_proba(X_test_scaled)

In [None]:
from sklearn.tree import DecisionTreeRegressor

tree_regressor = DecisionTreeRegressor(random_state=42)
tree_regressor.fit(X_train_reg, y_train_reg)
y_pred_tree_reg = tree_regressor.predict(X_test_reg)

In [None]:
from xgboost import XGBRegressor

xgb_regressor = XGBRegressor(random_state=42)
xgb_regressor.fit(X_train_reg, y_train_reg)
y_pred_xgb_reg = xgb_regressor.predict(X_test_reg)

In [None]:
'''ann_regressor = Sequential([
    Dense(64, activation='relu', input_dim=len(features)),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1)  # Single output for regression
])
ann_regressor.compile(optimizer='adam', loss='mse', metrics=['mae'])
ann_regressor.fit(X_train_reg_scaled, y_train_reg, epochs=50, batch_size=32, verbose=1)

# Predictions
y_pred_ann_reg = ann_regressor.predict(X_test_reg_scaled).flatten()'''

In [None]:
from sklearn.metrics import roc_auc_score

def evaluate_classification(y_true, y_pred, y_proba=None):
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred, average='weighted'))
    print("Recall:", recall_score(y_true, y_pred, average='weighted'))
    print("F1-Score:", f1_score(y_true, y_pred, average='weighted'))
    if y_proba is not None:
        print("ROC-AUC:", roc_auc_score(pd.get_dummies(y_true), y_proba, multi_class='ovr'))

print("SVM Performance:")
evaluate_classification(y_test_class, y_pred_svm, y_pred_svm_proba)

print("\nDecision Tree Performance:")
evaluate_classification(y_test_class, y_pred_tree)

print("\nXGBoost Performance:")
evaluate_classification(y_test_class, y_pred_xgb, y_pred_xgb_proba)

print("\nANN Performance:")
# evaluate_classification(y_test_class, y_pred_ann, y_pred_ann_proba)

print("\nLogistic Regression Performance:")
evaluate_classification(y_test_class, y_pred_logistic, y_pred_logistic_proba)

In [None]:
def evaluate_regression(y_true, y_pred):
    print("R2 Score:", r2_score(y_true, y_pred))
    print("Mean Squared Error (MSE):", mean_squared_error(y_true, y_pred))
    print("Root Mean Squared Error (RMSE):", np.sqrt(mean_squared_error(y_true, y_pred)))

print("\nDecision Tree Regressor Performance:")
evaluate_regression(y_test_reg, y_pred_tree_reg)

print("\nXGBoost Regressor Performance:")
evaluate_regression(y_test_reg, y_pred_xgb_reg)

print("\nANN Regressor Performance:")
# evaluate_regression(y_test_reg, y_pred_ann_reg)

In [None]:
btc_data.head()

In [None]:
pattern_counts = btc_data['Pattern'].value_counts()
pattern_counts

In [None]:
# Filter rows where a pattern is detected (non-zero values in 'Pattern')
pattern_rows = btc_data[btc_data['Pattern'] != 0]

# Display the rows with patterns
print(pattern_rows[['Date', 'Pattern', 'Price']])

import matplotlib.pyplot as plt

# Plot the full price data
plt.figure(figsize=(16, 8))
plt.plot(btc_data['Date'], btc_data['Price'], label='Price', color='blue', alpha=0.5)

# Highlight the rows where patterns appear
for pattern in pattern_rows['Pattern'].unique():
    pattern_data = pattern_rows[pattern_rows['Pattern'] == pattern]
    plt.scatter(
        pattern_data['Date'],
        pattern_data['Price'],
        label=f"Pattern {pattern}",
        marker='o'
    )

# Add title, labels, and legend
plt.title('Price Data with Detected Patterns', fontsize=16)
plt.xlabel('Date', fontsize=14)
plt.ylabel('Price', fontsize=14)
plt.legend(fontsize=12)
plt.grid(True)
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Filter data for 2025 Q1
btc_data_2025_q1 = btc_data[(btc_data['Year'] == 2025) & (btc_data['Quarter'] == 1)]

# Filter Pattern 1 (Cup and Handle) for 2025 Q1
cup_and_handle_2025_q1 = btc_data_2025_q1[btc_data_2025_q1['Pattern'] == 1]

# Plot the data
plt.figure(figsize=(16, 8))  # Set a larger figure size
plt.plot(btc_data_2025_q1['Date'], btc_data_2025_q1['Price'], label='Price', color='blue', alpha=0.5)

# Highlight Cup and Handle patterns
plt.scatter(
    cup_and_handle_2025_q1['Date'],
    cup_and_handle_2025_q1['Price'],
    color='orange',
    label='Cup and Handle (Pattern 1)',
    marker='o'
)

# Add title, labels, and legend
plt.title('Price Data with Cup and Handle Pattern (Pattern 1) - 2025 Q1', fontsize=16)
plt.xlabel('Date', fontsize=14)
plt.ylabel('Price', fontsize=14)
plt.legend(fontsize=12)
plt.grid(True)
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Filter data for January 2025
btc_data_jan_2025 = btc_data[(btc_data['Year'] == 2025) & (btc_data['Month'] == 1)]

# Filter Pattern 1 (Cup and Handle) for January 2025
cup_and_handle_jan_2025 = btc_data_jan_2025[btc_data_jan_2025['Pattern'] == 1]

# Plot the data
plt.figure(figsize=(16, 8))  # Set a larger figure size
plt.plot(btc_data_jan_2025['Date'], btc_data_jan_2025['Price'], label='Price', color='blue', alpha=0.5)

# Highlight Cup and Handle patterns
plt.scatter(
    cup_and_handle_jan_2025['Date'],
    cup_and_handle_jan_2025['Price'],
    color='orange',
    label='Cup and Handle (Pattern 1)',
    marker='o'
)

# Add title, labels, and legend
plt.title('Price Data with Cup and Handle Pattern (Pattern 1) - January 2025', fontsize=16)
plt.xlabel('Date', fontsize=14)
plt.ylabel('Price', fontsize=14)
plt.legend(fontsize=12)
plt.grid(True)
plt.show()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

def plot_monthly_prices(data, date_col="Date", price_col="Price"):
    """
    Plots monthly Bitcoin price graphs.

    Parameters:
    - data (pd.DataFrame): DataFrame containing Bitcoin price data.
    - date_col (str): Column name for the date.
    - price_col (str): Column name for the price.
    """
    # Ensure the date column is in datetime format
    data[date_col] = pd.to_datetime(data[date_col])

    # Add 'Year' and 'Month' columns for grouping
    data['Year'] = data[date_col].dt.year
    data['Month'] = data[date_col].dt.month

    # Group by year and month
    grouped = data.groupby(['Year', 'Month'])

    # Plot for each month
    for (year, month), group in grouped:
        plt.figure(figsize=(12, 6))
        plt.plot(group[date_col], group[price_col], label=f"{year}-{month:02d}", color='blue', alpha=0.7)
        plt.title(f"Bitcoin Price for {year}-{month:02d}", fontsize=16)
        plt.xlabel("Date", fontsize=14)
        plt.ylabel("Price", fontsize=14)
        plt.grid(True)
        plt.legend(fontsize=12)
        plt.show()

# Example usage
# Replace btc_data with your DataFrame containing Bitcoin data
plot_monthly_prices(btc_data, date_col="Date", price_col="Price")

In [None]:
btc_data.info()

In [None]:
btc_data.head()