In [40]:
import requests
import pandas as pd
from datetime import datetime
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib

In [41]:
def fetch_crypto_data(crypto_pair, start_date):
    """
    Fetches daily historical data for a specified cryptocurrency pair from the CryptoCompare API.

    Parameters:
    - crypto_pair (str): Cryptocurrency pair in the format "BTC/USD".
    - start_date (str): Start date in "YYYY-MM-DD" format.

    Returns:
    - DataFrame: Contains Date, Open, High, Low, and Close prices.
    """
    # Convert start_date to timestamp
    start_timestamp = int(datetime.strptime(start_date, "%Y-%m-%d").timestamp())

    # Parse crypto_pair into individual symbols
    fsym, tsym = crypto_pair.split('/')

    # Define API endpoint and parameters
    url = "https://min-api.cryptocompare.com/data/v2/histoday"
    params = {
        'fsym': fsym,            # From symbol
        'tsym': tsym,            # To symbol
        'limit': 2000,           # Max days to fetch in one request
        'toTs': start_timestamp  # End timestamp (fetches data from start date onward)
    }

    # API Key (register at CryptoCompare to get one if required)
    api_key = "af387748d6eaba1f35e3d283cad9dcaf8dc7504039d637dc71fbcd9874bcd630"
    headers = {
        'authorization': f'Apikey {api_key}'
    }

    # Send request
    response = requests.get(url, headers=headers, params=params)
    data = response.json()

    # Check for errors in response
    if data['Response'] != 'Success':
        raise Exception("Error fetching data from CryptoCompare:", data['Message'])

    # Extract data
    historical_data = data['Data']['Data']
    # Convert to DataFrame
    df = pd.DataFrame(historical_data)
    df['time'] = pd.to_datetime(df['time'], unit='s')
    df.rename(columns={'time': 'Date', 'open': 'Open', 'high': 'High', 'low': 'Low', 'close': 'Close'}, inplace=True)
    df = df[['Date', 'Open', 'High', 'Low', 'Close']]

    return df

In [42]:
data = fetch_crypto_data("BTC/USD", "2024-01-01")

In [43]:
data

Unnamed: 0,Date,Open,High,Low,Close
0,2018-07-11,6306.87,6405.59,6293.68,6394.36
1,2018-07-12,6394.36,6394.93,6084.00,6253.60
2,2018-07-13,6253.66,6349.21,6131.54,6229.83
3,2018-07-14,6229.61,6332.46,6190.18,6268.75
4,2018-07-15,6268.32,6401.50,6245.75,6364.26
...,...,...,...,...,...
1996,2023-12-28,43469.34,43817.61,42283.91,42588.94
1997,2023-12-29,42588.94,43134.59,41296.86,42073.59
1998,2023-12-30,42073.59,42595.81,41529.95,42146.03
1999,2023-12-31,42146.03,42863.11,41970.37,42280.14


In [44]:
import pandas as pd
import numpy as np

def calculate_metrics(data, variable1, variable2):
    """
    Calculates historical and future metrics for a given DataFrame with cryptocurrency data.

    Parameters:
    - data (DataFrame): The historical cryptocurrency data containing 'Date', 'Open', 'High', 'Low', 'Close' columns.
    - variable1 (int): The look-back period for historical high and low metrics.
    - variable2 (int): The look-forward period for future high and low metrics.

    Returns:
    - DataFrame: The input DataFrame with added metric columns.
    """

    # Historical High and Low prices over the last `variable1` days
    data[f'High_Last_{variable1}_Days'] = data['High'].rolling(window=variable1, min_periods=1).max()
    data[f'Low_Last_{variable1}_Days'] = data['Low'].rolling(window=variable1, min_periods=1).min()

    # Days since Historical High and Low
    data[f'Days_Since_High_Last_{variable1}_Days'] = data.apply(
        lambda row: (row['Date'] - data.loc[:row.name, 'Date'][data['High'][:row.name + 1] == row[f'High_Last_{variable1}_Days']].iloc[-1]).days
        if pd.notna(row[f'High_Last_{variable1}_Days']) else np.nan, axis=1
    )
    data[f'Days_Since_Low_Last_{variable1}_Days'] = data.apply(
        lambda row: (row['Date'] - data.loc[:row.name, 'Date'][data['Low'][:row.name + 1] == row[f'Low_Last_{variable1}_Days']].iloc[-1]).days
        if pd.notna(row[f'Low_Last_{variable1}_Days']) else np.nan, axis=1
    )

    # Percentage difference from Historical High and Low
    data[f'%_Diff_From_High_Last_{variable1}_Days'] = (data['Close'] - data[f'High_Last_{variable1}_Days']) / data[f'High_Last_{variable1}_Days'] * 100
    data[f'%_Diff_From_Low_Last_{variable1}_Days'] = (data['Close'] - data[f'Low_Last_{variable1}_Days']) / data[f'Low_Last_{variable1}_Days'] * 100

    # Future High and Low prices over the next `variable2` days
    data[f'High_Next_{variable2}_Days'] = data['High'].shift(-variable2).rolling(window=variable2, min_periods=1).max()
    data[f'Low_Next_{variable2}_Days'] = data['Low'].shift(-variable2).rolling(window=variable2, min_periods=1).min()

    # Percentage difference from Future High and Low
    data[f'%_Diff_From_High_Next_{variable2}_Days'] = (data['Close'] - data[f'High_Next_{variable2}_Days']) / data[f'High_Next_{variable2}_Days'] * 100
    data[f'%_Diff_From_Low_Next_{variable2}_Days'] = (data['Close'] - data[f'Low_Next_{variable2}_Days']) / data[f'Low_Next_{variable2}_Days'] * 100

    return data


In [57]:
data['Date'] = pd.to_datetime(data['Date'])
variable1, variable2 = 7, 5
data = calculate_metrics(data, variable1, variable2)
#data.to_csv("crypto_data1.csv", index=False)

In [61]:
data

Unnamed: 0,Date,Open,High,Low,Close,High_Last_7_Days,Low_Last_7_Days,Days_Since_High_Last_7_Days,Days_Since_Low_Last_7_Days,%_Diff_From_High_Last_7_Days,%_Diff_From_Low_Last_7_Days,High_Next_5_Days,Low_Next_5_Days,%_Diff_From_High_Next_5_Days,%_Diff_From_Low_Next_5_Days
0,2018-07-11,6306.87,6405.59,6293.68,6394.36,6405.59,6293.68,0,0,-0.175316,1.599700,6761.84,6349.83,-5.434615,0.701279
1,2018-07-12,6394.36,6394.93,6084.00,6253.60,6405.59,6084.00,1,0,-2.372771,2.787640,7476.24,6349.83,-16.353675,-1.515474
2,2018-07-13,6253.66,6349.21,6131.54,6229.83,6405.59,6084.00,2,1,-2.743853,2.396943,7590.65,6349.83,-17.927582,-1.889814
3,2018-07-14,6229.61,6332.46,6190.18,6268.75,6405.59,6084.00,3,2,-2.136259,3.036654,7590.65,6349.83,-17.414846,-1.276885
4,2018-07-15,6268.32,6401.50,6245.75,6364.26,6405.59,6084.00,4,3,-0.645218,4.606509,7676.55,6349.83,-17.094789,0.227250
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1996,2023-12-28,43469.34,43817.61,42283.91,42588.94,44417.78,41616.59,6,2,-4.117360,2.336448,44210.89,41296.86,-3.668666,3.128761
1997,2023-12-29,42588.94,43134.59,41296.86,42073.59,44024.62,41296.86,6,0,-4.431679,1.880845,44210.89,41529.95,-4.834329,1.309031
1998,2023-12-30,42073.59,42595.81,41529.95,42146.03,43965.20,41296.86,6,1,-4.137750,2.056258,44210.89,41970.37,-4.670478,0.418533
1999,2023-12-31,42146.03,42863.11,41970.37,42280.14,43817.61,41296.86,3,2,-3.508795,2.381004,44210.89,42187.32,-4.367137,0.220019


In [46]:
def fetch_crypto_data(crypto_pair, start_date, end_date):
    url = "https://min-api.cryptocompare.com/data/v2/histoday"
    fsym, tsym = crypto_pair.split('/')
    params = {
        'fsym': fsym,
        'tsym': tsym,
        'limit': 2000,
        'toTs': pd.to_datetime(end_date).timestamp(),
    }
    headers = {'authorization': 'Apikey YOUR_API_KEY'}
    response = requests.get(url, headers=headers, params=params)
    data = response.json()
    df = pd.DataFrame(data['Data']['Data'])
    df['time'] = pd.to_datetime(df['time'], unit='s')
    df.rename(columns={'time': 'Date', 'open': 'Open', 'high': 'High', 'low': 'Low', 'close': 'Close'}, inplace=True)
    df = df[['Date', 'Open', 'High', 'Low', 'Close']]
    return df

In [47]:
# Usage Example
#data = fetch_crypto_data("BTC/USD", "2020-01-01", "2024-01-01")
#data.to_csv("crypto_data.csv", index=False)

In [58]:
df1 = pd.read_csv("/content/crypto_data1.csv")

In [59]:
def train_model(data):
    """
    Trains a machine learning model to predict future price differences.

    Parameters:
    - data (DataFrame): The DataFrame containing calculated metrics and target variables.

    Returns:
    - model: The trained machine learning model.
    - metrics: A dictionary with model evaluation metrics (RMSE and R-squared).
    """
    # Define the input features and target variables
    feature_cols = [
        f'Days_Since_High_Last_{variable1}_Days',
        f'%_Diff_From_High_Last_{variable1}_Days',
        f'Days_Since_Low_Last_{variable1}_Days',
        f'%_Diff_From_Low_Last_{variable1}_Days'
    ]
    target_cols = [
        f'%_Diff_From_High_Next_{variable2}_Days',
        f'%_Diff_From_Low_Next_{variable2}_Days'
    ]

    # Drop rows with NaN values in target columns
    data = data.dropna(subset=target_cols)

    # Separate features and target variables
    X = data[feature_cols]
    y = data[target_cols]

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Initialize and train the model (RandomForestRegressor)
    model = RandomForestRegressor(random_state=42)
    model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_test)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    r2 = r2_score(y_test, y_pred)

    # Save the model
    joblib.dump(model, 'trained_model.pkl')

    # Return model and evaluation metrics
    metrics = {"RMSE": rmse, "R2_Score": r2}
    return model, metrics

def predict_outcomes(model, input_features):
    """
    Uses the trained model to predict future price differences.

    Parameters:
    - model: The trained machine learning model.
    - input_features (list or array): The list of input feature values for prediction.

    Returns:
    - predictions: A dictionary with predicted values for target variables.
    """

    # Make predictions
    prediction = model.predict([input_features])
    predictions = {
        f'%_Diff_From_High_Next_{variable2}_Days': prediction[0][0],
        f'%_Diff_From_Low_Next_{variable2}_Days': prediction[0][1]
    }
    return predictions


In [60]:
# Load your data (ensure data has the calculated metrics)
data = pd.read_csv('/content/crypto_data1.csv')

# Train the model
model, metrics = train_model(data)
print("Model Metrics:", metrics)

# Predict using new data
model = joblib.load('trained_model.pkl')
new_data = [7, -3.2, 5, 2.5]  # Example input feature values
predictions = predict_outcomes(model, new_data)
print("Predicted Outcomes:", predictions)

Model Metrics: {'RMSE': 6.586024159557861, 'R2_Score': 0.005971702625084063}
Predicted Outcomes: {'%_Diff_From_High_Next_5_Days': -4.370088686360933, '%_Diff_From_Low_Next_5_Days': 4.280568997081705}


