In [1]:
import requests
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [5]:

# Assuming data is prepared as per the earlier functions
def fetch_crypto_data(crypto_pair, start_date):
    """
    Fetches daily historical crypto data from the Binance API.

    Args:
        crypto_pair (str): Crypto pair in Binance format (e.g., 'BTCUSDT').
        start_date (str): Start date in 'YYYY-MM-DD' format.

    Returns:
        pd.DataFrame: DataFrame containing Date, Open, High, Low, Close prices.
    """
    url = 'https://api.binance.com/api/v3/klines'
    params = {
        'symbol': crypto_pair.upper(),
        'interval': '1d',
        'startTime': int(pd.Timestamp(start_date).timestamp() * 1000),
        'limit': 1000  # Maximum records per request
    }

    response = requests.get(url, params=params)
    if response.status_code != 200:
        raise Exception(f"API request failed: {response.status_code}")

    data = response.json()
    df = pd.DataFrame(data, columns=[
        'Open time', 'Open', 'High', 'Low', 'Close', 'Volume', 
        'Close time', 'Quote asset volume', 'Number of trades', 
        'Taker buy base asset volume', 'Taker buy quote asset volume', 'Ignore'
    ])

    df = df[['Open time', 'Open', 'High', 'Low', 'Close']]
    df['Date'] = pd.to_datetime(df['Open time'], unit='ms').dt.date
    df[['Open', 'High', 'Low', 'Close']] = df[['Open', 'High', 'Low', 'Close']].astype(float)
    return df[['Date', 'Open', 'High', 'Low', 'Close']]

In [7]:
def calculate_metrics(data, variable1, variable2):
    """
    Calculates historical and future metrics based on crypto data.

    Args:
        data (pd.DataFrame): DataFrame containing 'Date' and 'Close' columns.
        variable1 (int): Look-back period (e.g., 7 days).
        variable2 (int): Look-forward period (e.g., 5 days).

    Returns:
        pd.DataFrame: DataFrame with new metrics added.
    """
    data = data.copy()
    data.set_index('Date', inplace=True)

    # Historical high and low prices over look-back period
    data[f'High_Last_{variable1}_Days'] = data['Close'].rolling(window=variable1).max()
    data[f'Low_Last_{variable1}_Days'] = data['Close'].rolling(window=variable1).min()

    # Days since the most recent high/low price within the look-back period
    data[f'Days_Since_High_Last_{variable1}_Days'] = data[f'High_Last_{variable1}_Days'].expanding().apply(
        lambda x: (x.index[-1] - x.idxmax()).days)
    data[f'Days_Since_Low_Last_{variable1}_Days'] = data[f'Low_Last_{variable1}_Days'].expanding().apply(
        lambda x: (x.index[-1] - x.idxmin()).days)

    # Percentage differences from historical high and low prices
    data[f'%_Diff_From_High_Last_{variable1}_Days'] = (
        (data['Close'] - data[f'High_Last_{variable1}_Days']) / data[f'High_Last_{variable1}_Days'] * 100
    )
    data[f'%_Diff_From_Low_Last_{variable1}_Days'] = (
        (data['Close'] - data[f'Low_Last_{variable1}_Days']) / data[f'Low_Last_{variable1}_Days'] * 100
    )
   # Future high and low prices over look-forward period
    data[f'High_Next_{variable2}_Days'] = data['Close'].rolling(window=variable2).max().shift(-variable2)
    data[f'Low_Next_{variable2}_Days'] = data['Close'].rolling(window=variable2).min().shift(-variable2)

    # Percentage differences from future high and low prices
    data[f'%_Diff_From_High_Next_{variable2}_Days'] = (
        (data['Close'] - data[f'High_Next_{variable2}_Days']) / data[f'High_Next_{variable2}_Days'] * 100
    )
    data[f'%_Diff_From_Low_Next_{variable2}_Days'] = (
        (data['Close'] - data[f'Low_Next_{variable2}_Days']) / data[f'Low_Next_{variable2}_Days'] * 100
    )

    return data.reset_index()

In [9]:
def train_model(data):
    """
    Trains two Random Forest models for high and low price predictions.

    Args:
        data (pd.DataFrame): Data with calculated metrics for training.

    Returns:
        model1, model2: Trained models for predicting high and low price differences.
    """
    features = [
        'Days_Since_High_Last_7_Days', '%_Diff_From_High_Last_7_Days',
        'Days_Since_Low_Last_7_Days', '%_Diff_From_Low_Last_7_Days'
    ]
    target1 = '%_Diff_From_High_Next_5_Days'
    target2 = '%_Diff_From_Low_Next_5_Days'

    data = data.dropna(subset=features + [target1, target2])

    X = data[features]
    y1 = data[target1]
    y2 = data[target2]

    # Split data into training and testing sets
    X_train, X_test, y1_train, y1_test = train_test_split(X, y1, test_size=0.2, random_state=42)
    _, _, y2_train, y2_test = train_test_split(X, y2, test_size=0.2, random_state=42)

    model1 = RandomForestRegressor(random_state=42)
    model2 = RandomForestRegressor(random_state=42)

    model1.fit(X_train, y1_train)
    model2.fit(X_train, y2_train)

    mse1 = mean_squared_error(y1_test, model1.predict(X_test))
    mse2 = mean_squared_error(y2_test, model2.predict(X_test))

    print(f"Model 1 MSE: {mse1}")
    print(f"Model 2 MSE: {mse2}")

    return model1, model2

In [11]:
def predict_outcomes(model1, model2, input_features):
    """
    Predicts future percentage differences from high and low prices using trained models.

    Args:
        model1 (RandomForestRegressor): Model for predicting high price difference.
        model2 (RandomForestRegressor): Model for predicting low price difference.
        input_features (list): List of input values for features:
            [Days_Since_High_Last_7_Days, %_Diff_From_High_Last_7_Days,
             Days_Since_Low_Last_7_Days, %_Diff_From_Low_Last_7_Days]

    Returns:
        tuple: Predicted % difference from future high and low prices.
    """
    prediction_high = model1.predict([input_features])[0]
    prediction_low = model2.predict([input_features])[0]
    return prediction_high, prediction_low


In [13]:
def evaluate_crypto_trend(data, model1, model2, variable1=7, variable2=5):
    """
    Evaluates cryptocurrency trends by calculating metrics and using predictions.

    Args:
        data (pd.DataFrame): Data containing historical crypto prices.
        model1 (RandomForestRegressor): Model for predicting high price difference.
        model2 (RandomForestRegressor): Model for predicting low price difference.
        variable1 (int): Look-back period for metrics calculation.
        variable2 (int): Look-forward period for metrics calculation.

    Returns:
        pd.DataFrame: Data with calculated metrics and model predictions.
    """
    metrics_data = calculate_metrics(data, variable1, variable2)
    predictions = []
    
    for _, row in metrics_data.iterrows():
        input_features = [
            row[f'Days_Since_High_Last_{variable1}_Days'],
            row[f'%_Diff_From_High_Last_{variable1}_Days'],
            row[f'Days_Since_Low_Last_{variable1}_Days'],
            row[f'%_Diff_From_Low_Last_{variable1}_Days']
        ]
        
        if pd.notnull(input_features).all():
            pred_high, pred_low = predict_outcomes(model1, model2, input_features)
            predictions.append((pred_high, pred_low))
        else:
            predictions.append((None, None))
    
    metrics_data['Pred_%_Diff_From_High_Next_5_Days'], metrics_data['Pred_%_Diff_From_Low_Next_5_Days'] = zip(*predictions)
    
    return metrics_data

In [17]:
crypto_data = fetch_crypto_data('BTCUSDT', '2023-01-01')
print(crypto_data.head())


         Date      Open      High       Low     Close
0  2023-01-01  16541.77  16628.00  16499.01  16616.75
1  2023-01-02  16617.17  16799.23  16548.70  16672.87
2  2023-01-03  16672.78  16778.40  16605.28  16675.18
3  2023-01-04  16675.65  16991.87  16652.66  16850.36
4  2023-01-05  16850.36  16879.82  16753.00  16831.85


In [19]:
# Calculate metrics with a 7-day look-back and a 5-day look-forward period
metrics_data = calculate_metrics(crypto_data, variable1=7, variable2=5)
print(metrics_data.head())


         Date      Open      High       Low     Close  High_Last_7_Days  \
0  2023-01-01  16541.77  16628.00  16499.01  16616.75               NaN   
1  2023-01-02  16617.17  16799.23  16548.70  16672.87               NaN   
2  2023-01-03  16672.78  16778.40  16605.28  16675.18               NaN   
3  2023-01-04  16675.65  16991.87  16652.66  16850.36               NaN   
4  2023-01-05  16850.36  16879.82  16753.00  16831.85               NaN   

   Low_Last_7_Days  Days_Since_High_Last_7_Days  Days_Since_Low_Last_7_Days  \
0              NaN                          NaN                         NaN   
1              NaN                          NaN                         NaN   
2              NaN                          NaN                         NaN   
3              NaN                          NaN                         NaN   
4              NaN                          NaN                         NaN   

   %_Diff_From_High_Last_7_Days  %_Diff_From_Low_Last_7_Days  \
0         

In [21]:
# Train the model on the calculated metrics
model1, model2 = train_model(metrics_data)


Model 1 MSE: 7.494173212125108
Model 2 MSE: 7.909750918102557


In [23]:
# Example input for prediction: [days_since_high, %_diff_from_high, days_since_low, %_diff_from_low]
input_features = [3, -1.2, 5, 2.3]  # Replace these with actual values based on your data
predicted_high_diff, predicted_low_diff = predict_outcomes(model1, model2, input_features)

print(f"Predicted % Difference from High in next 5 days: {predicted_high_diff}")
print(f"Predicted % Difference from Low in next 5 days: {predicted_low_diff}")


Predicted % Difference from High in next 5 days: -11.670957521822315
Predicted % Difference from Low in next 5 days: -3.3503165570433913




In [27]:
import pandas as pd

def predict_outcomes(model1, model2, input_features):
    """
    Predicts future percentage differences from high and low prices using trained models.

    Args:
        model1 (RandomForestRegressor): Model for predicting high price difference.
        model2 (RandomForestRegressor): Model for predicting low price difference.
        input_features (list): List of input values for features:
            [Days_Since_High_Last_7_Days, %_Diff_From_High_Last_7_Days,
             Days_Since_Low_Last_7_Days, %_Diff_From_Low_Last_7_Days]

    Returns:
        tuple: Predicted % difference from future high and low prices.
    """
    # Define feature names as expected by the model
    feature_names = [
        'Days_Since_High_Last_7_Days', 
        '%_Diff_From_High_Last_7_Days', 
        'Days_Since_Low_Last_7_Days', 
        '%_Diff_From_Low_Last_7_Days'
    ]
    
    # Convert input features into a DataFrame with feature names
    input_df = pd.DataFrame([input_features], columns=feature_names)
    
    # Make predictions using the trained models
    prediction_high = model1.predict(input_df)[0]
    prediction_low = model2.predict(input_df)[0]
    
    return prediction_high, prediction_low


In [32]:
evaluated_data = evaluate_crypto_trend(crypto_data, model1, model2, variable1=7, variable2=5)
print(evaluated_data.head())


         Date      Open      High       Low     Close  High_Last_7_Days  \
0  2023-01-01  16541.77  16628.00  16499.01  16616.75               NaN   
1  2023-01-02  16617.17  16799.23  16548.70  16672.87               NaN   
2  2023-01-03  16672.78  16778.40  16605.28  16675.18               NaN   
3  2023-01-04  16675.65  16991.87  16652.66  16850.36               NaN   
4  2023-01-05  16850.36  16879.82  16753.00  16831.85               NaN   

   Low_Last_7_Days  Days_Since_High_Last_7_Days  Days_Since_Low_Last_7_Days  \
0              NaN                          NaN                         NaN   
1              NaN                          NaN                         NaN   
2              NaN                          NaN                         NaN   
3              NaN                          NaN                         NaN   
4              NaN                          NaN                         NaN   

   %_Diff_From_High_Last_7_Days  %_Diff_From_Low_Last_7_Days  \
0         

In [34]:
# Export metrics data to Excel
metrics_data.to_excel("crypto_metrics.xlsx", index=False)
print("Excel file saved as 'crypto_metrics.xlsx'.")



Excel file saved as 'crypto_metrics.xlsx'.
