In [1]:
%pip install scikit-learn


Collecting scikit-learn
  Downloading scikit_learn-1.5.2-cp312-cp312-win_amd64.whl.metadata (13 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.5.2-cp312-cp312-win_amd64.whl (11.0 MB)
   ---------------------------------------- 0.0/11.0 MB ? eta -:--:--
   -------- ------------------------------- 2.4/11.0 MB 12.2 MB/s eta 0:00:01
   ---------------- ----------------------- 4.5/11.0 MB 11.2 MB/s eta 0:00:01
   --------------------------------- ------ 9.2/11.0 MB 15.4 MB/s eta 0:00:01
   ---------------------------------------- 11.0/11.0 MB 16.0 MB/s eta 0:00:00
Downloading threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, scikit-learn
Successfully installed scikit-learn-1.5.2 threadpoolctl-3.5.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import pickle

def train_model(data: pd.DataFrame, lookback_days=None, forecast_days=None,
                model_path="crypto_price_predictor.pkl"):
    """Fit a random-forest regressor on the look-back metrics and report test scores.

    The four feature columns are the ``..._Last_{lookback_days}_Days`` metrics and
    the two target columns are the ``..._Next_{forecast_days}_Days`` metrics.
    Rows whose targets are NaN are dropped, the remainder is split 80/20
    (``random_state=42``), a 100-tree ``RandomForestRegressor`` is fitted on the
    training part, and MSE / R-squared on the held-out part are printed.

    Args:
        data: Frame containing the feature and target columns described above.
        lookback_days: Window size used in the feature column names. When
            ``None`` the module-level ``variable1`` is used (legacy behaviour).
        forecast_days: Window size used in the target column names. When
            ``None`` the module-level ``variable2`` is used (legacy behaviour).
        model_path: Where to pickle the fitted model. Pass ``None`` to skip
            saving.

    Returns:
        The fitted ``RandomForestRegressor``.

    Raises:
        KeyError: If any required feature or target column is absent.
        ValueError: If fewer than two rows remain after dropping NaN targets.
        NameError: If a window size is omitted and the corresponding legacy
            global is not defined.
    """
    # Backward compatibility: older callers pass only `data` and rely on the
    # notebook having defined these globals beforehand.
    if lookback_days is None:
        lookback_days = variable1
    if forecast_days is None:
        forecast_days = variable2

    # Define input features and target variables
    feature_columns = [
        f'Days_Since_High_Last_{lookback_days}_Days',
        f'%_Diff_From_High_Last_{lookback_days}_Days',
        f'Days_Since_Low_Last_{lookback_days}_Days',
        f'%_Diff_From_Low_Last_{lookback_days}_Days',
    ]
    target_columns = [
        f'%_Diff_From_High_Next_{forecast_days}_Days',
        f'%_Diff_From_Low_Next_{forecast_days}_Days',
    ]

    # Fail early with the full list of absent columns instead of pandas'
    # less specific KeyError raised halfway through.
    missing = [col for col in feature_columns + target_columns if col not in data.columns]
    if missing:
        raise KeyError(f"Input data is missing required columns: {missing}")

    # Rows without a target cannot be used for supervised training.
    training_rows = data.dropna(subset=target_columns)
    if len(training_rows) < 2:
        raise ValueError(
            "Need at least 2 rows with non-NaN targets to build a train/test split; "
            f"got {len(training_rows)}."
        )

    # Extract features and target variables
    X = training_rows[feature_columns]
    y = training_rows[target_columns]

    # Split data into training and testing sets.
    # NOTE(review): if the rows are time-ordered with overlapping windows, a
    # shuffled split may leak future information into training — confirm
    # whether a chronological split (shuffle=False) is more appropriate.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Initialize and train the model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate the model on the test set
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"Model Mean Squared Error: {mse}")
    print(f"Model R-squared Score: {r2}")

    # Save the model to a file for future use
    if model_path is not None:
        with open(model_path, "wb") as f:
            pickle.dump(model, f)

    return model

def predict_outcomes(model, days_since_high, diff_from_high, days_since_low, diff_from_low):
    """Predict the two forward-looking percentage differences for one observation.

    Args:
        model: Fitted estimator exposing ``predict``; it must return one row
            with two values (high diff, low diff) for a single input row.
        days_since_high: Days since the look-back high.
        diff_from_high: Percentage difference from the look-back high.
        days_since_low: Days since the look-back low.
        diff_from_low: Percentage difference from the look-back low.

    Returns:
        Dict with keys ``'Predicted_%_Diff_From_High_Next_Days'`` and
        ``'Predicted_%_Diff_From_Low_Next_Days'``.
    """
    # Prepare input data for prediction (argument order matches the feature
    # order used at training time).
    row = [[days_since_high, diff_from_high, days_since_low, diff_from_low]]

    # A model fitted on a DataFrame records its column names; predicting with a
    # bare list then triggers scikit-learn's "X does not have valid feature
    # names" warning. Re-attach the names when they are available and match.
    feature_names = getattr(model, "feature_names_in_", None)
    if feature_names is not None and len(feature_names) == len(row[0]):
        input_data = pd.DataFrame(row, columns=list(feature_names))
    else:
        input_data = row

    predicted_diff = model.predict(input_data)[0]

    return {
        'Predicted_%_Diff_From_High_Next_Days': predicted_diff[0],
        'Predicted_%_Diff_From_Low_Next_Days': predicted_diff[1]
    }

if __name__ == "__main__":
    # Read the pre-computed metrics table (change the filename to match your data).
    data = pd.read_csv("crypto_metrics_data.csv")

    # Window sizes: train_model reads these as module-level names.
    #   variable1 -> look-back window used in the feature column names
    #   variable2 -> look-ahead window used in the target column names
    variable1, variable2 = 7, 5

    # Fit the regressor; this also prints the test-set metrics.
    model = train_model(data)

    # One hand-written observation to sanity-check the fitted model.
    days_since_high, diff_from_high = 2, -1.5
    days_since_low, diff_from_low = 3, 2.0

    predictions = predict_outcomes(
        model,
        days_since_high=days_since_high,
        diff_from_high=diff_from_high,
        days_since_low=days_since_low,
        diff_from_low=diff_from_low,
    )
    print("Predicted Outcomes:", predictions)


Model Mean Squared Error: 4.4173677734064345
Model R-squared Score: 0.508095180775063
Predicted Outcomes: {'Predicted_%_Diff_From_High_Next_Days': -1.9385956976400012, 'Predicted_%_Diff_From_Low_Next_Days': 2.136563355310002}


