In [None]:
# Install ZenML together with its optional "server" extra.
# NOTE(review): the version is not pinned; the run recorded in this cell's
# output resolved zenml 0.65.0.
!pip install "zenml[server]"

# Optional: Clean up any existing ZenML configuration to ensure a fresh start.
# This removes the .zen directory in the current working directory.
!rm -rf .zen

# Initialize ZenML (the recorded output shows a repository being created in
# the current working directory).
!zenml init

# Install the required ZenML integrations and additional dependencies
!zenml integration install sklearn -y
# NOTE(review): the reason for pinning pyparsing to 2.4.2 is not recorded here —
# presumably it resolves a dependency conflict; confirm before changing it.
!pip install pyparsing==2.4.2

# Restart the kernel to apply changes.
# The remaining cells are meant to be run after the kernel has come back up.
import IPython
IPython.Application.instance().kernel.do_shutdown(restart=True)

Collecting zenml[server]
  Downloading zenml-0.65.0-py3-none-any.whl.metadata (20 kB)
Collecting alembic<1.9.0,>=1.8.1 (from zenml[server])
  Downloading alembic-1.8.1-py3-none-any.whl.metadata (7.2 kB)
Collecting bcrypt==4.0.1 (from zenml[server])
  Downloading bcrypt-4.0.1-cp36-abi3-manylinux_2_28_x86_64.whl.metadata (9.0 kB)
Collecting click<8.1.4,>=8.0.1 (from zenml[server])
  Downloading click-8.1.3-py3-none-any.whl.metadata (3.2 kB)
Collecting click-params<0.4.0,>=0.3.0 (from zenml[server])
  Downloading click_params-0.3.0-py3-none-any.whl.metadata (3.0 kB)
Collecting docker<7.2.0,>=7.1.0 (from zenml[server])
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting gitpython<4.0.0,>=3.1.18 (from zenml[server])
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting httplib2<0.20,>=0.19.1 (from zenml[server])
  Downloading httplib2-0.19.1-py3-none-any.whl.metadata (2.2 kB)
Collecting passlib<1.8.0,>=1.7.4 (from passlib[bcrypt]<1.8.0,>=1.7.4->zen

[1;35mNumExpr defaulting to 2 threads.[0m
[?25l[1;35mInitializing the ZenML global configuration version to 0.65.0[0m
[32m⠋[0m Initializing ZenML repository at /content.
[1;35mCreating database tables[0m
[2K[1A[2K[32m⠙[0m Initializing ZenML repository at /content.
[2K[1A[2K[32m⠹[0m Initializing ZenML repository at /content.
[2K[1A[2K[32m⠸[0m Initializing ZenML repository at /content.
[2K[1A[2K[32m⠼[0m Initializing ZenML repository at /content.
[2K[1A[2K[32m⠴[0m Initializing ZenML repository at /content.
[2K[1A[2K[32m⠦[0m Initializing ZenML repository at /content.
[2K[1A[2K[32m⠧[0m Initializing ZenML repository at /content.
[2K[1A[2K[32m⠇[0m Initializing ZenML repository at /content.
[2K[1A[2K[32m⠏[0m Initializing ZenML repository at /content.
[2K[1A[2K[32m⠋[0m Initializing ZenML repository at /content.
[2K[1A[2K[32m⠙[0m Initializing ZenML repository at /content.
[2K[1A[2K[32m⠹[0m Initializing ZenML repository at /cont

{'status': 'ok', 'restart': True}

In [None]:
# Install Dask for scalable data processing
# !pip install dask[complete]

In [None]:
# Install VectorBT for backtesting and financial analysis
# (imported below as `vbt` and used by the backtest step).
# NOTE(review): the version is not pinned; the recorded run resolved vectorbt 0.26.2.
!pip install vectorbt

Collecting vectorbt
  Downloading vectorbt-0.26.2.tar.gz (485 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/485.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m485.9/485.9 kB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting dill (from vectorbt)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting dateparser (from vectorbt)
  Downloading dateparser-1.2.0-py2.py3-none-any.whl.metadata (28 kB)
Collecting schedule (from vectorbt)
  Downloading schedule-1.2.2-py3-none-any.whl.metadata (3.8 kB)
Collecting mypy_extensions (from vectorbt)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)
Collecting numba<0.57.0,>=0.56.0 (from vectorbt)
  Downloading numba-0.56.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.8 kB)
Collecting llvmlite<0.40,>=0.39.0dev0 (from numba<0.57.0,>=0.56.0->vecto

In [None]:
# Install BentoML (used by the model_deployment step to save the trained model).
!pip install bentoml



In [None]:
# Install other necessary libraries for the pipeline:
# scikit-learn provides LinearRegression, SimpleImputer and mean_squared_error,
# all of which are imported in the next cell.
!pip install scikit-learn



In [None]:
from zenml import pipeline, step
import pandas as pd
import requests
import vectorbt as vbt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import bentoml
import logging
from sklearn.impute import SimpleImputer
%matplotlib inline

# Setup logging for debugging and tracking pipeline steps.
# basicConfig attaches a handler to the root logger at INFO level; `logger`
# is the module-level logger shared by every step defined below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Acceptable upper bound for the model's mean squared error.
# NOTE(review): no active code in the cells shown here reads this constant.
MSE_THRESHOLD = 1e-4  # NOTE(review): MSE is in squared units of the target ('close'); confirm this value suits that scale

# Step 1: Data Ingestion - Fetch data from an API
@step
def data_ingestion() -> pd.DataFrame:
    """Fetch EUR/USD intraday data from the EODHD API as a DataFrame.

    The API token is read from the ``EODHD_API_TOKEN`` environment variable
    instead of being hardcoded in the notebook, so the credential is not
    committed or shared together with the code.

    Returns:
        A DataFrame built from the JSON payload of the intraday endpoint.

    Raises:
        RuntimeError: If ``EODHD_API_TOKEN`` is not set.
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx response from the API.
        ValueError: If the payload produces an empty DataFrame.
    """
    import os  # function-scope import keeps this step self-contained

    api_url = 'https://eodhd.com/api/intraday/EURUSD.FOREX'
    api_token = os.environ.get('EODHD_API_TOKEN')
    try:
        if not api_token:
            raise RuntimeError(
                "EODHD_API_TOKEN environment variable is not set"
            )

        # A timeout prevents the step from hanging forever on a stalled
        # connection; raise_for_status surfaces HTTP errors instead of
        # trying to parse an error body as market data.
        response = requests.get(
            api_url,
            params={'api_token': api_token, 'fmt': 'json'},
            timeout=30,
        )
        response.raise_for_status()

        data = pd.DataFrame(response.json())
        if data.empty:
            raise ValueError("API returned no rows")

        logger.info("Data ingestion completed successfully")
        return data
    except Exception as e:
        # requests includes the full URL (query string and all) in its error
        # messages, so redact the token before it reaches the log.
        message = str(e)
        if api_token:
            message = message.replace(api_token, '***')
        logger.error(f"Data ingestion failed: {message}")
        raise

# Step 2: Feature Engineering - Preprocess data and create features
@step
def feature_engineering(data: pd.DataFrame) -> pd.DataFrame:
    """Index the raw bars by timestamp and add a 'returns' feature.

    The input frame is left untouched; all work happens on a copy.

    Args:
        data: Raw bars containing at least 'datetime' and 'close' columns.

    Returns:
        A new DataFrame indexed by the parsed 'datetime' column, with an
        added 'returns' column holding the percentage change of 'close'.
        Missing returns (e.g. the first row) are replaced by the column mean.

    Raises:
        Exception: Any error raised while processing is logged and re-raised.
    """
    try:
        # Work on a copy so the caller's frame is never modified in place.
        features = data.copy()
        features['datetime'] = pd.to_datetime(features['datetime'])
        features = features.set_index('datetime')

        # Forward-fill gaps explicitly and switch off pct_change's implicit
        # fill. This yields the same values as the old default behaviour
        # without depending on a deprecated default.
        features['returns'] = (
            features['close'].ffill().pct_change(fill_method=None)
        )

        # Impute remaining NaN values in the 'returns' column with the mean.
        imputer = SimpleImputer(strategy='mean')
        features['returns'] = imputer.fit_transform(features[['returns']]).ravel()

        logger.info("Feature engineering completed successfully")
        return features
    except Exception as e:
        logger.error(f"Feature engineering failed: {e}")
        raise

# Step 3: Model Training - Train a Linear Regression model
@step
def model_training(features: pd.DataFrame) -> tuple[LinearRegression, float]:
    """Fit a linear regression of 'close' on 'returns'.

    Rows whose target ('close') is missing are dropped rather than imputed:
    filling in a regression target invents labels, and dropping keeps this
    step consistent with how model_evaluation treats missing targets.

    Args:
        features: Frame containing 'returns' and 'close' columns.

    Returns:
        A tuple of the fitted model and its R^2 score, computed on the same
        rows it was trained on (an in-sample score).

    Raises:
        Exception: Any error raised while training is logged and re-raised.
    """
    try:
        X = features['returns'].values.reshape(-1, 1)
        y = features['close'].values

        # Keep only the rows that have an observed target value.
        has_target = ~pd.isna(y)
        X = X[has_target]
        y = y[has_target]

        model = LinearRegression()
        model.fit(X, y)

        # Coerce to a plain float so the value matches the declared return type.
        score = float(model.score(X, y))
        logger.info(f"Model trained with score: {score}")
        return model, score
    except Exception as e:
        logger.error(f"Model training failed: {e}")
        raise

# Step 4: Model Evaluation - Evaluate the model using MSE
@step
def model_evaluation(model: LinearRegression, features: pd.DataFrame) -> float:
    """Compute the mean squared error of `model` on the given features.

    Args:
        model: A fitted regression model.
        features: Frame containing 'returns' (input) and 'close' (target).

    Returns:
        The MSE between observed 'close' values and the model's predictions,
        computed over the rows whose 'close' value is present.

    Raises:
        ValueError: If the model produces NaN predictions.
        Exception: Any other error is logged and re-raised.
    """
    try:
        returns_2d = features['returns'].values.reshape(-1, 1)
        close = features['close'].values

        # Rows with a missing target are excluded from the evaluation.
        target_present = pd.notna(close)
        observed = close[target_present]
        predictions = model.predict(returns_2d[target_present])

        # Refuse to score predictions that contain NaN.
        if pd.isna(predictions).any():
            logger.error("Predictions contain NaN values.")
            raise ValueError("Predictions contain NaN values.")

        mse = mean_squared_error(observed, predictions)
        logger.info(f"Model evaluation completed with MSE: {mse}")
        return mse
    except Exception as e:
        logger.error(f"Model evaluation failed: {e}")
        raise

# Step 5: Backtesting - Test the model performance using past data and visualize the results
@step
def backtest(features: pd.DataFrame):
    """Backtest a 10/50 moving-average crossover on 'close' and plot it.

    Entry signals fire when the fast (10-bar) average crosses above the slow
    (50-bar) average; exit signals fire on the opposite cross. The simulated
    portfolio starts with 100,000 in cash and uses a fee of 0.001.

    Args:
        features: Frame containing a 'close' column.

    Raises:
        Exception: Any error raised during the backtest is logged and re-raised.
    """
    try:
        close = features['close']

        # Fast and slow moving averages over the closing price.
        ma_fast = vbt.MA.run(close, window=10)
        ma_slow = vbt.MA.run(close, window=50)

        # Simulate a portfolio driven by the crossover signals.
        portfolio = vbt.Portfolio.from_signals(
            close=close,
            entries=ma_fast.ma_crossed_above(ma_slow),
            exits=ma_fast.ma_crossed_below(ma_slow),
            init_cash=100_000,
            fees=0.001,
        )

        # Overall portfolio summary figure first.
        summary_fig = portfolio.plot()
        summary_fig.show()

        # Both detail figures are built before either one is displayed.
        for detail_fig in (portfolio.plot_orders(), portfolio.plot_trades()):
            detail_fig.show()

        logger.info(f"Total Return: {portfolio.total_return()}")
        logger.info("Backtesting completed successfully")
    except Exception as e:
        logger.error(f"Backtesting failed: {e}")
        raise

# Step 6: Model Deployment - Save the trained model using BentoML
@step
def model_deployment(model: LinearRegression) -> str:
    """Save the trained model with BentoML under the name "forex_model".

    Args:
        model: The fitted regression model to save.

    Returns:
        The tag of the saved model, converted to a string.

    Raises:
        Exception: Any error raised while saving is logged and re-raised.
    """
    try:
        saved_model = bentoml.sklearn.save_model("forex_model", model)

        # The tag identifies the saved model; it is returned as plain text.
        model_tag = str(saved_model.tag)
        logger.info(f"Model deployment completed successfully with tag: {model_tag}")
        return model_tag
    except Exception as e:
        logger.error(f"Model deployment failed: {e}")
        raise

INFO:numexpr.utils:NumExpr defaulting to 2 threads.


[1;35mNumExpr defaulting to 2 threads.[0m


In [None]:
# Define the complete pipeline
@pipeline
def forex_pipeline():
    """Wire the forex steps together.

    Order of calls: ingestion, feature engineering, training, evaluation,
    backtest, deployment.
    """
    raw_data = data_ingestion()
    features = feature_engineering(raw_data)

    # Only the fitted model is consumed downstream; the training score and
    # the evaluation MSE are produced by their steps but not used here.
    model, _score = model_training(features)
    model_evaluation(model, features)

    # NOTE(review): an MSE-gated retrain (re-running training and evaluation
    # when the MSE exceeds MSE_THRESHOLD) used to sit here, disabled.
    # Presumably a step output cannot be compared as a plain number while the
    # pipeline is being defined — confirm before reintroducing it.

    backtest(features)
    model_deployment(model)

# Calling the @pipeline-decorated function starts a pipeline run right away:
# the output recorded for this cell begins with "Initiating a new run for the
# pipeline: forex_pipeline". The name below therefore holds whatever that call
# returns (presumably a run result rather than an un-run pipeline — confirm
# against the ZenML version in use).
forex_svc_pipeline = forex_pipeline()

# Explicit run call, left disabled; the call above already executes the pipeline.
# forex_svc_pipeline.run(unlisted=True)

[1;35mInitiating a new run for the pipeline: [0m[1;36mforex_pipeline[1;35m.[0m


Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



[1;35mRegistered new pipeline: [0m[1;36mforex_pipeline[1;35m.[0m
[1;35mExecuting a new run.[0m
[1;35mUsing user: [0m[1;36mdefault[1;35m[0m
[1;35mUsing stack: [0m[1;36mdefault[1;35m[0m
[1;35m  orchestrator: [0m[1;36mdefault[1;35m[0m
[1;35m  artifact_store: [0m[1;36mdefault[1;35m[0m
[1;35mYou can visualize your pipeline runs in the [0m[1;36mZenML Dashboard[1;35m. In order to try it locally, please run [0m[1;36mzenml up[1;35m.[0m
[1;35mStep [0m[1;36mdata_ingestion[1;35m has started.[0m
INFO:__main__:Data ingestion completed successfully
[1;35mData ingestion completed successfully[0m
[1;35mStep [0m[1;36mdata_ingestion[1;35m has finished in [0m[1;36m2.506s[1;35m.[0m
[1;35mStep [0m[1;36mdata_ingestion[1;35m completed successfully.[0m
[1;35mStep [0m[1;36mfeature_engineering[1;35m has started.[0m
  data['returns'] = data['close'].pct_change()
INFO:__main__:Feature engineering completed successfully
[1;35mFeature engineering completed

INFO:__main__:Total Return: -0.41451733677407093
[1;35mTotal Return: -0.41451733677407093[0m
INFO:__main__:Backtesting completed successfully
[1;35mBacktesting completed successfully[0m
[1;35mStep [0m[1;36mbacktest[1;35m has finished in [0m[1;36m1m19s[1;35m.[0m
[1;35mStep [0m[1;36mbacktest[1;35m completed successfully.[0m
[1;35mStep [0m[1;36mmodel_training[1;35m has started.[0m
INFO:__main__:Model trained with score: 0.00012741530299509307
[1;35mModel trained with score: 0.00012741530299509307[0m
[1;35mStep [0m[1;36mmodel_training[1;35m has finished in [0m[1;36m1.143s[1;35m.[0m
[1;35mStep [0m[1;36mmodel_training[1;35m completed successfully.[0m
[1;35mStep [0m[1;36mmodel_deployment[1;35m has started.[0m
INFO:bentoml.sklearn:Using the default model signature for scikit-learn ({'predict': {'batchable': False}}) for model "forex_model".
[1;35mUsing the default model signature for scikit-learn ({'predict': {'batchable': False}}) for model "forex_mo

In [None]:
# Start the local ZenML dashboard, as suggested by the run log above
# ("In order to try it locally, please run zenml up").
!zenml up