Demonstration of MLFLOW.

In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

In [2]:

!pip install smartapi-python pandas numpy matplotlib pyotp logzero websocket-client pycryptodome
!pip install xgboost matplotlib


Collecting smartapi-python
  Downloading smartapi_python-1.5.5-py3-none-any.whl.metadata (7.2 kB)
Collecting pyotp
  Downloading pyotp-2.9.0-py3-none-any.whl.metadata (9.8 kB)
Collecting logzero
  Downloading logzero-1.7.0-py2.py3-none-any.whl.metadata (11 kB)
Collecting pycryptodome
  Downloading pycryptodome-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Downloading smartapi_python-1.5.5-py3-none-any.whl (28 kB)
Downloading pyotp-2.9.0-py3-none-any.whl (13 kB)
Downloading logzero-1.7.0-py2.py3-none-any.whl (16 kB)
Downloading pycryptodome-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: logzero, pyotp, pycryptodome, smartapi-python
Successfully installed logzero-1.7.0 pycryptodome-3.21.0 pyotp-2.9.0 smartapi-python-1.5.5


In [3]:
!pip install mlflow
import mlflow
import mlflow.sklearn
import mlflow.xgboost

Collecting mlflow
  Downloading mlflow-2.20.1-py3-none-any.whl.metadata (30 kB)
Collecting mlflow-skinny==2.20.1 (from mlflow)
  Downloading mlflow_skinny-2.20.1-py3-none-any.whl.metadata (31 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.14.1-py3-none-any.whl.metadata (7.4 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.20.1->mlflow)
  Downloading databricks_sdk-0.43.0-py3-none-any.whl.metadata (38 kB)
Collecting Mako (from alembic!=1.10.0,<2->mlflow)
  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)
Collecting graphql-core<3.3,>=3.1 (from graphene<4->mlflow)
  Downloading graphql_core-3.2.6-py3-none-any.whl.metadata (11 kB)
Colle

In [4]:
import pandas as pd
import numpy as np
import time
import pyotp
from logzero import logger
from SmartApi.smartConnect import SmartConnect
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from tabulate import tabulate

# =======================
# 🔹 USER CONFIGURATION
# =======================
API_KEY = "uMU5cqo6"
CLIENT_CODE = "P62342768"
PASSWORD = "2611"
TOTP_SECRET = "PHWQUE25HTYKZ4WMWCGNOB5RQU"
EXCHANGE = "NSE"
INTERVAL = "FIVE_MINUTE"

# Top 5 stocks for analysis
STOCKS_TO_ANALYZE = ["11256", "11626", "11253", "11580", "24969"]

# =======================
# 🔹 HELPER FUNCTIONS
# =======================
def login_smart_api():
    """Login to the Smart API."""
    try:
        obj = SmartConnect(api_key=API_KEY)
        totp = pyotp.TOTP(TOTP_SECRET).now()
        obj.generateSession(CLIENT_CODE, PASSWORD, totp)
        return obj
    except Exception as e:
        logger.error(f"Login Error: {e}")
        return None

def fetch_historical_data(api_obj, stock_token, from_date, to_date):
    """Fetch historical data for a stock."""
    try:
        params = {
            "exchange": EXCHANGE,
            "symboltoken": stock_token,
            "interval": INTERVAL,
            "fromdate": from_date,
            "todate": to_date,
        }
        data = api_obj.getCandleData(params)
        df = pd.DataFrame(data['data'], columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        return df
    except Exception as e:
        logger.error(f"Data Fetch Error: {e}")
        return None

def validate_prediction(validation_df, signal, target_price, stop_loss):
    """Validate if target or stop loss was hit."""
    try:
        if signal == "BUY":
            target_hit = validation_df[validation_df['high'] >= target_price]
            stop_hit = validation_df[validation_df['low'] <= stop_loss]
        else:
            target_hit = validation_df[validation_df['low'] <= target_price]
            stop_hit = validation_df[validation_df['high'] >= stop_loss]

        if not target_hit.empty:
            return "Target Hit", target_hit.iloc[0]['timestamp']
        elif not stop_hit.empty:
            return "Stop Loss Hit", stop_hit.iloc[0]['timestamp']
        else:
            return "No Outcome", None
    except Exception as e:
        logger.error(f"Validation Error: {e}")
        return "Error", None

# =======================
# 🔹 MODEL DEFINITIONS
# =======================
def model1_logistic_rsi(train_df):
    """Logistic Regression with RSI."""
    try:
        # Feature Engineering
        delta = train_df['close'].diff()
        gain = delta.where(delta > 0, 0).rolling(14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
        train_df['RSI'] = 100 - (100 / (1 + (gain / loss)))
        train_df.dropna(inplace=True)

        if len(train_df) < 15:
            return None  # Insufficient data

        # Prepare data
        X = train_df[['RSI']][:-1]
        y = np.where(train_df['close'].shift(-1) > train_df['close'], 1, 0)[:-1]

        # Train model
        model = LogisticRegression()
        model.fit(X, y)
        return model
    except Exception as e:
        logger.error(f"Model 1 Training Error: {e}")
        return None

def model2_xgb_basic(train_df):
    """XGBoost with OHLCV features."""
    try:
        train_df['target'] = (train_df['close'].pct_change().shift(-1) > 0).astype(int)
        train_df.dropna(inplace=True)

        if len(train_df) < 10:
            return None

        X = train_df[['open', 'high', 'low', 'close', 'volume']][:-1]
        y = train_df['target'][:-1]

        model = XGBClassifier()
        model.fit(X, y)
        return model
    except Exception as e:
        logger.error(f"Model 2 Training Error: {e}")
        return None

def model3_xgb_advanced(train_df):
    """XGBoost with advanced features."""
    try:
        # Feature Engineering
        train_df['SMA_5'] = train_df['close'].rolling(5).mean()
        train_df['SMA_10'] = train_df['close'].rolling(10).mean()
        train_df['momentum'] = train_df['close'] - train_df['close'].shift(5)
        train_df['breakout'] = (train_df['close'] > train_df['high'].rolling(10).max().shift(1)).astype(int)
        train_df.dropna(inplace=True)

        if len(train_df) < 15:
            return None

        X = train_df[['momentum', 'breakout', 'SMA_5', 'SMA_10']][:-1]
        y = np.where(train_df['close'].shift(-1) > train_df['close'], 1, 0)[:-1]

        model = XGBClassifier()
        model.fit(X, y)
        return model
    except Exception as e:
        logger.error(f"Model 3 Training Error: {e}")
        return None

def model4_random_forest(train_df):
    """Random Forest with multiple indicators."""
    try:
        train_df['SMA_5'] = train_df['close'].rolling(5).mean()
        train_df['SMA_10'] = train_df['close'].rolling(10).mean()
        train_df['momentum'] = train_df['close'] - train_df['close'].shift(5)
        train_df.dropna(inplace=True)

        if len(train_df) < 15:
            return None

        X = train_df[['SMA_5', 'SMA_10', 'momentum']][:-1]
        y = np.where(train_df['close'].shift(-1) > train_df['close'], 1, 0)[:-1]

        model = RandomForestClassifier()
        model.fit(X, y)
        return model
    except Exception as e:
        logger.error(f"Model 4 Training Error: {e}")
        return None

def model5_svm(train_df):
    """SVM with technical patterns."""
    try:
        train_df['SMA_5'] = train_df['close'].rolling(5).mean()
        train_df['SMA_10'] = train_df['close'].rolling(10).mean()
        train_df['momentum'] = train_df['close'] - train_df['close'].shift(5)
        train_df.dropna(inplace=True)

        if len(train_df) < 15:
            return None

        X = train_df[['SMA_5', 'SMA_10', 'momentum']][:-1]
        y = np.where(train_df['close'].shift(-1) > train_df['close'], 1, 0)[:-1]

        model = SVC()
        model.fit(X, y)
        return model
    except Exception as e:
        logger.error(f"Model 5 Training Error: {e}")
        return None

# =======================
# 🔹 PREDICTION FUNCTIONS
# =======================
def predict_model1(model, train_df):
    """Predict using Logistic Regression (RSI)."""
    try:
        latest_data = train_df[['RSI']].iloc[-1:]
        prediction = model.predict(latest_data)[0]
        close_price = train_df['close'].iloc[-1]

        if prediction == 1:
            return "BUY", round(close_price * 1.01, 2), round(close_price * 0.98, 2)
        else:
            return "SELL", round(close_price * 0.99, 2), round(close_price * 1.02, 2)
    except Exception as e:
        logger.error(f"Model1 Prediction Error: {e}")
        return None, None, None

def predict_model2(model, train_df):
    """Predict using XGBoost (Basic)."""
    try:
        latest_data = train_df[['open', 'high', 'low', 'close', 'volume']].iloc[-1:]
        prediction = model.predict(latest_data)[0]
        close_price = latest_data['close'].values[0]

        if prediction == 1:
            return "BUY", round(close_price * 1.015, 2), round(close_price * 0.985, 2)
        else:
            return "SELL", round(close_price * 0.985, 2), round(close_price * 1.015, 2)
    except Exception as e:
        logger.error(f"Model2 Prediction Error: {e}")
        return None, None, None

# =======================
# 🔹 MAIN EXECUTION
# =======================
if __name__ == "__main__":
    api_obj = login_smart_api()
    if not api_obj:
        exit("Failed to login to API")

    # Define models to compare
    MODELS = [
        {
            'name': 'Logistic Regression (RSI)',
            'train': model1_logistic_rsi,
            'predict': predict_model1
        },
        {
            'name': 'XGBoost (Basic)',
            'train': model2_xgb_basic,
            'predict': predict_model2
        },
        {
            'name': 'XGBoost (Advanced)',
            'train': model3_xgb_advanced,
            'predict': predict_model2
        },
        {
            'name': 'Random Forest',
            'train': model4_random_forest,
            'predict': predict_model2
        },
        {
            'name': 'SVM',
            'train': model5_svm,
            'predict': predict_model2
        }
    ]

    results = []

    for stock in STOCKS_TO_ANALYZE:
        logger.info(f"Analyzing {stock}...")

        # Fetch training and validation data
        train_df = fetch_historical_data(api_obj, stock, "2025-02-06 09:30", "2025-02-06 12:30")
        valid_df = fetch_historical_data(api_obj, stock, "2025-02-06 12:30", "2025-02-06 15:15")

        if train_df is None or valid_df is None:
            continue

        for model in MODELS:
            try:
                # Train model
                trained_model = model['train'](train_df)
                if not trained_model:
                    continue

                # Predict and validate
                signal, target, sl = model['predict'](trained_model, train_df)
                result, event_time = validate_prediction(valid_df, signal, target, sl)

                # Store results
                results.append({
                    'Stock': stock,
                    'Model': model['name'],
                    'Signal': signal,
                    'Target': target,
                    'Stop Loss': sl,
                    'Result': result,
                    'Event Time': event_time
                })

            except Exception as e:
                logger.error(f"{model['name']} failed: {str(e)}")

    # Generate report
    results_df = pd.DataFrame(results)
    print(tabulate(results_df, headers='keys', tablefmt='pretty', showindex=False))

[I 250210 16:26:09 smartConnect:124] in pool
[I 250210 16:26:11 <ipython-input-4-dd20a62916e9>:264] Analyzing 11256...
[E 250210 16:26:13 <ipython-input-4-dd20a62916e9>:221] Model2 Prediction Error: feature_names mismatch: ['momentum', 'breakout', 'SMA_5', 'SMA_10'] ['open', 'high', 'low', 'close', 'volume']
    expected momentum, SMA_5, SMA_10, breakout in input data
    training data did not have the following fields: high, open, low, volume, close
[I 250210 16:26:13 <ipython-input-4-dd20a62916e9>:264] Analyzing 11626...
[E 250210 16:26:14 <ipython-input-4-dd20a62916e9>:221] Model2 Prediction Error: feature_names mismatch: ['momentum', 'breakout', 'SMA_5', 'SMA_10'] ['open', 'high', 'low', 'close', 'volume']
    expected momentum, SMA_5, SMA_10, breakout in input data
    training data did not have the following fields: high, open, low, volume, close
[I 250210 16:26:14 <ipython-input-4-dd20a62916e9>:264] Analyzing 11253...
[E 250210 16:26:16 <ipython-input-4-dd20a62916e9>:221] Model2

+-------+---------------------------+--------+--------+-----------+---------------+---------------------------+
| Stock |           Model           | Signal | Target | Stop Loss |    Result     |        Event Time         |
+-------+---------------------------+--------+--------+-----------+---------------+---------------------------+
| 11256 | Logistic Regression (RSI) |  SELL  | 143.54 |  147.89   | Stop Loss Hit | 2025-02-06 14:00:00+05:30 |
| 11256 |      XGBoost (Basic)      |  SELL  | 142.82 |  147.16   | Stop Loss Hit | 2025-02-06 14:00:00+05:30 |
| 11256 |    XGBoost (Advanced)     |        |  nan   |    nan    |  No Outcome   |            NaT            |
| 11626 | Logistic Regression (RSI) |  SELL  | 571.72 |  589.05   |  No Outcome   |            NaT            |
| 11626 |      XGBoost (Basic)      |  SELL  | 568.84 |  586.16   |  No Outcome   |            NaT            |
| 11626 |    XGBoost (Advanced)     |        |  nan   |    nan    |  No Outcome   |            NaT      

In [None]:
import mlflow
import mlflow.sklearn
from sklearn.metrics import accuracy_score
# Ideally you will not require following 4 lines if you have started fresh and do not have any previous dagshub credentials on your computer
import os
os.environ['MLFLOW_TRACKING_USERNAME'] = 'DID-85' # 'learnpythonlanguage'
os.environ['MLFLOW_TRACKING_PASSWORD'] = 'Didhit@1234' #
os.environ['MLFLOW_TRACKING_URI'] = 'https://dagshub.com/DID-85/MlFlow_Demo_didhit_patel.mlflow' # https://dagshub.com/learnpythonlanguage/mlflow_dagshub_demo.mlflow



# Your existing models definition:
MODELS = [
    {
        'name': 'Logistic Regression (RSI)',
        'train': model1_logistic_rsi,  # Function that trains and returns a trained Logistic Regression model using RSI features
        'predict': predict_model1      # Function that predicts given the trained model and test data
    },
    {
        'name': 'XGBoost (Basic)',
        'train': model2_xgb_basic,
        'predict': predict_model2
    },
    {
        'name': 'XGBoost (Advanced)',
        'train': model3_xgb_advanced,
        'predict': predict_model2
    },
    {
        'name': 'Random Forest',
        'train': model4_random_forest,
        'predict': predict_model2
    },
    {
        'name': 'SVM',
        'train': model5_svm,
        'predict': predict_model2
    }
]

results = []

# Loop through each model, train, evaluate, and log with MLflow
for model_info in MODELS:
    model_name = model_info['name']
    train_func = model_info['train']
    predict_func = model_info['predict']

    with mlflow.start_run(run_name=model_name):
        # Train the model (assumes the train function returns a trained model instance)
        trained_model = train_func()

        # Predict on test data using your predict function.
        # It is assumed that your predict function signature accepts (trained_model, X_test).
        y_pred = predict_func(trained_model, X_test)

        # Calculate the accuracy (or any other metric you prefer)
        accuracy = accuracy_score(y_test, y_pred)

        # Log the metric
        mlflow.log_metric('accuracy', accuracy)

        # Log the model. If your trained_model is a scikit-learn model, this works directly.
        mlflow.sklearn.log_model(trained_model, model_name)

        # Optionally, log additional parameters if available:
        # For example, if your train function uses internal parameters, you could log them here.
        # mlflow.log_param('some_parameter', value)

        results.append({'model': model_name, 'accuracy': accuracy})

        print(f"Logged {model_name} with accuracy: {accuracy:.4f}")


(array([0, 1]), array([900, 100]))