<a href="https://colab.research.google.com/github/Sameersah/mid-term-catch-assignment/blob/main/c)_End2end_Mlops_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# End-to-End MLOps System

In [None]:
### Step 1: Install Required Libraries
# Install necessary libraries for the MLOps pipeline
!pip install mlflow prefect scikit-learn pandas numpy matplotlib sklearn

### Step 2: Import Required Libraries
# Import libraries for data processing, modeling, and MLOps
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
from prefect import flow, task
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# `load_boston` was removed in scikit-learn 1.2; guard the legacy import so
# the remaining imports still succeed on current releases.
try:
    from sklearn.datasets import load_boston
except ImportError:
    load_boston = None

### Step 3: Define the Dataset
# Load a regression dataset. `load_boston` was removed in scikit-learn 1.2,
# so the California Housing dataset (the replacement scikit-learn suggests)
# is used instead. The first call downloads the data and caches it locally.
data = fetch_california_housing()
X = pd.DataFrame(data.data, columns=data.feature_names)
# The target is the median house value per district; the series keeps the
# name "price" used by the rest of the notebook.
y = pd.Series(data.target, name="price")

# Split into train and test sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Step 4: Set Up MLflow Tracking
# Select the MLflow experiment that all runs below are logged under
mlflow.set_experiment("end2end_mlops_system")

@task
def train_model(X_train, y_train, n_estimators=100, random_state=42):
    """Train a Random Forest regressor and log it to MLflow.

    Args:
        X_train: Training feature matrix.
        y_train: Training target values.
        n_estimators: Number of trees in the forest (default 100).
        random_state: Seed passed to the estimator (default 42).

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    with mlflow.start_run():
        model = RandomForestRegressor(
            n_estimators=n_estimators,
            random_state=random_state,
        )
        # Log the hyperparameters from the same values used to build the
        # model, so the tracked params can never drift from the real ones.
        mlflow.log_param("n_estimators", n_estimators)
        mlflow.log_param("random_state", random_state)
        model.fit(X_train, y_train)
        mlflow.sklearn.log_model(model, "random_forest_model")
    return model

@task
def evaluate_model(model, X_test, y_test):
    """Evaluate the model on the test set and log its RMSE to MLflow.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test feature matrix.
        y_test: True target values for ``X_test``.

    Returns:
        The root mean squared error of the predictions, as a float.
    """
    predictions = model.predict(X_test)
    # The `squared=False` argument was deprecated in scikit-learn 1.4 and
    # removed in 1.6; taking the square root of the MSE works on any version.
    rmse = float(mean_squared_error(y_test, predictions)) ** 0.5

    if mlflow.active_run() is not None:
        # Caller already opened a run: log into it, as before.
        mlflow.log_metric("rmse", rmse)
    else:
        # With no active run, a bare log_metric() would implicitly start a new
        # run and leave it open (breaking the next start_run() call). Attach
        # the metric to the most recent run of this session instead -- in the
        # pipeline that is the training run -- and close it again on exit.
        previous_run = mlflow.last_active_run()
        run_id = previous_run.info.run_id if previous_run is not None else None
        with mlflow.start_run(run_id=run_id):
            mlflow.log_metric("rmse", rmse)
    return rmse

@flow
def mlops_pipeline():
    """Run the end-to-end pipeline: train the model, then score it.

    Uses the module-level train/test splits and prints a start message and
    a completion message that includes the resulting RMSE.
    """
    print("Starting pipeline")
    # Feed the freshly trained model straight into the evaluation task.
    rmse = evaluate_model(train_model(X_train, y_train), X_test, y_test)
    print(f"Pipeline completed. RMSE: {rmse}")

# Execute the flow once
mlops_pipeline()

### Step 5: Serve the Model
# Print the MLflow CLI command for serving the logged model; `<run_id>` is a
# placeholder to be replaced with the ID of the run that holds the model.
print(
    "To serve the model, use MLflow CLI:",
    "mlflow models serve -m runs:/<run_id>/random_forest_model --port 5000",
    sep="\n",
)
