In [1]:
!pip install mlflow scikit-learn pandas matplotlib


Collecting mlflow
  Downloading mlflow-2.21.3-py3-none-any.whl.metadata (30 kB)
Collecting mlflow-skinny==2.21.3 (from mlflow)
  Downloading mlflow_skinny-2.21.3-py3-none-any.whl.metadata (31 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.21.3->mlflow)
  Downloading databricks_sdk-0.49.0-py3-none-any.whl.metadata (38 kB)
Collecting fastapi<1 (from mlflow-skinny==2.21.3->mlflow)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn<1 (from mlflow-skinny==2.21.3->mlflow)
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 k

In [4]:
import zipfile

# Unpack the uploaded dataset archive into its own working folder.
with zipfile.ZipFile("/content/archive.zip", mode="r") as archive:
    archive.extractall(path="/content/f1_data")


In [5]:
import os

# Enumerate the extracted CSV files; the bare name on the last line displays the list.
files = os.listdir(path="/content/f1_data")
files


['lap_times.csv',
 'drivers.csv',
 'sprint_results.csv',
 'constructors.csv',
 'status.csv',
 'pit_stops.csv',
 'constructor_standings.csv',
 'circuits.csv',
 'constructor_results.csv',
 'races.csv',
 'driver_standings.csv',
 'results.csv',
 'qualifying.csv',
 'seasons.csv']

In [6]:
import pandas as pd

# Load the per-lap timing table and preview its first rows.
lap_times = pd.read_csv(filepath_or_buffer="/content/f1_data/lap_times.csv")
lap_times.head(5)


Unnamed: 0,raceId,driverId,lap,position,time,milliseconds
0,841,20,1,1,1:38.109,98109
1,841,20,2,1,1:33.006,93006
2,841,20,3,1,1:32.713,92713
3,841,20,4,1,1:32.803,92803
4,841,20,5,1,1:32.342,92342


In [7]:
from sklearn.model_selection import train_test_split

# Predictors: driver, lap number and position; target: the lap time in milliseconds.
X = lap_times.loc[:, ["driverId", "lap", "position"]]
y = lap_times.loc[:, "milliseconds"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [8]:
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

def run_experiment(n_estimators, max_depth, random_state):
    """Train one random-forest regressor and record it as an MLflow run.

    The model is fitted on the notebook-level ``X_train``/``y_train`` and
    scored on ``X_test``/``y_test``. The run records the three
    hyper-parameters, the MSE/MAE/R2 metrics, the fitted model (artifact
    path ``"model"``) and a predicted-vs-true scatter plot
    (``prediction_plot.png``).

    Args:
        n_estimators: Number of trees passed to ``RandomForestRegressor``.
        max_depth: Maximum tree depth passed to ``RandomForestRegressor``.
        random_state: Seed passed to ``RandomForestRegressor``.

    Returns:
        None. All results are written to the active MLflow tracking store.
    """
    params = {
        "n_estimators": n_estimators,
        "max_depth": max_depth,
        "random_state": random_state,
    }

    with mlflow.start_run():
        # Log the parameters first so a run that fails mid-training is still
        # identifiable in the tracking UI.
        mlflow.log_params(params)

        model = RandomForestRegressor(**params)
        model.fit(X_train, y_train)
        preds = model.predict(X_test)

        # Evaluate on the held-out split and log all metrics in one call.
        mlflow.log_metrics(
            {
                "mse": mean_squared_error(y_test, preds),
                "mae": mean_absolute_error(y_test, preds),
                "r2": r2_score(y_test, preds),
            }
        )

        # Persist the fitted estimator with the run.
        mlflow.sklearn.log_model(model, "model")

        # Predicted-vs-true scatter. An explicit figure object avoids pyplot's
        # global state, and log_figure uploads it directly, so no
        # "prediction_plot.png" is left behind (or overwritten between runs)
        # in the working directory.
        fig, ax = plt.subplots(figsize=(6, 4))
        try:
            ax.scatter(y_test, preds, alpha=0.4)
            ax.set_xlabel("True lap time (ms)")
            ax.set_ylabel("Predicted lap time (ms)")
            ax.set_title("Predicted vs True")
            fig.tight_layout()
            mlflow.log_figure(fig, "prediction_plot.png")
        finally:
            # Always release the figure, even if plotting or logging raised;
            # otherwise figures accumulate across the parameter sweep.
            plt.close(fig)


In [9]:
# Sweep the 3x3 grid of forest sizes and tree depths: one MLflow run per pair,
# visiting every depth for a given forest size before moving to the next size.
for n, depth in (
    (trees, levels)
    for trees in [50, 100, 150]
    for levels in [5, 10, 15]
):
    run_experiment(n_estimators=n, max_depth=depth, random_state=1)




In [10]:
# Install pyngrok
!pip install pyngrok --quiet

# Set up a temporary tracking URI folder
import os
os.environ["MLFLOW_TRACKING_URI"] = "file:/content/mlruns"

# Start MLflow UI in background
get_ipython().system_raw("mlflow ui --port 5000 &")


In [13]:
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: never hardcode the ngrok authtoken in the notebook - anything saved
# here is published with the file. A token that was ever committed must be
# revoked in the ngrok dashboard and replaced.
# The token is read from the NGROK_AUTHTOKEN environment variable when set;
# otherwise it is requested interactively without being echoed or stored.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_token)


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [14]:
from pyngrok import ngrok

# Open a tunnel to local port 5000 (the port the MLflow UI was started on)
# and report where it can be reached.
public_url = ngrok.connect(addr=5000)
print("MLflow UI is available at: " + str(public_url))


MLflow UI is available at: NgrokTunnel: "https://2c50-34-71-121-120.ngrok-free.app" -> "http://localhost:5000"
