In [1]:
# Run this cell to install mlflow
!pip install mlflow

Collecting mlflow
  Downloading mlflow-2.17.1-py3-none-any.whl.metadata (29 kB)
Collecting mlflow-skinny==2.17.1 (from mlflow)
  Downloading mlflow_skinny-2.17.1-py3-none-any.whl.metadata (30 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.13.3-py3-none-any.whl.metadata (7.4 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.1-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.17.1->mlflow)
  Downloading databricks_sdk-0.36.0-py3-none-any.whl.metadata (38 kB)
Collecting Mako (from alembic!=1.10.0,<2->mlflow)
  Downloading Mako-1.3.6-py3-none-any.whl.metadata (2.9 kB)
Collecting graphql-core<3.3,>=3.1 (from graphene<4->mlflow)
  Downloading graphql_core-3.2.5-py3-none-any.whl.metadata (10 kB)
Colle

In [4]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [5]:
# Run this cell to import the modules you require
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

# Read in the data
#weather = pd.read_csv("london_weather.csv")
weather = pd.read_csv("/content/gdrive/MyDrive/Colab Notebooks/london_weather.csv")
# Start coding here
from sklearn.impute import SimpleImputer

# Step 1: Preprocessing the data

# Handle missing values for X (features)
imputer_X = SimpleImputer(strategy='mean')
imputed_X = imputer_X.fit_transform(weather.drop(columns=['date']))

# Handle missing values for y (target)
imputer_y = SimpleImputer(strategy='mean')
y = imputer_y.fit_transform(weather[['mean_temp']]).ravel()  # Use ravel() to flatten y

# Standardize the features
scaler = StandardScaler()
scaled_X = scaler.fit_transform(imputed_X)

# Assign features to X and split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(scaled_X, y, test_size=0.2, random_state=42)

# Step 2: Define models and train them
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree": DecisionTreeRegressor(),
    "Random Forest": RandomForestRegressor(n_estimators=100)
}

# Step 3: Log experiments using MLflow
for model_name, model in models.items():
    with mlflow.start_run(run_name=model_name):
        # Train the model
        model.fit(X_train, y_train)

        # Predictions and RMSE
        predictions = model.predict(X_test)
        rmse = np.sqrt(mean_squared_error(y_test, predictions))

        # Log model parameters and metrics
        mlflow.log_param("model_name", model_name)
        mlflow.log_metric("rmse", rmse)
        mlflow.sklearn.log_model(model, model_name)

        print(f"{model_name}: RMSE = {rmse}")




Linear Regression: RMSE = 4.04933655923303e-15




Decision Tree: RMSE = 0.013628215225933991




Random Forest: RMSE = 0.017780998201306374
