Question 3

In [None]:
import pickle
import requests  # Optional, if downloading the file via code

# Download the model if not present (comment out if already downloaded)
import urllib.request
#urllib.request.urlretrieve("https://github.com/DataTalksClub/machine-learning-zoomcamp/raw/refs/heads/master/cohorts/2025/05-deployment/pipeline_v1.bin", "pipeline_v1.bin")

# Restore the serialized pipeline from the local file.
# NOTE: pickle can execute arbitrary code while loading; only open files from a trusted source.
with open('pipeline_v1.bin', 'rb') as pipeline_file:
    model = pickle.load(pipeline_file)

# The single record to score, expressed as a feature dictionary.
client = dict(
    lead_source="paid_ads",
    number_of_courses_viewed=2,
    annual_income=79276.0,
)

# The pipeline is given a one-record batch; row 0, column 1 is read as the
# probability of conversion (class 1).
class_probabilities = model.predict_proba([client])
prob = class_probabilities[0, 1]
print(f'Probability: {prob:.3f}')


Question 4

FastAPI script for serving the model (app.py):

In [None]:
from fastapi import FastAPI
import pickle
import uvicorn
from pydantic import BaseModel
from typing import Dict, Any

# Application object that the route decorators attach to.
app = FastAPI()

# Deserialize the pipeline once at import time so it is shared by all requests.
# NOTE: pickle can execute arbitrary code while loading; only open files from a trusted source.
with open('pipeline_v1.bin', 'rb') as pipeline_file:
    model = pickle.load(pipeline_file)

class Client(BaseModel):
    """Request body for the /predict endpoint: the features describing one lead."""
    lead_source: str  # source label, e.g. "paid_ads" or "organic_search"
    number_of_courses_viewed: int
    annual_income: float

@app.post("/predict")
def predict(client: Client):
    """Score one lead and return its conversion probability.

    Args:
        client: Validated request body holding the lead's features.

    Returns:
        A dict of the form ``{"probability": <float>}`` where the value is
        the entry at row 0, column 1 of the pipeline's ``predict_proba`` output.
    """
    # Pydantic v2 deprecates ``.dict()`` in favour of ``.model_dump()``;
    # fall back to ``.dict()`` so the endpoint keeps working on Pydantic v1.
    if hasattr(client, "model_dump"):
        client_dict = client.model_dump()
    else:
        client_dict = client.dict()

    prob = model.predict_proba([client_dict])[0, 1]

    # Convert to a built-in float so the response does not depend on how the
    # JSON encoder treats the scalar type returned by the model (presumably a
    # NumPy scalar -- confirm against the pipeline).
    return {"probability": float(prob)}

# Launch the server only when this file is executed directly as a script.
if __name__ == "__main__":
    # 0.0.0.0 makes the server listen on all network interfaces.
    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)


Client scoring script (score_client.py):

In [None]:
import requests

# Endpoint exposed by the FastAPI service.
url = "http://localhost:8000/predict"

# Lead to score; the keys are the fields of the request body.
client = {
    "lead_source": "organic_search",
    "number_of_courses_viewed": 4,
    "annual_income": 80304.0
}

# requests has no default timeout, so without one the script can hang
# forever if the service never answers.
http_response = requests.post(url, json=client, timeout=10)
# Fail loudly on 4xx/5xx instead of trying to parse an error body as a result.
http_response.raise_for_status()

response = http_response.json()
print(response)

Question 5

Command to download the base image:

docker pull agrigorev/zoomcamp-model:2025


Then check the size:

docker images agrigorev/zoomcamp-model:2025



The size of this base image is 45 MB.

Question 6

In [None]:
# Update app.py to load pipeline_v2.bin instead (the base image has it).
# In app.py, change the load line to the following. open() raises
# FileNotFoundError if this is run somewhere pipeline_v2.bin is not present.
with open('pipeline_v2.bin', 'rb') as f_in:
    model = pickle.load(f_in)

# Client scoring script: same as in Question 4; the URL is still http://localhost:8000/predict.
# NOTE(review): this assumes the container publishes port 8000 on localhost -- confirm against the docker run command.


import requests

# Endpoint exposed by the model service.
url = "http://localhost:8000/predict"

# Lead to score; the keys are the fields of the request body.
client = {
    "lead_source": "organic_search",
    "number_of_courses_viewed": 4,
    "annual_income": 80304.0
}

# requests has no default timeout, so without one the script can hang
# forever if the service never answers.
http_response = requests.post(url, json=client, timeout=10)
# Fail loudly on 4xx/5xx instead of trying to parse an error body as a result.
http_response.raise_for_status()

response = http_response.json()
print(response)