In [1]:
# Download the model file (pipeline_v1.bin) from the course repository.
# -O writes the response body to the given filename, replacing any existing copy.
!wget -O pipeline_v1.bin https://github.com/DataTalksClub/machine-learning-zoomcamp/raw/refs/heads/master/cohorts/2025/05-deployment/pipeline_v1.bin

--2025-10-30 20:54:29--  https://github.com/DataTalksClub/machine-learning-zoomcamp/raw/refs/heads/master/cohorts/2025/05-deployment/pipeline_v1.bin
Resolving github.com (github.com)... 20.207.73.82
Connecting to github.com (github.com)|20.207.73.82|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/refs/heads/master/cohorts/2025/05-deployment/pipeline_v1.bin [following]
--2025-10-30 20:54:29--  https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/refs/heads/master/cohorts/2025/05-deployment/pipeline_v1.bin
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1300 (1.3K) [application/octet-stream]
Saving to: ‘pipeline_v1.bin’


2025-10-30 20:

In [2]:
# Verify checksum: print the MD5 digest of the downloaded file.
# This only displays the digest; nothing here fails on a mismatch, so it has to be
# compared by eye with the expected value
# (presumably the one published with the assignment — TODO confirm).
!md5sum pipeline_v1.bin

7d17d2e4dfbaf1e408e1a62e6e880d49  pipeline_v1.bin


In [3]:
import warnings
# Install a blanket "ignore" filter: every Python warning raised in this kernel
# process is suppressed from here on.
# NOTE(review): presumably meant to hide scikit-learn's warning when unpickling the
# pipeline — confirm, and consider filtering only that warning category.
warnings.filterwarnings("ignore") 

In [4]:
# Question 3: score one client with the downloaded pipeline
import pickle

# Deserialize the pipeline from disk.
# NOTE: unpickling runs code embedded in the file — only load files you trust.
with open('pipeline_v1.bin', 'rb') as f:
    pipeline = pickle.load(f)

# The client record to score
client = dict(
    lead_source="paid_ads",
    number_of_courses_viewed=2,
    annual_income=79276.0,
)

# predict_proba is called on a one-element batch; row 0 is that client and
# index 1 of the row is what this notebook reports as the conversion probability.
class_probabilities = pipeline.predict_proba([client])[0]
probability = class_probabilities[1]

print(f"Conversion probability: {probability:.3f}")

# Pick the listed answer option nearest to the prediction
# (sorted() is stable, so the first option wins on a tie, exactly like min()).
answers = [0.333, 0.533, 0.733, 0.933]
closest = sorted(answers, key=lambda option: abs(option - probability))[0]
print(f"Question 3 Answer: {closest}")

Conversion probability: 0.534
Question 3 Answer: 0.533


In [5]:
# Question 4: FastAPI Service

In [6]:
%%writefile main.py
from fastapi import FastAPI
from pydantic import BaseModel
import pickle

# Load model
with open('pipeline_v1.bin', 'rb') as f:
    pipeline = pickle.load(f)

app = FastAPI()

class Client(BaseModel):
    lead_source: str
    number_of_courses_viewed: int
    annual_income: float

@app.post("/predict")
def predict(client: Client):
    probability = pipeline.predict_proba([client.dict()])[0][1]
    return {"conversion_probability": round(probability, 3)}

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)


Overwriting main.py


In [7]:
# Question 4: start the FastAPI server in the background, send one request,
# and always shut the server down afterwards.
import subprocess
import time
import requests

STARTUP_TIMEOUT_S = 30   # give up if the server never accepts a connection
REQUEST_TIMEOUT_S = 5    # never let a single HTTP call hang the kernel
SHUTDOWN_TIMEOUT_S = 10  # grace period before the server is killed

url = "http://localhost:8000/predict"
client = {
    "lead_source": "organic_search",
    "number_of_courses_viewed": 4,
    "annual_income": 80304.0
}

# Start the server
server_process = subprocess.Popen(['uvicorn', 'main:app', '--host', '0.0.0.0', '--port', '8000'])

try:
    # Wait for the server by retrying the request until it connects, instead of
    # sleeping a fixed amount (too short on a slow machine, wasted time on a fast one).
    deadline = time.monotonic() + STARTUP_TIMEOUT_S
    while True:
        if server_process.poll() is not None:
            # uvicorn already exited (e.g. import error, port in use): stop waiting.
            raise RuntimeError(
                f"uvicorn exited during startup with code {server_process.returncode}"
            )
        try:
            response = requests.post(url, json=client, timeout=REQUEST_TIMEOUT_S)
            break
        except requests.exceptions.ConnectionError:
            if time.monotonic() >= deadline:
                raise
            time.sleep(0.2)

    # Surface HTTP errors (4xx/5xx) here rather than as a KeyError further down.
    response.raise_for_status()
    result = response.json()
    probability = result["conversion_probability"]

    print(f"API Response: {result}")
    print(f"Probability: {probability}")

    # Map to closest answer
    answers = [0.334, 0.534, 0.734, 0.934]
    closest = min(answers, key=lambda x: abs(x - probability))
    print(f"Question 4 Answer: {closest}")

except Exception as e:
    print(f"Error: {e}")

finally:
    # Stop the server even if the cell is interrupted, and reap the process so
    # it is not left behind holding port 8000.
    server_process.terminate()
    try:
        server_process.wait(timeout=SHUTDOWN_TIMEOUT_S)
    except subprocess.TimeoutExpired:
        server_process.kill()
        server_process.wait()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
INFO:     Started server process [27533]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:54520 - "POST /predict HTTP/1.1" 200 OK
API Response: {'conversion_probability': 0.534}
Probability: 0.534
Question 4 Answer: 0.534


In [8]:
# Final test script for Question 6 (run after Docker container is running)
# NOTE(review): port 8000 is also the port used by the local uvicorn test in this
# notebook; make sure that server is stopped so this request really reaches the
# container — TODO confirm the recorded answer came from the container.
import requests

url = "http://localhost:8000/predict"
client = {
    "lead_source": "organic_search",
    "number_of_courses_viewed": 4,
    "annual_income": 80304.0
}

try:
    # A timeout keeps the cell from hanging forever if the container is unresponsive.
    response = requests.post(url, json=client, timeout=10)
    # Turn 4xx/5xx responses into an explicit error instead of a KeyError below.
    response.raise_for_status()
    result = response.json()
    probability = result["conversion_probability"]

    print(f"Docker API Response: {result}")
    print(f"Probability: {probability}")

    # Map to closest answer
    answers = [0.39, 0.59, 0.79, 0.99]
    closest = min(answers, key=lambda x: abs(x - probability))
    print(f"Question 6 Answer: {closest}")

except requests.exceptions.RequestException as e:
    # Transport/HTTP failures: the service is unreachable or returned an error status.
    print(f"Error: {e}. Make sure Docker container is running.")
except (KeyError, TypeError, ValueError) as e:
    # The service answered, but not with the expected JSON payload; blaming
    # Docker here would be misleading.
    print(f"Error: unexpected response payload: {e!r}")

INFO:     127.0.0.1:54524 - "POST /predict HTTP/1.1" 200 OK
Docker API Response: {'conversion_probability': 0.534}
Probability: 0.534
Question 6 Answer: 0.59
