# Homework 5 - Model Deployment
# This notebook covers model deployment using FastAPI and Docker

In [1]:
# Question 3: Load pipeline and score a record
import pickle
import requests

In [2]:
def download_file(url, filename, timeout=30):
    """Download ``url`` to ``filename`` unless the file already exists.

    Args:
        url: Address fetched with ``requests.get``.
        filename: Local path the response body is written to.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    import os
    if os.path.exists(filename):
        print(f"{filename} already exists")
        return

    print(f"Downloading {filename}...")
    response = requests.get(url, timeout=timeout)
    # Fail before touching the disk: otherwise an error page would be saved
    # under `filename` and every later run would report "already exists".
    response.raise_for_status()
    with open(filename, 'wb') as f:
        f.write(response.content)
    print("Download completed!")

In [3]:
# Fetch the pipeline file from the URL below; download_file skips the request
# when the target file is already on disk.
model_url = (
    "https://github.com/DataTalksClub/machine-learning-zoomcamp/raw/refs/heads/master/cohorts/2025/05-deployment/pipeline_v1.bin"
)
download_file(url=model_url, filename="pipeline_v1.bin")

# Load the model
def load_model(filename):
    """Unpickle and return the object stored in ``filename``.

    NOTE(security): ``pickle.load`` can execute code embedded in the file,
    so only pass files that come from a trusted source.
    """
    with open(filename, 'rb') as model_file:
        return pickle.load(model_file)


pipeline_v1.bin already exists


In [4]:
# Client record scored for Question 3
client = {
    "lead_source": "paid_ads",
    "number_of_courses_viewed": 2,
    "annual_income": 79276.0,
}

# Unpickle the pipeline from disk
pipeline = load_model(filename="pipeline_v1.bin")
print("Model loaded successfully!")


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Model loaded successfully!


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [5]:
# Make prediction
def predict_conversion(pipeline, client_data):
    """Return the positive-class (conversion) probability for one client.

    The record is wrapped in a one-element list before being passed to
    ``pipeline.predict_proba``; row 0, column 1 of the result is returned.
    """
    records = [client_data]
    class_probabilities = pipeline.predict_proba(records)
    return class_probabilities[0, 1]

# Score the Question 3 client
probability = predict_conversion(pipeline, client)
print(f"Question 3: Probability of conversion: {probability:.3f}")

# Pick the answer option with the smallest absolute distance to the score
options = [0.333, 0.533, 0.733, 0.933]
distances = [abs(option - probability) for option in options]
closest_option = options[distances.index(min(distances))]
print(f"Closest option: {closest_option}")

Question 3: Probability of conversion: 0.534
Closest option: 0.533


In [6]:
# Question 4: Create FastAPI service
# This code goes in a separate file: main.py

# Source of the FastAPI service, kept as a string so this cell can write it
# to main.py (served as `main:app`).
fastapi_code = '''
from fastapi import FastAPI
from pydantic import BaseModel
import pickle

# Load the model once at import time so every request reuses it.
# NOTE(security): pickle.load can execute code embedded in the file; only
# ship a pipeline_v1.bin that comes from a trusted source.
with open('pipeline_v1.bin', 'rb') as f:
    pipeline = pickle.load(f)

# Define request model
class ClientData(BaseModel):
    lead_source: str
    number_of_courses_viewed: int
    annual_income: float

# Create FastAPI app
app = FastAPI()

@app.post("/predict")
def predict(client: ClientData):
    """Return the conversion probability for one client."""
    # model_dump() is the Pydantic v2 API; the v1-style dict method is deprecated
    client_dict = client.model_dump()

    # Prepare for prediction (needs to be a list of one record)
    X = [client_dict]

    # Cast to a built-in float so the JSON response does not depend on how a
    # NumPy scalar type happens to serialize
    proba = float(pipeline.predict_proba(X)[0, 1])

    return {"conversion_probability": round(proba, 3)}

@app.get("/")
def root():
    return {"message": "Lead Scoring API"}

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
'''

# Save the FastAPI code to a file (explicit encoding so the result does not
# depend on the platform default)
with open('main.py', 'w', encoding='utf-8') as f:
    f.write(fastapi_code)

print("FastAPI code saved to main.py")

# Now let's test the prediction for Question 4
def test_question4_prediction():
    """Test the prediction for the client in Question 4"""
    client_q4 = {
        "lead_source": "organic_search",
        "number_of_courses_viewed": 4,
        "annual_income": 80304.0
    }
    
    probability_q4 = predict_conversion(pipeline, client_q4)
    print(f"Question 4: Probability of conversion: {probability_q4:.3f}")
    
    # Compare with options
    options_q4 = [0.334, 0.534, 0.734, 0.934]
    closest_option_q4 = min(options_q4, key=lambda x: abs(x - probability_q4))
    print(f"Closest option: {closest_option_q4}")
    
    return probability_q4

# Run the Question 4 check; the returned probability is kept in prob_q4
prob_q4 = test_question4_prediction()

FastAPI code saved to main.py
Question 4: Probability of conversion: 0.534
Closest option: 0.534


In [9]:
# Question 6: Create Dockerfile and test the container

# First, let's create a requirements file
# Exact version pins, one requirement per entry
pinned_packages = [
    "fastapi==0.104.1",
    "uvicorn==0.24.0",
    "scikit-learn==1.6.1",
    "pydantic==2.5.0",
]
# File layout: a leading blank line, then one pin per line
requirements_content = "\n" + "".join(f"{pin}\n" for pin in pinned_packages)

with open('requirements.txt', 'w') as requirements_file:
    requirements_file.write(requirements_content)

# Create Dockerfile
# Dockerfile source, kept as a string so this cell can write it to disk.
# NOTE(review): confirm the pins in requirements.txt install cleanly on the
# Python version shipped in the base image.
dockerfile_content = '''
FROM agrigorev/zoomcamp-model:2025

WORKDIR /app

# Copy requirements and install dependencies
COPY requirements.txt .
# --no-cache-dir keeps pip's download cache out of the image layer
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code
COPY main.py .
COPY pipeline_v1.bin .

# Expose port
EXPOSE 8000

# Command to run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
'''

# Explicit encoding so the written file does not depend on the platform default
with open('Dockerfile', 'w', encoding='utf-8') as f:
    f.write(dockerfile_content)

print("Dockerfile and requirements.txt created")


# Let's also create a test script
# Source of a small client script that posts one record to the running
# service; kept as a string so this cell can write it to test_api.py.
test_script = '''
import requests

def test_api():
    """POST one client record to the local service and return its score.

    Returns None (after printing the error) if the request fails.
    """
    url = "http://localhost:8000/predict"
    client = {
        "lead_source": "organic_search",
        "number_of_courses_viewed": 4,
        "annual_income": 80304.0
    }
    try:
        # A timeout keeps the script from hanging when the service is not up
        response = requests.post(url, json=client, timeout=10)
        # Report 4xx/5xx responses as HTTP errors instead of a confusing
        # KeyError on the missing result field
        response.raise_for_status()
        result = response.json()
        print(f"API Response: {result}")
        return result['conversion_probability']
    except Exception as e:
        print(f"Error: {e}")
        return None

if __name__ == "__main__":
    test_api()
'''

# Explicit encoding so the written file does not depend on the platform default
with open('test_api.py', 'w', encoding='utf-8') as f:
    f.write(test_script)

print("Test script created: test_api.py")

Dockerfile and requirements.txt created
Test script created: test_api.py
