In [44]:
import json
import pickle
import pandas as pd

In [45]:
# Load the semicolon-separated bank dataset from the Kaggle input directory.
# NOTE(review): `df` is not referenced by any later cell visible in this notebook.
df = pd.read_csv(
    filepath_or_buffer="/kaggle/input/bank-credit-scoring/bank.csv",
    sep=";",
)

In [46]:
! pip install pipenv -q

### Question 1 - What's the version of pipenv you installed?

In [47]:
# Print the version of the pipenv executable found on PATH (answer to Question 1).
! pipenv --version

pipenv, version 2023.10.3


### Question 2 - What's the first hash for scikit-learn you get in Pipfile.lock?

In [48]:
# Install scikit-learn pinned to 1.3.1 through pipenv; the Pipfile.lock this
# produces is the file inspected in the next cell.
! pipenv install scikit-learn==1.3.1

[1;32mInstalling scikit-[0m[1;33mlearn[0m[1;32m==[0m[1;36m1.3[0m[1;32m.[0m[1;36m1[0m[1;33m...[0m
[?25lResolving scikit-[33mlearn[0m==[1;36m1.3[0m.[1;36m1[0m[33m...[0m
[2K✔ Installation Succeeded
[2K[32m⠋[0m Installing scikit-learn...
[1A[2K[1mInstalling dependencies from Pipfile.lock [0m[1m([0m[1me23712[0m[1m)[0m[1;33m...[0m
To activate this project's virtualenv, run [33mpipenv shell[0m.
Alternatively, run a command inside the virtualenv with [33mpipenv run[0m.


In [49]:
# Print the first entry of the hash list that Pipfile.lock records for scikit-learn
# under its "default" section (answer to Question 2).
with open("/kaggle/working/Pipfile.lock") as lock_file:
    lock_data = json.load(lock_file)

sklearn_hashes = lock_data["default"]["scikit-learn"]["hashes"]
print(sklearn_hashes[0])

sha256:0c275a06c5190c5ce00af0acbb61c06374087949f643ef32d355ece12c4db043


In [50]:
! wget https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/master/cohorts/2023/05-deployment/homework/dv.bin -q
! wget https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/master/cohorts/2023/05-deployment/homework/model1.bin -q

In [51]:
def _unpickle(path):
    """Open `path` in binary mode and return the object pickled inside it."""
    # SECURITY: pickle.load can execute arbitrary code while deserialising, so only
    # use it on files from a source you trust (these two were fetched with wget).
    with open(path, "rb") as handle:
        return pickle.load(handle)


# `dv` encodes a client record into the feature matrix that `model` consumes.
dv = _unpickle("/kaggle/working/dv.bin")
model = _unpickle("/kaggle/working/model1.bin")

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


### Question 3 - Prediction from inside Jupyter Notebook

In [52]:
# Encode a single client record with the `dv` object loaded from dv.bin.
client = {"job": "retired", "duration": 445, "poutcome": "success"}
client_data = dv.transform(client)

In [53]:
# Show the per-class probabilities and the predicted label for the encoded client.
probabilities = model.predict_proba(client_data)[0]
label = model.predict(client_data)[0]
probabilities, label

(array([0.09806907, 0.90193093]), 'yes')

### Question 4 - Prediction from Model served with Flask

In [54]:
script = """from flask import Flask, request, jsonify
import joblib

app = Flask(__name__)

# Load your trained model
model = joblib.load('/kaggle/working/model1.bin')

@app.route('/predict', methods=['POST'])
def predict():
    data = request.json
    job = data['job']
    duration = data['duration']
    poutcome = data['poutcome']

    # Prepare the data for prediction
    features = [job, duration, poutcome]  # Assuming these are the features expected by your model

    # Make the prediction
    prediction = model.predict([features])

    return jsonify({'prediction': int(prediction[0])})

if __name__ == '__main__':
    app.run(debug=True)

"""

with open("app.py", "w") as outfile:
    outfile.write(script)

In [55]:
# ! pipenv install Flask gunicorn

[1;32mInstalling Flask[0m[1;33m...[0m
[?25lResolving Flask[33m...[0m
[2K✔ Installation Succeeded
[2K[32m⠋[0m Installing Flask...
[1A[2K[1;32mInstalling gunicorn[0m[1;33m...[0m
[?25lResolving gunicorn[33m...[0m
[2K✔ Installation Succeeded
[2K[32m⠋[0m Installing gunicorn...
[1A[2K[1mInstalling dependencies from Pipfile.lock [0m[1m([0m[1me23712[0m[1m)[0m[1;33m...[0m
To activate this project's virtualenv, run [33mpipenv shell[0m.
Alternatively, run a command inside the virtualenv with [33mpipenv run[0m.


In [56]:
# ! pipenv run gunicorn -b 0.0.0.0:5000 app:app

In [57]:
# import requests

# url = "http://127.0.0.1:5000/predict"  # Replace with your actual URL
# client = {"job": "unknown", "duration": 270, "poutcome": "failure"}

# response = requests.post(url, json=client)

# # Print the prediction
# print(response.json())

In [58]:
# The HTTP request cell is commented out, so the Question 4 client is scored
# in-process with the `dv` and `model` objects already loaded in the kernel.
client = {"job": "unknown", "duration": 270, "poutcome": "failure"}
client_data = dv.transform(client)

probabilities = model.predict_proba(client_data)[0]
label = model.predict(client_data)[0]
probabilities, label

(array([0.86031053, 0.13968947]), 'no')

### Question 5 - So what's the size of this base image?

In [59]:
# Query the Docker Hub tags endpoint for svizor/zoomcamp-model and use jq to print
# the `full_size` field of the tag named "3.10.12-slim" (answer to Question 5).
! curl -s https://hub.docker.com/v2/repositories/svizor/zoomcamp-model/tags/ | \
    jq '.results[] | select(.name=="3.10.12-slim") | .full_size'

curl: /opt/conda/lib/libcurl.so.4: no version information available (required by curl)
53421085


### Question 6 - Prediction using Model served with Docker

In [60]:
# # Dockerfile
# FROM svizor/zoomcamp-model:3.10.12-slim

# # Set the working directory in the container
# WORKDIR /app

# # Copy Pipenv files
# COPY Pipfile Pipfile.lock ./

# # Install dependencies
# RUN pipenv install --system --deploy --ignore-pipfile

# # Copy the Flask script
# COPY app.py /app

# # Expose the port that the app will run on
# EXPOSE 5000

# # Command to run the app with Gunicorn
# CMD ["gunicorn", "-b", "0.0.0.0:5000", "app:app"]

# # Build the Docker image from your Dockerfile located in the same directory as your Flask script and Pipenv files
# docker build -t prediction_service .

# docker run -p 5000:5000 prediction_service

In [61]:
# The Dockerfile and docker commands are commented out, so this is NOT a response
# from the container: the client is scored in-process with the `dv` and `model`
# objects loaded from dv.bin / model1.bin earlier in the notebook.
# NOTE(review): the base image may ship a different model file — verify this value
# against the running container before using it as the Question 6 answer.
client = {"job": "retired", "duration": 445, "poutcome": "success"}
client_data = dv.transform(client)

probabilities = model.predict_proba(client_data)[0]
label = model.predict(client_data)[0]
probabilities, label

(array([0.09806907, 0.90193093]), 'yes')