<a href="https://colab.research.google.com/github/Kasper-Raupach-Haurum/M6-Data-Engineering-and-MLOps/blob/main/Test_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [46]:
!pip install mlflow --quiet
!pip install -U ipython -quiet
!pip install streamlit --quiet
!pip install pyngrok --quiet


Usage:   
  pip3 install [options] <requirement specifier> [package-index-options] ...
  pip3 install [options] -r <requirements file> [package-index-options] ...
  pip3 install [options] [-e] <vcs project url> ...
  pip3 install [options] [-e] <local project path> ...
  pip3 install [options] <archive url/path> ...

no such option: -u


In [47]:
import os
import shutil
import sys
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from mlflow.tracking import MlflowClient
from random import random, randint
import mlflow
import mlflow.sklearn

In [None]:
# Download the full 20 Newsgroups corpus with headers, footers and quoted
# replies removed, then split it into train/test partitions using a fixed seed.
newsgroups = fetch_20newsgroups(
    subset='all',
    remove=('headers', 'footers', 'quotes'),
)
X_train, X_test, y_train, y_test = train_test_split(
    newsgroups.data,
    newsgroups.target,
    random_state=42,
)

In [None]:
# Assemble (without fitting yet) the text-classification pipeline:
# TF-IDF features with English stop words dropped, fed into a linear SVM.
pipeline = Pipeline(steps=[
    ('tfidf', TfidfVectorizer(stop_words='english')),
    ('clf', LinearSVC()),
])

Accuracy: 0.7510611205432938


In [None]:
# Fit on the training split and predict the held-out split in one chain
# (fit() returns the pipeline itself), then report accuracy on the test labels.
predictions = pipeline.fit(X_train, y_train).predict(X_test)
print(f'Accuracy: {accuracy_score(y_test, predictions)}')

## Experiment tracking with MLflow

In [None]:
# Point MLflow at a local SQLite tracking store and select the experiment.
# set_experiment() creates the experiment the first time and just re-selects it
# afterwards, so this cell survives a re-run; calling create_experiment()
# directly raises once the name already exists in mlflow.db.
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("Newsgroups_Classifier")
# Look the id up by name so later cells can keep passing experiment_id.
experiment_id = mlflow.get_experiment_by_name("Newsgroups_Classifier").experiment_id

In [None]:
# Log parameters, metrics, and artifacts
with mlflow.start_run(experiment_id=experiment_id):
    # Record the regularisation strength the classifier was actually built
    # with, rather than a hard-coded literal that can drift from the model.
    mlflow.log_param("C", pipeline.named_steps["clf"].C)
    mlflow.log_metric("accuracy", accuracy_score(y_test, predictions))
    mlflow.sklearn.log_model(pipeline, "newsgroups_pipeline")

# app.py loads the model from the relative path "newsgroups_pipeline".
# log_model() writes into the run's artifact store, not to that path, so also
# save a local copy. save_model() refuses to write into an existing directory,
# hence the cleanup first to keep this cell re-runnable.
shutil.rmtree("newsgroups_pipeline", ignore_errors=True)
mlflow.sklearn.save_model(pipeline, "newsgroups_pipeline")

In [None]:
%%writefile requirements.txt
# Runtime dependencies for the Streamlit app image.
# The installable distribution is "scikit-learn"; "sklearn" is only the import
# name and is not a valid way to install the library from PyPI.
mlflow
streamlit
scikit-learn
pandas
numpy

Writing requirements.txt


In [None]:
%%writefile app.py
import streamlit as st
import mlflow
import mlflow.sklearn
import joblib

st.title("20 Newsgroups Text Classifier")

# Load the model
model = mlflow.sklearn.load_model("newsgroups_pipeline")

# Allow users to input text
text = st.text_area("Enter text to classify:")

if text:
    # Classify the input text
    prediction = model.predict([text])[0]
    st.write(f"The predicted category is: {prediction}")

Overwriting app.py


In [None]:
# Save the requirements for the Streamlit app.
# The installable distribution is "scikit-learn"; "sklearn" is only the import
# name and is not a valid way to install the library from PyPI.
with open("requirements.txt", "w") as f:
    f.write("mlflow\nstreamlit\nscikit-learn\npandas\nnumpy\n")

In [None]:
# Create the Streamlit app.
# The generated script caches the loaded model (Streamlit re-executes the whole
# script on every interaction) and ignores whitespace-only input.
with open("app.py", "w") as f:
    f.write('''\
"""Streamlit front end for the 20 Newsgroups text classifier."""
import streamlit as st
import mlflow
import mlflow.sklearn
import joblib

st.title("20 Newsgroups Text Classifier")


@st.cache_resource
def load_model():
    """Load the saved pipeline once and reuse it across Streamlit reruns."""
    return mlflow.sklearn.load_model("newsgroups_pipeline")


# Load the model
model = load_model()

# Allow users to input text
text = st.text_area("Enter text to classify:")

# Skip empty or whitespace-only input instead of classifying it
if text.strip():
    # Classify the input text
    prediction = model.predict([text])[0]
    st.write(f"The predicted category is: {prediction}")
''')


## Docker deployment

In [None]:
%%writefile Dockerfile
# Container image for the Streamlit newsgroups classifier.
# Use the official Python image as the base image
FROM python:3.8-slim

# Set the working directory
WORKDIR /app

# Copy the requirements file on its own first so the dependency layer is
# rebuilt only when requirements.txt changes
COPY requirements.txt .

# Install the required packages. --no-cache-dir keeps pip's download cache out
# of the image layer. The earlier --trusted-host flag is dropped: it only
# relaxed HTTPS checks for a legacy hostname and is not needed for the default
# package index.
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code
COPY . .

# Expose the port the app will run on
EXPOSE 8501

# Start the Streamlit app, stating the port and bind address explicitly so the
# published container port is guaranteed to reach the server
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]

Writing Dockerfile


In [None]:
# Create a Dockerfile for the Streamlit app.
# pip runs with --no-cache-dir so its download cache is not baked into the
# image, and Streamlit's port and bind address are stated explicitly so the
# published container port is guaranteed to reach the server.
with open("Dockerfile", "w") as f:
    f.write('''\
# Container image for the Streamlit newsgroups classifier.
FROM python:3.8-slim

WORKDIR /app

# Install dependencies before copying the app so this layer is cached
COPY requirements.txt ./requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 8501

CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
''')

In [None]:
# Print the build / push / run steps for publishing the image via Docker Hub.
# The text is assembled line by line so the trailing spaces after each step
# heading are explicit; the printed output starts and ends with a blank line.
print("\n".join((
    "",
    "1. Build the Docker image: ",
    "   docker build -t your_dockerhub_username/newsgroups_classifier .",
    "",
    "2. Push the Docker image to Docker Hub: ",
    "   docker push your_dockerhub_username/newsgroups_classifier",
    "",
    "3. Run the Docker container: ",
    "   docker run -p 8501:8501 your_dockerhub_username/newsgroups_classifier",
    "",
)))