<a href="https://colab.research.google.com/github/Sidhtang/bert-project/blob/main/fast_api_for_the_mode_l.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

In [2]:
import torch
from transformers import AutoTokenizer
import os

# Check if MPS is available and set the device accordingly
device = torch.device('mps') if torch.backends.mps.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# Load the model
model_name = "/content/drive/MyDrive/distilbert_general_router_model (1).pth"

# Check if the model file exists and is not empty
if not os.path.exists(model_name) or os.stat(model_name).st_size == 0:
    print(f"Error: Model file not found or empty: {model_name}")
else:
    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

    try:
        model = torch.load(model_name, map_location=device)
    except RuntimeError as e:
        print(f"Error loading model: {e}")
    else:
        model.eval()  # Put the model in evaluation mode
        model.to(device)

        # Function for inference
        def classify_query(query):
            inputs = tokenizer(query, return_tensors='pt', max_length=256, truncation=True, padding=True)
            inputs = {k: v.to(device) for k, v in inputs.items()}  # Move inputs to the correct device
            with torch.no_grad():
                outputs = model(**inputs)

            logits = outputs.logits
            prediction = torch.argmax(logits, dim=1).item()
            return "Personalization" if prediction == 1 else "Customer_support"

        # Test the model with queries
        queries = [
            "Hey there, you guys got some nice hoodies for me?"
        ]
        for query in queries:
            result = classify_query(query)
            print(f"Query: {query}")
            print(f"Prediction: {result}")
            print()

Using device: cpu


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

  model = torch.load(model_name, map_location=device)


Query: Hey there, you guys got some nice hoodies for me?
Prediction: Personalization



In [3]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.38.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting tenacity<9,>=8.1.0 (from streamlit)
  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<5,>=2.1.5 (from streamlit)
  Downloading watchdog-4.0.2-py3-none-manylinux2014_x86_64.whl.metadata (38 kB)
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)
Downloading streamlit-1.38.0-py2.py3-none-any.whl (8.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m63.2 MB

In [5]:
!pip install fastapi



In [6]:
!pip install uvicorn

Collecting uvicorn
  Downloading uvicorn-0.31.0-py3-none-any.whl.metadata (6.6 kB)
Collecting h11>=0.8 (from uvicorn)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading uvicorn-0.31.0-py3-none-any.whl (63 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.7/63.7 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading h11-0.14.0-py3-none-any.whl (58 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/58.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: h11, uvicorn
Successfully installed h11-0.14.0 uvicorn-0.31.0


In [5]:
!pip install gradio

Collecting gradio
  Downloading gradio-4.44.1-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from g

In [6]:
import torch
from transformers import AutoTokenizer
import os
import gradio as gr

# Check if MPS is available and set the device accordingly
device = torch.device('mps') if torch.backends.mps.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# Load the model
model_name = "/content/drive/MyDrive/distilbert_general_router_model (1).pth"

# Check if the model file exists and is not empty
if not os.path.exists(model_name) or os.stat(model_name).st_size == 0:
    print(f"Error: Model file not found or empty: {model_name}")
else:
    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

    try:
        model = torch.load(model_name, map_location=device)
    except RuntimeError as e:
        print(f"Error loading model: {e}")
    else:
        model.eval()  # Put the model in evaluation mode
        model.to(device)

        # Function for inference
        def classify_query(query):
            inputs = tokenizer(query, return_tensors='pt', max_length=256, truncation=True, padding=True)
            inputs = {k: v.to(device) for k, v in inputs.items()}  # Move inputs to the correct device
            with torch.no_grad():
                outputs = model(**inputs)

            logits = outputs.logits
            prediction = torch.argmax(logits, dim=1).item()
            return "Personalization" if prediction == 1 else "Customer_support"

        # Gradio interface
        def gradio_interface(query):
            result = classify_query(query)
            return result

        # Create and launch the Gradio interface
        iface = gr.Interface(
            fn=gradio_interface,
            inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
            outputs="text",
            title="Query Classifier",
            description="Classify queries as 'Personalization' or 'Customer_support'",
        )

        iface.launch()

Using device: cpu


  model = torch.load(model_name, map_location=device)


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://198d31f68cd72f0257.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
