In [None]:
# Install the UI framework (gradio) and pyspellchecker into the running kernel.
# NOTE(review): versions are unpinned; the captured log below resolved
# gradio 5.9.1 and pyspellchecker 0.8.2. pyspellchecker is not imported by the
# application cell visible in this notebook; confirm it is still needed.
!pip install gradio pyspellchecker


Collecting gradio
  Downloading gradio-5.9.1-py3-none-any.whl.metadata (16 kB)
Collecting pyspellchecker
  Downloading pyspellchecker-0.8.2-py3-none-any.whl.metadata (9.4 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.5.2 (from gradio)
  Downloading gradio_client-1.5.2-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (fro

In [None]:
# Upgrade gradio to the newest available release.
# NOTE(review): looks redundant; gradio is installed by the first cell and
# upgraded again by the `--upgrade gradio transformers` cell that follows.
!pip install --upgrade gradio



In [None]:
# Upgrade gradio and transformers together.
# NOTE(review): transformers is not imported by the application cell visible in
# this notebook; confirm it is required before keeping this install.
!pip install --upgrade gradio transformers



In [None]:
# Install langdetect; the application cell imports `detect` from it.
# NOTE(review): unpinned; the captured log below built langdetect 1.0.9.
!pip install langdetect

Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 981.5/981.5 kB 11.9 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: langdetect
  Building wheel for langdetect (setup.py) ... done
  Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993222 sha256=3bebe21dd41019256b6acdd4df86216ebc5c925afacae1a2712fb2b6d50b3463
  Stored in directory: /root/.cache/pip/wheels/95/03/7d/59ea870c70ce4e5a370638b5462a7711

In [None]:
# Download the multilingual spaCy pipeline that the application cell loads with
# spacy.load("xx_ent_wiki_sm"). The captured output below warns that the
# kernel/runtime may need a restart before the package can be loaded.
!python -m spacy download xx_ent_wiki_sm

Collecting xx-ent-wiki-sm==3.7.0
  Downloading https://github.com/explosion/spacy-models/releases/download/xx_ent_wiki_sm-3.7.0/xx_ent_wiki_sm-3.7.0-py3-none-any.whl (11.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 11.1/11.1 MB 21.1 MB/s eta 0:00:00
Installing collected packages: xx-ent-wiki-sm
Successfully installed xx-ent-wiki-sm-3.7.0
✔ Download and installation successful
You can now load the package via spacy.load('xx_ent_wiki_sm')
⚠ Restart to reload dependencies
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.




---



In [None]:
import spacy
import json
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
from langdetect import detect
import re

# Load the multilingual NLP model
# `nlp` is a module-level pipeline shared by extract_keywords(). The package
# must already be installed (see the `python -m spacy download xx_ent_wiki_sm`
# cell), otherwise spacy.load raises at import time of this cell.
# NOTE(review): extract_keywords() relies on token.lemma_ and token.is_stop;
# confirm this pipeline actually populates them for the languages you expect.
nlp = spacy.load("xx_ent_wiki_sm")  # Multilingual spaCy model

def extract_keywords(user_input):
    """Extract keywords from user input using spaCy.

    The text is tokenised with the module-level ``nlp`` pipeline; stop words,
    punctuation and whitespace tokens are discarded.

    Args:
        user_input: Free-text query. ``None`` or a blank string yields ``[]``.

    Returns:
        list[str]: One non-empty string per kept token, in document order.
        The lemma is used when the pipeline provides one, otherwise the
        token's surface text.
    """
    if not user_input or not user_input.strip():
        return []

    keywords = []
    for token in nlp(user_input):
        if token.is_stop or token.is_punct or token.is_space:
            continue
        # A pipeline without a lemmatizer can leave lemma_ empty. An empty
        # keyword must never be returned: callers join keywords into a regex
        # alternation, and an empty alternative matches every string.
        keyword = (token.lemma_ or token.text).strip()
        if keyword:
            keywords.append(keyword)
    return keywords

class Chatbot:
    """Retrieval-style library assistant.

    Free-text questions are answered by TF-IDF cosine similarity against the
    ``"User"`` prompts of ``dataset``; book recommendations are produced by
    keyword-matching the ``Title`` column of ``book_data``.
    """

    # A stored prompt must score strictly above this cosine similarity for its
    # answer to be returned by get_response().
    SIMILARITY_THRESHOLD = 0.5

    def __init__(self, dataset, book_data):
        """Initialize the chatbot with dataset and book data.

        Args:
            dataset: Sequence of dicts; each entry is read via its ``"User"``
                (prompt) and ``"Chatbot"`` (answer) keys.
            book_data: pandas DataFrame; ``Title``, ``Status`` and ``Location``
                columns are read by recommend_books().

        Raises:
            ValueError: If ``dataset`` is empty (see prepare_vectorizer).
        """
        self.dataset = dataset
        self.book_data = book_data
        self.vectorizer, self.X = self.prepare_vectorizer()

    def prepare_vectorizer(self):
        """Create and fit a TF-IDF vectorizer on the ``"User"`` prompts.

        Returns:
            tuple: ``(vectorizer, X)`` where ``X`` is the fitted TF-IDF matrix
            with one row per dataset entry.

        Raises:
            ValueError: If the dataset contains no entries.
        """
        corpus = [entry["User"] for entry in self.dataset]
        if not corpus:
            raise ValueError("Corpus for TF-IDF Vectorizer is empty.")
        vectorizer = TfidfVectorizer()
        X = vectorizer.fit_transform(corpus)
        return vectorizer, X

    def get_response(self, user_input):
        """Get the chatbot's response based on user input.

        Args:
            user_input: The user's message. ``None`` or blank text is rejected
                with a prompt to provide input.

        Returns:
            str: The ``"Chatbot"`` answer of the most similar stored prompt if
            its similarity exceeds ``SIMILARITY_THRESHOLD``; otherwise a
            fallback message. Any internal error is logged to stdout and
            reported as a generic apology.
        """
        if not user_input or not user_input.strip():
            return "Please provide a valid input."
        try:
            user_vec = self.vectorizer.transform([user_input])
            similarities = cosine_similarity(user_vec, self.X)
            # similarities has a single row, so the flat argmax is the column index.
            best_match_index = int(similarities.argmax())
            if similarities[0, best_match_index] > self.SIMILARITY_THRESHOLD:
                return self.dataset[best_match_index]["Chatbot"]
        except Exception as e:
            print(f"Error during response generation: {e}")
            return "Oops, something went wrong while processing your request. Can you try again?"
        return "Hmm, I couldn’t find anything related to that. Want to try rephrasing?"

    def recommend_books(self, user_input, filters=None):
        """Recommend books based on user input and filters.

        A book is recommended when its title contains every extracted keyword
        as a whole word (case-insensitive) and every non-empty filter value
        occurs in the corresponding column (case-insensitive substring).

        Args:
            user_input: Free-text description of the wanted books.
            filters: Optional mapping of column name to required substring.
                Column names are matched exactly first, then
                case-insensitively. Empty values are ignored.

        Returns:
            str: An HTML fragment, either a list of matching books or a
            ``<p>`` message explaining that nothing matched or that an error
            occurred. An unknown filter column is reported through the generic
            error message.
        """
        from html import escape  # function-scope import: only this method needs it

        not_found = "<p>Sorry, I couldn’t find any books matching your request. Maybe try different keywords?</p>"
        filters = filters or {}

        # Blank keywords are dropped: an empty regex alternative would match every title.
        keywords = [kw.strip() for kw in extract_keywords(user_input) if kw and kw.strip()]
        print(f"Extracted Keywords: {keywords}")  # Debug output
        if not keywords:
            return not_found

        try:
            # Match titles with stricter word boundaries using regex
            keyword_patterns = [rf"\b{re.escape(kw)}\b" for kw in keywords]
            matched_books = self.book_data[
                self.book_data['Title'].str.contains(
                    '|'.join(keyword_patterns), case=False, na=False, flags=re.UNICODE
                )
            ]
            print(f"Matched Books Before Filters: {len(matched_books)}")  # Debug output

            # Callers may pass lower-cased keys; resolve them to the real column names.
            columns_by_lower = {str(col).lower(): col for col in matched_books.columns}
            for key, value in filters.items():
                if not value:
                    continue
                if key in matched_books.columns:
                    column = key
                else:
                    # Raises KeyError for an unknown column; handled below.
                    column = columns_by_lower[str(key).strip().lower()]
                # regex=False: the value is user text, not a pattern.
                matched_books = matched_books[
                    matched_books[column].str.contains(str(value), case=False, na=False, regex=False)
                ]
            print(f"Matched Books After Filters: {len(matched_books)}")  # Debug output

            if not matched_books.empty:
                # Only show books where titles contain all keywords
                compiled = [re.compile(pattern, re.IGNORECASE) for pattern in keyword_patterns]
                has_all = matched_books['Title'].apply(
                    lambda title: all(rx.search(str(title)) for rx in compiled)
                )
                filtered_books = matched_books[has_all]

                if filtered_books.empty:
                    return "<p>Sorry, I couldn’t find any books matching all your keywords. Maybe try simplifying your query?</p>"

                items = "".join(
                    "<li>"
                    f"{escape(str(row['Title']))} "
                    f"({escape(str(row['Status']))}, located at {escape(str(row['Location']))})"
                    "</li>"
                    for _, row in filtered_books.iterrows()
                )
                return (
                    "<div style='max-height: 300px; overflow-y: auto;'>"
                    "<p>Here are some books you might like:</p>"
                    f"<ul>{items}</ul></div>"
                )
        except Exception as e:
            print(f"Error during book recommendation: {e}")
            return "<p>Yikes, something went wrong while finding books for you. Mind trying again?</p>"

        return not_found

# Gradio interface function
def chatbot_ui(user_input):
    """Interface function to get a response from the chatbot or recommend books.

    Messages containing the word "recommend" (any case) are treated as book
    requests of the form::

        recommend <keywords>[, <Column>: <value>][, <more keywords>] ...

    Comma-separated segments containing a colon become filters (split on the
    first colon only, so values may themselves contain colons). Segments
    without a colon are treated as additional keywords. Every other message is
    forwarded to ``chatbot.get_response``.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        str: The chatbot's reply, or an HTML error message if anything raised.
    """
    from html import escape  # function-scope import: only the error path needs it

    try:
        if "recommend" in user_input.lower():
            head, *extras = user_input.split(',')
            keyword_parts = [head.lower().replace("recommend", "").strip()]
            filters = {}
            for part in extras:
                key, sep, value = part.partition(":")
                if not sep:
                    # No colon: not a filter, so keep the text as search terms.
                    keyword_parts.append(part.lower().strip())
                    continue
                # Keep the key's original case so it can match DataFrame column
                # names such as "Status"; values are matched case-insensitively.
                key = key.strip()
                if key:
                    filters[key] = value.strip().lower()
            keywords = " ".join(part for part in keyword_parts if part)
            response = chatbot.recommend_books(keywords, filters)
        else:
            response = chatbot.get_response(user_input)
        return response
    except Exception as e:
        print(f"Error: {e}")  # Log the error in the terminal
        # Escape the exception text: it may echo user input into the HTML reply.
        return f"<p>Oh no! Something went wrong: {escape(str(e))}. Can you try again?</p>"

# Load datasets and validate
file_path = "formatted_chatbot_data.json"
try:
    # Explicit UTF-8: JSON text is UTF-8, while open()'s default encoding is
    # platform dependent and can mis-decode non-ASCII prompts and answers.
    with open(file_path, 'r', encoding="utf-8") as f:
        dataset = json.load(f)
    if not dataset:
        raise ValueError("Chatbot dataset is empty or not loaded correctly.")
except Exception as e:
    print(f"Error loading chatbot dataset: {e}")
    raise

book_data_path = "lspu_library_dataset - Sheet1.csv"
try:
    book_data = pd.read_csv(book_data_path)
    if book_data.empty:
        raise ValueError("Book data is empty or not loaded correctly.")
    # Chatbot.recommend_books reads these columns; fail here with a clear
    # message instead of at the first user request.
    missing_columns = {"Title", "Status", "Location"} - set(book_data.columns)
    if missing_columns:
        raise ValueError(f"Book data is missing required columns: {sorted(missing_columns)}")
except Exception as e:
    print(f"Error loading book data: {e}")
    raise

# Initialize chatbot and Gradio interface
chatbot = Chatbot(dataset, book_data)

chatbot_interface = gr.Interface(
    fn=chatbot_ui,
    inputs=gr.Textbox(lines=3, placeholder="Type your question or request book recommendations...", label="Your Message"),
    # Book recommendations and error messages are HTML fragments, so render
    # them as HTML instead of showing raw tags in a text box.
    outputs=gr.HTML(label="Response"),
    title="Libby the Library Assistant",
    description="Ask about books or get recommendations."
)

chatbot_interface.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c0e56fe2f45ad459be.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


