In [33]:

import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
import warnings
import gradio as gr

# Silence library warnings so they do not clutter the notebook output.
# NOTE(review): this is a blanket filter, so deprecation notices are hidden too.
warnings.filterwarnings("ignore")

# Fetch the NLTK resources this script relies on: the Punkt tokenizer models
# behind word_tokenize and the stop-word lists. NLTK 3.9+ looks the Punkt
# models up under "punkt_tab", while older releases use "punkt", so both are
# requested to keep the script working across versions.
for _resource in ("punkt", "punkt_tab", "stopwords"):
    nltk.download(_resource)

# English stop words, held in a set for fast membership tests in clean_text.
stop_words = set(stopwords.words('english'))

# Individual punctuation characters, for per-character membership tests.
_PUNCTUATION_CHARS = frozenset(string.punctuation)


def clean_text(sentence):
    """Normalise a sentence before it is vectorised.

    The text is lower-cased and tokenised with NLTK's ``word_tokenize``;
    English stop words and punctuation-only tokens are removed, and the
    remaining tokens are joined with single spaces.

    Args:
        sentence: Raw text. Non-string values (for example ``None`` or the
            NaN pandas produces for an empty CSV cell) are treated as empty
            text instead of raising on ``.lower()``.

    Returns:
        The cleaned, space-separated tokens, or ``""`` when nothing remains.
    """
    if not isinstance(sentence, str):
        return ""

    # Check punctuation character by character: ``token in string.punctuation``
    # is a substring test, so a multi-character token such as "..." would not
    # be recognised as punctuation and would slip through.
    tokens = [
        token
        for token in word_tokenize(sentence.lower())
        if token not in stop_words
        and not all(ch in _PUNCTUATION_CHARS for ch in token)
    ]
    return " ".join(tokens)

# Load the FAQ dataset (replace the path with your actual file location).
try:
    data = pd.read_csv("/content/BankFAQs.csv")
except FileNotFoundError:
    print("Error: 'BankFAQs.csv' not found. Please upload the file to the Colab environment.")
    # Stop here with a non-zero status. The bare exit() helper is meant for
    # the interactive shell and would report success despite the failure.
    raise SystemExit(1)

# Discard rows with a missing question, answer or class: empty cells are read
# as NaN, which cannot be tokenised or label-encoded and would otherwise be
# handed back to the user as an "answer".
data = data.dropna(subset=["Question", "Answer", "Class"])


# Add a normalised copy of every question next to the original text.
data["Cleaned_Question"] = data["Question"].apply(clean_text)

# Map the textual class names onto integer labels for the classifier.
label_encoder = LabelEncoder()
data["Class_Label"] = label_encoder.fit_transform(data["Class"])

# Fit TF-IDF on the cleaned questions; X holds one row per FAQ entry.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(data["Cleaned_Question"])
y = data["Class_Label"]

# Reproducible 70/30 split of the vectorised questions and their labels.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

# Linear-kernel SVM with probability estimates enabled, fitted on the
# training portion only (fit returns the estimator itself).
classifier = SVC(probability=True, kernel="linear").fit(X_train, y_train)


def get_response(user_input):
    """Return the stored FAQ answer that best matches a user's query.

    The query is cleaned with ``clean_text`` and vectorised with the fitted
    TF-IDF ``vectorizer``; ``classifier`` predicts its FAQ class, and the
    answer belonging to the most cosine-similar question of that class is
    returned.

    Args:
        user_input: Free-text query entered by the user.

    Returns:
        The matching answer, or a short explanatory message when the query
        is blank, shares no vocabulary with the FAQ questions, or cannot be
        processed.
    """
    # A blank query carries no signal; without this guard it would be
    # classified from an all-zero vector and receive an arbitrary answer.
    if not isinstance(user_input, str) or not user_input.strip():
        return "Please enter a question."

    no_match_message = (
        "Sorry, I couldn't find an answer to that. Please try rephrasing your question."
    )

    try:
        cleaned_input = clean_text(user_input)
        input_vector = vectorizer.transform([cleaned_input])

        # None of the query's terms are in the TF-IDF vocabulary, so there
        # is nothing to base a prediction on.
        if input_vector.nnz == 0:
            return no_match_message

        predicted_class = classifier.predict(input_vector)[0]
        class_questions = data[data["Class_Label"] == predicted_class]
        similarities = cosine_similarity(
            input_vector,
            vectorizer.transform(class_questions["Cleaned_Question"]),
        )

        # similarities has a single row, so the flat argmax is the position
        # of the best question within class_questions.
        best_index = similarities.argmax()

        # Zero similarity means no question in the predicted class shares a
        # term with the query; argmax would just pick the first row.
        if similarities[0, best_index] <= 0:
            return no_match_message

        return class_questions.iloc[best_index]["Answer"]
    except (ValueError, IndexError) as e:
        return f"An error occurred: {e}. Please try a different query."


# Two-line text box in which the user types a question.
query_box = gr.Textbox(placeholder="Enter your query here...", lines=2)

# Wire the text box to get_response; the reply is shown as plain text.
iface = gr.Interface(
    title="Bank FAQ Chatbot",
    description="Ask me anything about accounts, investments, or funds.",
    fn=get_response,
    inputs=query_box,
    outputs="text",
)

# Start the Gradio app.
iface.launch()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://80b12631e6e4541ff4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


