# **Gilgit Baltistan FAQ Chatbot**

In [41]:
import pandas as pd

# Load the FAQ dataset (one Question column, one Answer column) from /content
file_path = '/content/Gilgit_Baltistan_FAQs_Expanded.csv'
data = pd.read_csv(file_path)

# Preview the first rows to confirm the file parsed as expected
print(data.head())

# DataFrame.info() writes its summary to stdout itself and returns None,
# so it must not be wrapped in print() (that would emit a stray "None").
data.info()


                                         Question  \
0         What is the approximate area of Skardu?   
1         What are the key attractions in Skardu?   
2               What challenges does Skardu face?   
3  What is the distance of Skardu from Islamabad?   
4        What is the temperature range in Skardu?   

                                              Answer  
0  Skardu has a specific area that varies based o...  
1  Skardu offers stunning places such as valleys,...  
2  Skardu faces challenges such as limited infras...  
3  The distance from Islamabad to Skardu is appro...  
4  The temperature in Skardu ranges from extreme ...  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 487 entries, 0 to 486
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Question  487 non-null    object
 1   Answer    487 non-null    object
dtypes: object(2)
memory usage: 7.7+ KB
None


In [42]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
import pandas as pd

# Fetch the NLTK resources this notebook relies on (tokenizer models and stop-word lists)
for resource in ('punkt', 'stopwords'):
    nltk.download(resource)

# Characters that count as punctuation when filtering tokens
_PUNCTUATION_CHARS = frozenset(string.punctuation)

# English stop-word set; filled on first use so the corpus is read only once
_stop_words_cache = None


def _english_stop_words():
    """Return the NLTK English stop-word set, loading it on the first call only."""
    global _stop_words_cache
    if _stop_words_cache is None:
        _stop_words_cache = frozenset(stopwords.words('english'))
    return _stop_words_cache


def _is_punctuation(token):
    """Return True when every character of ``token`` is a punctuation character."""
    return all(char in _PUNCTUATION_CHARS for char in token)


# Define preprocessing function for input text
def preprocess_text_input(text):
    """Normalise a piece of text for TF-IDF matching.

    The text is lower-cased and stripped, tokenised with ``word_tokenize``,
    and then filtered so that English stop words and punctuation-only tokens
    are removed.

    Args:
        text: The raw string to clean (a dataset question or a user query).

    Returns:
        The remaining tokens joined by single spaces; an empty string if
        nothing survives the filtering.
    """
    text = text.lower().strip()
    tokens = word_tokenize(text)

    # Reuse the cached set instead of rebuilding it for every row/query.
    stop_words = _english_stop_words()

    # A per-character check is used because `token in string.punctuation` is a
    # substring test and lets multi-character tokens such as "..." through.
    kept = [
        token for token in tokens
        if token not in stop_words and not _is_punctuation(token)
    ]

    return " ".join(kept)



# Normalise every question, then drop rows that are exact duplicates of another row
data = data.assign(Question=data['Question'].apply(preprocess_text_input)).drop_duplicates()

# Persist the cleaned table (index column omitted) and report completion
data.to_csv('cleaned_FAQs.csv', index=False)
print("Text preprocessed, cleaned, and saved!")


Text preprocessed, cleaned, and saved!


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [43]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# Learn the TF-IDF vocabulary/weights from the cleaned questions, then encode
# those same questions as the matrix that user queries are compared against.
vectorizer = TfidfVectorizer().fit(data['Question'])
X = vectorizer.transform(data['Question'])

def get_response(user_query, min_similarity=0.0):
    """Return the stored answer whose question best matches ``user_query``.

    The query is cleaned with ``preprocess_text_input`` (the same function
    applied to the dataset questions before the vectorizer was fitted),
    encoded with the fitted ``vectorizer`` and compared against ``X`` using
    cosine similarity.

    Args:
        user_query: The question typed by the user. ``None`` is treated as
            an empty query.
        min_similarity: The best score must exceed this value for an answer
            to be returned. With the default of 0.0 only queries that have
            no similarity with any stored question are rejected.

    Returns:
        The ``Answer`` of the most similar row of ``data``, or a short
        fallback message when no stored question is similar enough.
    """
    # The original call targeted `preprocess_text`, which is not defined in
    # this notebook; use the function the corpus was actually cleaned with.
    cleaned_query = preprocess_text_input(user_query or "")

    query_vec = vectorizer.transform([cleaned_query])

    # One row of scores: similarity of the query to every stored question.
    scores = cosine_similarity(query_vec, X)[0]
    best_idx = int(scores.argmax())

    # If every score is zero, argmax still yields index 0; returning that row
    # would present an unrelated answer as if it were a match.
    if scores[best_idx] <= min_similarity:
        return "Sorry, I could not find an answer to that question. Please try rephrasing it."

    # Positional lookup: rows of X are in the same order as rows of `data`.
    return data.iloc[best_idx]['Answer']

In [44]:
# Smoke-test the retrieval pipeline end to end with one sample question
user_input = "What you know about chaqchan msoque?"
response = get_response(user_input)

# Show the answer the chatbot selected for the sample question
print(f"Chatbot: {response}")

Chatbot: The Chaqchan Mosque, located in Khaplu, is over 700 years old and is one of the oldest mosques in the region, showcasing Tibetan and Islamic architecture.


In [46]:
import gradio as gr

# Callback wired into the Gradio interface
def chatbot_response(user_input):
    """Return the FAQ answer for ``user_input`` by delegating to ``get_response``."""
    return get_response(user_input)

# Build a single-textbox-in, single-textbox-out Gradio UI around chatbot_response
interface = gr.Interface(
    title="Gilgit-Baltistan FAQ Chatbot",
    description="Ask me anything about Gilgit-Baltistan! This chatbot provides information about tourism, culture, and challenges in the region.",
    fn=chatbot_response,
    inputs=gr.Textbox(label="Your Question"),
    outputs=gr.Textbox(label="Chatbot Response"),
)

# Start serving the app
interface.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://752e0d96ebb32c7e06.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


