In [5]:
from sentence_transformers import SentenceTransformer, InputExample, losses, util
import pandas as pd
import torch
from torch.utils.data import DataLoader
import os


In [7]:
# Step 1: Load FAQ and Next Question data from a single Excel file
# Columns read below: "Question", "Answer", "Next Question" (header contains a space).
faq_df = pd.read_excel("/WITDS_FAQ_Paraphrased.xlsx")
# Rows lacking a question or an answer are dropped; any other missing cell becomes "".
faq_df = faq_df.dropna(subset=["Question", "Answer"]).fillna("")
faq_df["Question"] = faq_df["Question"].astype(str)
faq_df["Answer"] = faq_df["Answer"].astype(str)
# String-normalized copy of the follow-up column, stored under an underscore name.
faq_df["Next_Question"] = faq_df["Next Question"].astype(str)

faq_questions = faq_df["Question"].tolist()
faq_answers = faq_df["Answer"].tolist()
# Build the lookup from the normalized column so every value is a str and
# get_next_question can call .strip() on it. If a question text occurs more
# than once, the last row's follow-up is the one kept by dict().
next_question_map = dict(zip(faq_df["Question"], faq_df["Next_Question"]))

In [8]:
# Step 2: Build the fine-tuning set. Each FAQ question is paired with itself,
# so both texts of every InputExample are the same string.
train_examples = []
for question in faq_questions:
    train_examples.append(InputExample(texts=[question, question]))

# Batches of 16, reshuffled by the DataLoader.
train_dataloader = DataLoader(train_examples, batch_size=16, shuffle=True)

In [9]:
# Step 3: Start from the "all-MiniLM-L6-v2" checkpoint and fine-tune it on the
# self-paired FAQ questions with MultipleNegativesRankingLoss.
model = SentenceTransformer("all-MiniLM-L6-v2")
train_loss = losses.MultipleNegativesRankingLoss(model=model)
model.fit(
    show_progress_bar=True,
    warmup_steps=10,
    epochs=5,
    train_objectives=[(train_dataloader, train_loss)],
)

Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mbibhavkumar05[0m ([33mbibhavkumar05-upes[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
500,0.0109


In [10]:
# Step 4: Save and reload fine-tuned model
fine_tuned_path = "finetuned_chatbot_model"
# Create the output directory if it is not there yet (no error when it already exists).
os.makedirs(fine_tuned_path, exist_ok=True)
model.save(fine_tuned_path)
# Rebind `model` to an instance constructed from the directory just written,
# so every later cell works with what was saved to disk.
model = SentenceTransformer(fine_tuned_path)

In [11]:
import shutil

# Pack the saved model directory into "<model_folder>.zip"; the directory
# itself is used as the archive root.
model_folder = "finetuned_chatbot_model"
shutil.make_archive(base_name=model_folder, format="zip", root_dir=model_folder)
print(f"✅ Model zipped as {model_folder}.zip")


✅ Model zipped as finetuned_chatbot_model.zip


In [15]:
# Pass the zip archive to google.colab's download helper. This cell needs the
# google.colab package to be importable and will fail with ImportError otherwise.
from google.colab import files
files.download("finetuned_chatbot_model.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [16]:
# Step 5: Encode FAQ questions
# All FAQ questions are embedded once here with `model`; get_answer compares
# each incoming query against this result via util.cos_sim. Re-run this cell
# whenever `model` or `faq_questions` changes, or the embeddings go stale.
faq_embeddings = model.encode(faq_questions, convert_to_tensor=True)

In [23]:
# For duplicate detection: raw text of every query accepted so far in this session.
global_previous_questions = []

def is_duplicate(query, threshold=0.8):
    """Report whether `query` closely matches a question asked earlier.

    The query is embedded with the notebook-level `model` and compared by
    cosine similarity against every entry of `global_previous_questions`.
    When no stored question scores above `threshold`, the query is appended
    to that list so later calls can match against it.

    Args:
        query: Question text to check.
        threshold: Cosine similarity that must be strictly exceeded for the
            query to count as a duplicate.

    Returns:
        True if some previous question is more similar than `threshold`
        (the query is then NOT recorded); False otherwise.
    """
    query_vec = model.encode(query, convert_to_tensor=True)
    if global_previous_questions:
        # Embed the whole history in one batched call instead of issuing a
        # separate encode per stored question.
        history_vecs = model.encode(global_previous_questions, convert_to_tensor=True)
        best_sim = util.cos_sim(query_vec, history_vecs).max().item()
        if best_sim > threshold:
            return True
    global_previous_questions.append(query)
    # Explicit False rather than falling off the end and returning None.
    return False

In [24]:
def get_answer(query):
    """Return the FAQ entry whose question is most similar to `query`.

    The query is embedded with the notebook-level `model` and scored against
    `faq_embeddings` with cosine similarity. No minimum-similarity cutoff is
    applied, so the best-scoring FAQ entry is returned even when the query is
    unrelated to every stored question.

    Args:
        query: User question text.

    Returns:
        Tuple `(answer, matched_question)` taken from `faq_answers` and
        `faq_questions` at the best-scoring position.
    """
    query_vec = model.encode(query, convert_to_tensor=True)
    sims = util.cos_sim(query_vec, faq_embeddings)
    # torch.argmax yields a 0-dim tensor; turn it into a plain int before
    # using it to index the Python lists.
    idx = int(torch.argmax(sims).item())
    return faq_answers[idx], faq_questions[idx]

In [25]:
def get_next_question(matched_question):
    """Look up the follow-up question stored for a matched FAQ question.

    Args:
        matched_question: FAQ question text used as the key into
            `next_question_map`.

    Returns:
        The stored follow-up text, or None when the key is absent or the
        stored text is empty / whitespace only.
    """
    follow_up = next_question_map.get(matched_question, "")
    if follow_up.strip():
        return follow_up
    return None

In [26]:
def chatbot(query):
    """Produce the bot's reply for a single user query.

    A query that is_duplicate flags as already asked gets a fixed notice.
    Otherwise the best-matching FAQ answer is returned, followed by a
    suggested follow-up question when one is stored for the matched entry.

    Args:
        query: User question text.

    Returns:
        The reply string to show to the user.
    """
    if is_duplicate(query):
        return "You've already asked this or a similar question."

    reply, faq_match = get_answer(query)
    suggestion = get_next_question(faq_match)
    if not suggestion:
        return reply
    return f"{reply}\nWould you like to know: {suggestion}"

In [None]:
if __name__ == "__main__":
    # Interactive loop: read a line, answer it, repeat until the user exits.
    print("Chatbot is ready. Type your question or type 'exit' to quit.")
    while True:
        try:
            # Strip surrounding whitespace so inputs like " exit " still quit.
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel ends the session cleanly
            # instead of surfacing a traceback.
            print("\nBot: Goodbye!")
            break
        if not user_input:
            # Nothing typed: prompt again rather than sending an empty query
            # to the model.
            continue
        if user_input.lower() in ("exit", "quit"):
            print("Bot: Goodbye!")
            break
        response = chatbot(user_input)
        print("Bot:", response)

Chatbot is ready. Type your question or type 'exit' to quit.
You: What is WITDS?
Bot: WITDS is a Dehradun-based IT services company offering end-to-end digital solutions.
Would you like to know: What benefits do long-term partnerships bring to clients working with WITDS?
You: yes
Bot: WITDS is a Dehradun-based IT services company offering end-to-end digital solutions.
Would you like to know: What benefits do long-term partnerships bring to clients working with WITDS?
You: Does WITDS offer mobile app development?
Bot: Yes, WITDS provides full-cycle mobile app development services for both Android and iOS platforms.
Would you like to know: What makes WITDS beneficial for enterprises?
You: Does WITDS offer mobile app development?
Bot: You've already asked this or a similar question.
You: What is WITDS?
Bot: You've already asked this or a similar question.
You: How does WITDS approach a new project?
Bot: WITDS begins with understanding the client’s needs, followed by strategy design, techn