In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Required Libraries
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords

# Transformers Library for NLP
from transformers import DistilBertForQuestionAnswering, DistilBertTokenizer
import torch

# Step 1: Upload the file to Google Colab
from google.colab import files

# This will prompt you to upload your dataset file (e.g., dataset.txt)
#uploaded = files.upload()

# Step 2: Load the file content
# Replace 'dataset.txt' with your actual filename
file_name = '/content/drive/MyDrive/Data/data.txt'  # Ensure your file is named correctly
with open(file_name, 'r') as file:
    data = file.read()

# Print the dataset to verify
print("Dataset loaded successfully!")
print(data)

# Download necessary NLTK data (only need to do this once)
nltk.download('punkt')
nltk.download('stopwords')

# Initialize DistilBERT QA model and tokenizer
model_name = "distilbert-base-uncased"
model = DistilBertForQuestionAnswering.from_pretrained(model_name)
tokenizer = DistilBertTokenizer.from_pretrained(model_name)

# Define a set of intents with keywords
intents = {
    'course_info': ['courses', 'program', 'training'],
    'admission_process': ['admission', 'apply', 'enroll', 'submit'],
    'center_purpose': ['purpose', 'goal', 'mission', 'query'],
    'location_info': ['location', 'address', 'name of this', 'purpose of the AJK-IT'],
    'timing_info': ['timing', 'schedule', 'office hours', 'class'],
    'course_duration': ['duration', 'how long', 'course timeline', 'located'],
    'contact_info': ['contact', 'phone', 'email'],
    'fees_info': ['fees', 'cost', 'tuition'],
    'unknown': []
}

# Function for classifying intent based on keywords
def classify_intent(input_text):
    input_words = RegexpTokenizer(r'\w+').tokenize(input_text.lower())
    stop_words = set(stopwords.words('english'))
    filtered_words = [word for word in input_words if word not in stop_words]

    for intent, keywords in intents.items():
        for keyword in keywords:
            if any(word in keyword.lower() for word in filtered_words):
                return intent
    return 'unknown'

# Function to extract a relevant answer from the dataset
def extract_answer(question, data):
    lines = data.split('\n')
    for i in range(len(lines)):
        if question.lower() in lines[i].lower():
            # Answer is expected to be in the next line
            if i + 1 < len(lines) and lines[i + 1].startswith('A.'):
                return lines[i + 1]
    return "Sorry, I don't have an answer for that."

# Function to process user input and return an appropriate response
def process_input(user_input):
    intent = classify_intent(user_input)
    response = ""

    if intent != 'unknown':
        # Attempt to find a relevant answer from the dataset
        response = extract_answer(user_input, data)
    else:
        response = "Sorry, I didn't understand your question."

    return response

# Example of processing user input
user_input = "What is the purpose of the AJK-IT Excellence Centre?"
response = process_input(user_input)
print("Response:", response)

# User interaction loop (optional)
while True:
    user_input = input("Ask a question: ")
    if user_input.lower() in ['exit', 'quit']:
        break
    response = process_input(user_input)
    print("Response:", response)


Dataset loaded successfully!
Q.1: What is the name of this IT Centre?
A.1: This is the AJK-IT Excellence Centre. 
Q.2: What is the purpose of the AJK-IT Excellence Centre?
A.2: The AJK-IT Excellence Centre is a leading educational institution designed to provide high-quality training in IT and software development. It focuses on equipping students with industry-relevant skills through practical, hands-on learning experiences.
Q.3: Where is the AJK IT Center located?
A.3: The AJK IT Center is located at [AJK IT Centre of Excellence, 3rd Floor, Rawalakot Model College, Khrick Road Rawalakot].
Q.4: What courses does the AJK IT Center offer?
A.4: We offer courses in: 
   Web Design & Development
   Data Science
   Digital Marketing
   E-Commerce
   Cyber Security
   UI/UX Design
   Mobile App Design & Development
   SEO
   Graphic Designing
   Creative Designing 
   Content Writing.
Q.5: Can you tell me the timings for the courses?
A.5: Our courses are available in both morning and evening

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Response: A.2: The AJK-IT Excellence Centre is a leading educational institution designed to provide high-quality training in IT and software development. It focuses on equipping students with industry-relevant skills through practical, hands-on learning experiences.
Ask a question: Does the IT center offer online courses?
Response: A.20: No, Our center does not offer online courses.
