In [2]:
import pandas as pd
import torch
from transformers import BertTokenizer, BertModel
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split

ModuleNotFoundError: No module named 'torch'

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
if torch.cuda.is_available():
    print(f"Using : {torch.cuda.get_device_name(0)}")


Using device: cuda
Using : NVIDIA GeForce RTX 3050 Laptop GPU


In [None]:
file_path = 'final_questions.csv' 
df = pd.read_csv(file_path)

In [None]:
print(df.head())
print(f"The length of dataframe is :{df.shape[0]}")

                                       Question Text     Chapter_name
0                     1. Pascal, BASIC, and C are p.  Getting started
1  2. A widget is to the blueprint for a widget a...  Getting started
2       3. The two major components of an object are  Getting started
3  4. In C++, a function contained within a class...  Getting started
4  5. Protecting data from access by unauthorized...  Getting started
The length of dataframe is :1532


In [None]:
questions = df['Question Text'].tolist()
labels = df['Chapter_name'].tolist()


In [None]:
label_map = {label: idx for idx, label in enumerate(set(labels))}
map_to_label ={idx:label for idx, label in enumerate(set(labels))}
labels = [label_map[label] for label in labels]
num_classes = len(label_map)
# Load BERT tokenizer and define constants
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [None]:
#Super Parameters
max_length = 64
batch_size = 8
learning_rate = 4e-5 # (1e-5 =55%) (3e-5 = 65%) (8e-5=67%)
num_epochs = 13

In [None]:
#Intializing a question_dataset 
class QuestionDataset(Dataset):
    def __init__(self, questions, labels, tokenizer, max_length):
        self.questions = questions
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.questions)

    def __getitem__(self, idx):
        question = self.questions[idx]
        label = self.labels[idx]
        
        encoding = (self.tokenizer.encode_plus(
            question,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        ))
        
        input_ids = encoding['input_ids'].squeeze()
        attention_mask = encoding['attention_mask'].squeeze()
        
        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'label': torch.tensor(label, dtype=torch.long)
        }


In [None]:
# BERTClassifier  bert model to connected to a linear neural network to predict class
class BERTClassifier(nn.Module):
    def __init__(self, bert_model_name, num_classes):
        super(BERTClassifier, self).__init__()
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.fc = nn.Linear(self.bert.config.hidden_size, num_classes)
    
    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.pooler_output
        return self.fc(cls_output)

In [None]:
model = BERTClassifier('bert-base-uncased', num_classes).to(device)

In [None]:
dataset = QuestionDataset(questions, labels, tokenizer, max_length)
train_size = int(0.8* len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)# why we used adamW
#why we used cross entropy loss
criterion = nn.CrossEntropyLoss()

In [None]:
#Training the model
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch in train_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)
        
        optimizer.zero_grad()#what is zero_grad
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    avg_train_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {avg_train_loss}")

Epoch 1/13, Training Loss: 3.0421944779235046
Epoch 2/13, Training Loss: 2.4771701438086375
Epoch 3/13, Training Loss: 1.8665876210509957
Epoch 4/13, Training Loss: 1.3188200339481428
Epoch 5/13, Training Loss: 0.8883377131098857
Epoch 6/13, Training Loss: 0.5485400889165603
Epoch 7/13, Training Loss: 0.301681107466484
Epoch 8/13, Training Loss: 0.17110171494400733
Epoch 9/13, Training Loss: 0.08511435294935069
Epoch 10/13, Training Loss: 0.05680146831535287
Epoch 11/13, Training Loss: 0.039455867590164985
Epoch 12/13, Training Loss: 0.030546396287375842
Epoch 13/13, Training Loss: 0.021492304879107645


In [None]:
from threading import Thread
from playsound import playsound
import time
audio_file = 'alarm.mp3'
def play_sound():
    while True:
        playsound(audio_file)
sound_thread = Thread(target=play_sound, daemon=True)
sound_thread.start()
time.sleep(10)

print("Finished playing sound for 10 seconds!")


Finished playing sound for 10 seconds!


In [None]:
torch.save(model, 'model_complete.pth') 

In [None]:
def predict_question(question, model, tokenizer, device, max_length=32):
    # Preprocess the question (tokenize)
    encoding = tokenizer.encode_plus(
        question,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)
    model.eval()

    with torch.no_grad():  
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        _, predicted_class = torch.max(outputs, 1)

    return predicted_class.item()

In [None]:
label_map

{'Generic Algorithms and STL': 0,
 'Associative Containers': 1,
 'Virtual Functions': 2,
 'Templates': 3,
 'Templates ': 4,
 'Inheritance': 5,
 'Copy Control': 6,
 'Sequential Containers': 7,
 'Multifile Programs': 8,
 'Functions': 9,
 'Objects and Classes': 10,
 'Structures': 11,
 'Loops and Decisions': 12,
 'Pointers and Dynamic Memory': 13,
 'String,Vectors, and Arrays': 14,
 'Getting started': 15,
 'Object-Oriented Programming': 16,
 'Operator Overloading': 17,
 'Expressions': 18,
 'Statements': 19,
 'Specialised Tools and Techniques': 20,
 'C++ Programming Basics': 21,
 'Streams and IO Library': 22,
 'Tools for Large Programs': 23,
 'Strings, Vectors, and Arrays': 24,
 'Generic Algorithms': 25,
 'Pointers': 26,
 'Variable and Basic types': 27,
 'Specialised Library Facilities': 28}

In [None]:
question = "How can you implement a template?"

In [None]:

predicted_class = predict_question(question, model, tokenizer, device)
print(f"Predicted class: {predicted_class}")
print(f'{map_to_label[predicted_class]}')

Predicted class: 3
Templates


In [None]:
def calculate_accuracy(model, dataloader, device):
    model.eval()  # Set the model to evaluation mode
    total_samples = 0
    correct_predictions = 0

    with torch.no_grad():  # Disable gradient computation
        for batch in dataloader:
            # Move inputs and labels to the device
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)
            
            # Get model predictions
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            _, predictions = torch.max(outputs, 1)  # Get the index of the highest logit
            
            # Count correct predictions
            correct_predictions += (predictions == labels).sum().item()
            total_samples += labels.size(0)

    # Calculate accuracy
    accuracy = correct_predictions / total_samples
    return accuracy


In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

val_accuracy = calculate_accuracy(model, val_loader, device)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

Validation Accuracy: 65.47%


In [None]:
SModel =torch.load('model_complete.pth')

  SModel =torch.load('model_complete.pth')


In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

val_accuracy = calculate_accuracy(model, val_loader, device)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

Validation Accuracy: 65.47%
