In [100]:
import pandas as pd
import torch
from transformers import GPT2Tokenizer, AdamW
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
from dataclasses import dataclass
import math
import torch.nn.functional as F
from tqdm import tqdm
from sklearn.model_selection import train_test_split


file1_path = '/content/cdata.csv'
file2_path = '/content/cdata2.csv'

data1 = pd.read_csv(file1_path)
data2 = pd.read_csv(file2_path)


combined_data = pd.concat([data1, data2], ignore_index=True)

In [101]:
def remove_text_after_question_mark(text):
    return text.split('?')[0] + '?' if '?' in text else text

combined_data['user_query'] = combined_data['user_query'].apply(remove_text_after_question_mark)

combined_data['bot_response'].fillna('missing_response', inplace=True)


tokenizer = GPT2Tokenizer.from_pretrained('EleutherAI/gpt-neo-2.7B')


tokenizer.pad_token = tokenizer.eos_token


def tokenize_data(data):
    return tokenizer(data, return_tensors='pt', padding=True, truncation=True, max_length=tokenizer.model_max_length)

tokenized_queries = tokenize_data(combined_data['user_query'].tolist())
tokenized_responses = tokenize_data(combined_data['bot_response'].tolist())


train_data, val_data = train_test_split(combined_data, test_size=0.1)
tokenized_train_queries = tokenize_data(train_data['user_query'].tolist())
tokenized_train_responses = tokenize_data(train_data['bot_response'].tolist())
tokenized_val_queries = tokenize_data(val_data['user_query'].tolist())
tokenized_val_responses = tokenize_data(val_data['bot_response'].tolist())

In [102]:

class QueryDataset(Dataset):
    def __init__(self, queries, responses):
        self.queries = queries
        self.responses = responses

    def __len__(self):
        return len(self.queries['input_ids'])

    def __getitem__(self, idx):
        return {
            'input_ids': self.queries['input_ids'][idx],
            'attention_mask': self.queries['attention_mask'][idx],
            'labels': self.responses['input_ids'][idx]
        }


train_dataset = QueryDataset(tokenized_train_queries, tokenized_train_responses)
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)

val_dataset = QueryDataset(tokenized_val_queries, tokenized_val_responses)
val_dataloader = DataLoader(val_dataset, batch_size=4, shuffle=False)

In [103]:
class LayerNorm(nn.Module):
    def __init__(self, ndim, bias):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(ndim))
        self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None

    def forward(self, input):
        return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)

class CausalSelfAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd, bias=config.bias)
        self.c_proj = nn.Linear(config.n_embd, config.n_embd, bias=config.bias)
        self.attn_dropout = nn.Dropout(config.dropout)
        self.resid_dropout = nn.Dropout(config.dropout)
        self.n_head = config.n_head
        self.n_embd = config.n_embd
        self.dropout = config.dropout
        self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
        if not self.flash:
            print("WARNING: using slow attention. Flash Attention requires PyTorch >= 2.0")
            self.register_buffer("bias", torch.tril(torch.ones(config.block_size, config.block_size))
                                        .view(1, 1, config.block_size, config.block_size))

    def forward(self, x):
        B, T, C = x.size()
        q, k, v  = self.c_attn(x).split(self.n_embd, dim=2)
        k = k.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
        q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
        v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
        if self.flash:
            y = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=self.dropout if self.training else 0, is_causal=True)
        else:
            att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
            att = att.masked_fill(self.bias[:,:,:T,:T] == 0, float('-inf'))
            att = F.softmax(att, dim=-1)
            att = self.attn_dropout(att)
            y = att @ v
        y = y.transpose(1, 2).contiguous().view(B, T, C)
        y = self.resid_dropout(self.c_proj(y))
        return y

class MLP(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.c_fc    = nn.Linear(config.n_embd, 4 * config.n_embd, bias=config.bias)
        self.gelu    = nn.GELU()
        self.c_proj  = nn.Linear(4 * config.n_embd, config.n_embd, bias=config.bias)
        self.dropout = nn.Dropout(config.dropout)

    def forward(self, x):
        x = self.c_fc(x)
        x = self.gelu(x)
        x = self.c_proj(x)
        x = self.dropout(x)
        return x

class Block(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.ln_1 = LayerNorm(config.n_embd, bias=config.bias)
        self.attn = CausalSelfAttention(config)
        self.ln_2 = LayerNorm(config.n_embd, bias=config.bias)
        self.mlp = MLP(config)

    def forward(self, x):
        x = x + self.attn(self.ln_1(x))
        x = x + self.mlp(self.ln_2(x))
        return x

@dataclass
class GPTConfig:
    block_size: int = 1024
    vocab_size: int = 50304
    n_layer: int = 12
    n_head: int = 12
    n_embd: int = 768
    dropout: float = 0.0
    bias: bool = True

class GPT(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.tokenizer = tokenizer
        assert config.vocab_size is not None
        assert config.block_size is not None
        self.config = config

        self.transformer = nn.ModuleDict(dict(
            wte = nn.Embedding(config.vocab_size, config.n_embd),
            wpe = nn.Embedding(config.block_size, config.n_embd),
            drop = nn.Dropout(config.dropout),
            h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
            ln_f = LayerNorm(config.n_embd, bias=config.bias),
        ))
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
        self.transformer.wte.weight = self.lm_head.weight

        self.apply(self._init_weights)
        for pn, p in self.named_parameters():
            if pn.endswith('c_proj.weight'):
                torch.nn.init.normal_(p, mean=0.0, std=0.02/math.sqrt(2 * config.n_layer))

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def forward(self, idx, targets=None):
      device = idx.device
      b, t = idx.size()
      assert t <= self.config.block_size, f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
      pos = torch.arange(0, t, dtype=torch.long, device=device)

      tok_emb = self.transformer.wte(idx)
      pos_emb = self.transformer.wpe(pos)
      x = self.transformer.drop(tok_emb + pos_emb)
      for block in self.transformer.h:
          x = block(x)

      x = self.transformer.ln_f(x)

      if targets is not None:
        logits = self.lm_head(x)
        logits_flat = logits.reshape(-1, logits.size(-1))
        targets_flat = targets.reshape(-1)
        loss = F.cross_entropy(logits_flat, targets_flat, ignore_index=-1)
        preds = torch.argmax(logits, dim=-1)
        mask = (targets != -1)
        num_correct = (preds == targets) & mask
        accuracy = num_correct.sum().item() / mask.sum().item()
      else:

        logits = self.lm_head(x[:, [-1], :])
        loss = None
        accuracy = None

      return logits, loss, accuracy


    @torch.no_grad()
    def generate(self, idx, max_new_tokens, temperature=1.2, top_k=50, tokenizer=None):
        generated_tokens = []
        min_length = 20
        for _ in range(max_new_tokens):
            idx_cond = idx if idx.size(1) <= self.config.block_size else idx[:, -self.config.block_size:]
            logits, _, _ = self(idx_cond)
            logits = logits[:, -1, :] / temperature
            if top_k is not None:
                values, _ = torch.topk(logits, top_k)
                logits[logits < values[:, [-1]]] = float('-inf')
            probs = F.softmax(logits, dim=-1)

            next_token = torch.multinomial(probs, num_samples=1)
            if len(generated_tokens) > min_length and next_token.item() == tokenizer.eos_token_id:
                break
            generated_tokens.append(next_token.item())
            idx = torch.cat((idx, next_token), dim=1)
        return idx, generated_tokens

    @torch.no_grad()
    def beam_search_generate(self, input_ids, beam_size=3, max_length=50, length_penalty=1.0):
        batch_size = input_ids.size(0)
        input_length = input_ids.size(1)


        generated = input_ids.unsqueeze(1).expand(batch_size, beam_size, input_length).contiguous().view(-1, input_length)
        beam_scores = torch.zeros((batch_size, beam_size), dtype=torch.float, device=input_ids.device)
        beam_scores[:, 1:] = -1e9
        beam_scores = beam_scores.view(-1)

        for _ in range(max_length - input_length):
            next_token_logits, _, _ = self(generated)
            next_token_logits = next_token_logits[:, -1, :]

            scores = next_token_logits + beam_scores[:, None].expand_as(next_token_logits)
            next_token_scores = scores.view(batch_size, beam_size * next_token_logits.size(-1))
            next_token_scores, next_tokens = torch.topk(next_token_scores, beam_size, dim=1, largest=True, sorted=True)

            next_indices = next_tokens // next_token_logits.size(-1)
            next_tokens = next_tokens % next_token_logits.size(-1)

            beam_outputs = []
            for batch_idx in range(batch_size):
                for beam_idx in range(beam_size):
                    beam_outputs.append(generated[batch_idx * beam_size + next_indices[batch_idx, beam_idx]])

            generated = torch.stack(beam_outputs, dim=0).view(batch_size * beam_size, -1)
            next_tokens = next_tokens.view(batch_size * beam_size, 1)
            generated = torch.cat([generated, next_tokens], dim=-1)

            beam_scores = next_token_scores.view(batch_size * beam_size)

        generated = generated.view(batch_size, beam_size, -1)
        best_beams = generated[:, 0, :]

        return best_beams

# Initialize the model
config = GPTConfig()
model = GPT(config)
model.to('cuda')


optimizer = AdamW(model.parameters(), lr=3e-4)






In [None]:

epochs = 50
for epoch in range(epochs):
    model.train()
    total_loss = 0
    total_accuracy = 0
    for batch in tqdm(train_dataloader, desc=f"Training Epoch {epoch + 1}"):
        optimizer.zero_grad()

        input_ids = batch['input_ids'].to('cuda')
        attention_mask = batch['attention_mask'].to('cuda')
        labels = batch['labels'].to('cuda')


        labels = labels[:, :input_ids.size(1)]

        logits, loss, accuracy = model(input_ids, targets=labels)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_accuracy += accuracy

    avg_loss = total_loss / len(train_dataloader)
    avg_accuracy = total_accuracy / len(train_dataloader)
    print(f"Epoch {epoch + 1}, Training Loss: {avg_loss:.4f}, Training Accuracy: {avg_accuracy:.4f}")

    model.eval()
    val_loss = 0
    val_accuracy = 0
    with torch.no_grad():
        for batch in val_dataloader:
            input_ids = batch['input_ids'].to('cuda')
            attention_mask = batch['attention_mask'].to('cuda')
            labels = batch['labels'].to('cuda')

            # Align the length of input_ids and labels
            labels = labels[:, :input_ids.size(1)]

            logits, loss, accuracy = model(input_ids, targets=labels)

            val_loss += loss.item()
            val_accuracy += accuracy

    avg_val_loss = val_loss / len(val_dataloader)
    avg_val_accuracy = val_accuracy / len(val_dataloader)
    print(f"Epoch {epoch + 1}, Validation Loss: {avg_val_loss:.4f}, Validation Accuracy: {avg_val_accuracy:.4f}")

print("Training completed!")



Training Epoch 1: 100%|██████████| 107/107 [00:07<00:00, 14.14it/s]


Epoch 1, Training Loss: 3.8438, Training Accuracy: 0.2216
Epoch 1, Validation Loss: 4.6934, Validation Accuracy: 0.2319


Training Epoch 2: 100%|██████████| 107/107 [00:06<00:00, 15.40it/s]


Epoch 2, Training Loss: 3.5909, Training Accuracy: 0.2325
Epoch 2, Validation Loss: 4.6219, Validation Accuracy: 0.2486


Training Epoch 3: 100%|██████████| 107/107 [00:07<00:00, 14.90it/s]


Epoch 3, Training Loss: 3.3944, Training Accuracy: 0.2460
Epoch 3, Validation Loss: 4.6605, Validation Accuracy: 0.2542


Training Epoch 4: 100%|██████████| 107/107 [00:06<00:00, 15.32it/s]


Epoch 4, Training Loss: 3.2402, Training Accuracy: 0.2532
Epoch 4, Validation Loss: 4.6212, Validation Accuracy: 0.2653


Training Epoch 5: 100%|██████████| 107/107 [00:07<00:00, 14.93it/s]


Epoch 5, Training Loss: 3.1060, Training Accuracy: 0.2615
Epoch 5, Validation Loss: 4.6209, Validation Accuracy: 0.2472


Training Epoch 6: 100%|██████████| 107/107 [00:07<00:00, 15.21it/s]


Epoch 6, Training Loss: 3.0092, Training Accuracy: 0.2604
Epoch 6, Validation Loss: 4.6893, Validation Accuracy: 0.2750


Training Epoch 7: 100%|██████████| 107/107 [00:07<00:00, 14.74it/s]


Epoch 7, Training Loss: 2.9158, Training Accuracy: 0.2636
Epoch 7, Validation Loss: 4.6900, Validation Accuracy: 0.2708


Training Epoch 8: 100%|██████████| 107/107 [00:07<00:00, 14.91it/s]


Epoch 8, Training Loss: 2.7894, Training Accuracy: 0.2679
Epoch 8, Validation Loss: 4.6503, Validation Accuracy: 0.2681


Training Epoch 9: 100%|██████████| 107/107 [00:06<00:00, 15.47it/s]


Epoch 9, Training Loss: 2.7313, Training Accuracy: 0.2732
Epoch 9, Validation Loss: 4.7227, Validation Accuracy: 0.2444


Training Epoch 10: 100%|██████████| 107/107 [00:07<00:00, 14.92it/s]


Epoch 10, Training Loss: 2.6145, Training Accuracy: 0.2871
Epoch 10, Validation Loss: 4.7589, Validation Accuracy: 0.2556


Training Epoch 11: 100%|██████████| 107/107 [00:06<00:00, 15.51it/s]


Epoch 11, Training Loss: 2.5717, Training Accuracy: 0.2816
Epoch 11, Validation Loss: 4.8340, Validation Accuracy: 0.2514


Training Epoch 12: 100%|██████████| 107/107 [00:07<00:00, 14.92it/s]


Epoch 12, Training Loss: 2.4910, Training Accuracy: 0.2923
Epoch 12, Validation Loss: 4.7612, Validation Accuracy: 0.2889


Training Epoch 13: 100%|██████████| 107/107 [00:06<00:00, 15.33it/s]


Epoch 13, Training Loss: 2.4199, Training Accuracy: 0.3031
Epoch 13, Validation Loss: 4.9115, Validation Accuracy: 0.2486


Training Epoch 14: 100%|██████████| 107/107 [00:07<00:00, 15.03it/s]


Epoch 14, Training Loss: 2.4340, Training Accuracy: 0.3035
Epoch 14, Validation Loss: 4.8450, Validation Accuracy: 0.2778


Training Epoch 15: 100%|██████████| 107/107 [00:07<00:00, 15.00it/s]


Epoch 15, Training Loss: 2.3971, Training Accuracy: 0.3107
Epoch 15, Validation Loss: 4.8255, Validation Accuracy: 0.2681


Training Epoch 16: 100%|██████████| 107/107 [00:06<00:00, 15.42it/s]


Epoch 16, Training Loss: 2.3172, Training Accuracy: 0.3169
Epoch 16, Validation Loss: 4.8954, Validation Accuracy: 0.2583


Training Epoch 17:  54%|█████▍    | 58/107 [00:03<00:03, 14.34it/s]

In [68]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report


file_path = '/content/combined_data.csv'
data = pd.read_csv(file_path)


data.dropna(subset=['user_query', 'topic'], inplace=True)


X_train, X_test, y_train, y_test = train_test_split(data['user_query'], data['topic'], test_size=0.2, random_state=42)


vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


svm_model = SVC(kernel='linear')
svm_model.fit(X_train_tfidf, y_train)


y_pred_svm = svm_model.predict(X_test_tfidf)


report_svm = classification_report(y_test, y_pred_svm, zero_division=1)
print("SVM Classification Report:")
print(report_svm)


SVM Classification Report:
                              precision    recall  f1-score   support

        Awards - environment       1.00      1.00      1.00         2
           Awards - medicine       1.00      1.00      1.00         3
         Awards - technology       1.00      1.00      1.00         2
         Culture - Christmas       1.00      1.00      1.00         8
            Culture - Diwali       1.00      1.00      1.00         8
               Culture - Eid       1.00      1.00      1.00         4
          Culture - Hanukkah       1.00      1.00      1.00         5
        Economy - technology       1.00      1.00      1.00         7
          Education - Canada       1.00      0.00      0.00         1
          Education - France       1.00      0.00      0.00         2
         Education - Germany       1.00      0.00      0.00         1
           Education - India       1.00      0.00      0.00         1
           Education - Japan       1.00      0.00      0.00   

In [69]:
from sklearn.ensemble import RandomForestClassifier


rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_tfidf, y_train)


y_pred_rf = rf_model.predict(X_test_tfidf)


report_rf = classification_report(y_test, y_pred_rf, zero_division=1)
print("Random Forest Classification Report:")
print(report_rf)


Random Forest Classification Report:
                              precision    recall  f1-score   support

        Awards - environment       1.00      1.00      1.00         2
           Awards - medicine       1.00      1.00      1.00         3
         Awards - technology       1.00      1.00      1.00         2
         Culture - Christmas       1.00      1.00      1.00         8
            Culture - Diwali       1.00      1.00      1.00         8
               Culture - Eid       1.00      1.00      1.00         4
          Culture - Hanukkah       1.00      1.00      1.00         5
        Economy - technology       1.00      1.00      1.00         7
       Education - Australia       0.00      1.00      0.00         0
          Education - Canada       0.00      0.00      1.00         1
          Education - France       1.00      0.00      0.00         2
         Education - Germany       1.00      0.00      0.00         1
           Education - India       1.00      0.00   

In [87]:

print("SVM Classification Report:")
print(report_svm)


print("Random Forest Classification Report:")
print(report_rf)


SVM Classification Report:
                              precision    recall  f1-score   support

        Awards - environment       1.00      1.00      1.00         2
           Awards - medicine       1.00      1.00      1.00         3
         Awards - technology       1.00      1.00      1.00         2
         Culture - Christmas       1.00      1.00      1.00         8
            Culture - Diwali       1.00      1.00      1.00         8
               Culture - Eid       1.00      1.00      1.00         4
          Culture - Hanukkah       1.00      1.00      1.00         5
        Economy - technology       1.00      1.00      1.00         7
          Education - Canada       1.00      0.00      0.00         1
          Education - France       1.00      0.00      0.00         2
         Education - Germany       1.00      0.00      0.00         1
           Education - India       1.00      0.00      0.00         1
           Education - Japan       1.00      0.00      0.00   

In [71]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC


param_grid_svm = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['linear', 'rbf']
}


grid_svm = GridSearchCV(SVC(), param_grid_svm, refit=True, verbose=2, n_jobs=-1)
grid_svm.fit(X_train_tfidf, y_train)


y_pred_svm_grid = grid_svm.predict(X_test_tfidf)


report_svm_grid = classification_report(y_test, y_pred_svm_grid, zero_division=1)
print("Tuned SVM Classification Report:")
print(report_svm_grid)


Fitting 5 folds for each of 32 candidates, totalling 160 fits




Tuned SVM Classification Report:
                              precision    recall  f1-score   support

        Awards - environment       1.00      1.00      1.00         2
           Awards - medicine       1.00      1.00      1.00         3
         Awards - technology       1.00      1.00      1.00         2
         Culture - Christmas       1.00      1.00      1.00         8
            Culture - Diwali       1.00      1.00      1.00         8
               Culture - Eid       1.00      1.00      1.00         4
          Culture - Hanukkah       1.00      1.00      1.00         5
        Economy - technology       1.00      1.00      1.00         7
          Education - Canada       1.00      1.00      1.00         1
          Education - France       1.00      1.00      1.00         2
         Education - Germany       1.00      1.00      1.00         1
           Education - India       1.00      1.00      1.00         1
           Education - Japan       1.00      1.00      1

In [88]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV


file_path = '/content/combined_data.csv'
data = pd.read_csv(file_path)


data.dropna(subset=['user_query', 'topic'], inplace=True)


X_train, X_test, y_train, y_test = train_test_split(data['user_query'], data['topic'], test_size=0.2, random_state=42)


vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


param_grid_svm = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['linear', 'rbf']
}


grid_svm = GridSearchCV(SVC(), param_grid_svm, refit=True, verbose=2, n_jobs=-1)
grid_svm.fit(X_train_tfidf, y_train)


y_pred_svm_grid = grid_svm.predict(X_test_tfidf)


report_svm_grid = classification_report(y_test, y_pred_svm_grid, zero_division=1)
print("Tuned SVM Classification Report:")
print(report_svm_grid)


query = ["prime minister of India"]
query_tfidf = vectorizer.transform(query)
predicted_topic = grid_svm.predict(query_tfidf)

print(f"The predicted topic for the query '{query[0]}' is: {predicted_topic[0]}")


Fitting 5 folds for each of 32 candidates, totalling 160 fits




Tuned SVM Classification Report:
                              precision    recall  f1-score   support

        Awards - environment       1.00      1.00      1.00         2
           Awards - medicine       1.00      1.00      1.00         3
         Awards - technology       1.00      1.00      1.00         2
         Culture - Christmas       1.00      1.00      1.00         8
            Culture - Diwali       1.00      1.00      1.00         8
               Culture - Eid       1.00      1.00      1.00         4
          Culture - Hanukkah       1.00      1.00      1.00         5
        Economy - technology       1.00      1.00      1.00         7
          Education - Canada       1.00      1.00      1.00         1
          Education - France       1.00      1.00      1.00         2
         Education - Germany       1.00      1.00      1.00         1
           Education - India       1.00      1.00      1.00         1
           Education - Japan       1.00      1.00      1

In [77]:
query = ["prime minister of India"]
query_tfidf = vectorizer.transform(query)
predicted_topic = grid_svm.predict(query_tfidf)

print(f"The predicted topic for the query '{query[0]}' is: {predicted_topic[0]}")


The predicted topic for the query 'prime minister of India' is: Politics - India


In [95]:
def generate_text(prompt, max_new_tokens=50, temperature=1.0, top_k=50, tokenizer=tokenizer):
    model.eval()
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to('cuda')

    generated_ids, generated_tokens = model.generate(input_ids, max_new_tokens=max_new_tokens, temperature=temperature, top_k=top_k, tokenizer=tokenizer)
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return generated_text


def try_different_parameters(prompt):
    parameters_list = [
        {'max_new_tokens': 20, 'temperature': 0.7, 'top_k': 50},
        {'max_new_tokens': 100, 'temperature': 0.5, 'top_k': 95},
        {'max_new_tokens': 150, 'temperature': 1.2, 'top_k': 80},
        {'max_new_tokens': 200, 'temperature': 1.0, 'top_k': 100},
    ]

    for params in parameters_list:
        generated_text = generate_text(
            prompt,
            max_new_tokens=params['max_new_tokens'],
            temperature=params['temperature'],
            top_k=params['top_k']
        )
        print(f"\nParameters: max_new_tokens={params['max_new_tokens']}, temperature={params['temperature']}, top_k={params['top_k']}")
        print(f"Bot response: {generated_text}")


prompt = "how is Diwali celebrated in Australia?"
try_different_parameters(prompt)
# Define the prompt/query



query_tfidf = vectorizer.transform([prompt])
predicted_topic = grid_svm.predict(query_tfidf)

print(f"The predicted topic for the query '{prompt}' is: {predicted_topic[0]}")



Parameters: max_new_tokens=20, temperature=0.7, top_k=50
Bot response: how is Diwali celebrated in Australia? with vibrant cultural as Sri traditional, dance gatherings with is, is India during, is, is the

Parameters: max_new_tokens=100, temperature=0.5, top_k=95
Bot response: how is Diwali celebrated in Australia? with vibrant community events, is with, the with USA is the with with with,ali is Japan and traditional is the such the not,w vibrant celebrated, often is the suchw which the such the is the such Japan, the with with with specific with with specific,ali is the is the widely the is is is the such,w the climate with with with with with highest specific with with with with with with with with with is the such the such is the climate is the such is is

Parameters: max_new_tokens=150, temperature=1.2, top_k=80
Bot response: how is Diwali celebrated in Australia? with vibrant communityali the foods with can environmental traditional isw private Japan traditional USA is Germany t