![](https://i.pinimg.com/736x/bb/18/e5/bb18e520050d0ef6528339f3c517d23e.jpg)

# Deep Learning using PyTorch 

|No | Topic | My Notebook | Lectures |
|---| -------- | ----------- | ------------ |
| 01 | PyTorch for Beginners | [üó∫Ô∏è](https://github.com/Rudra-G-23/AI-Engineering-Resources/tree/main/PyTorch) | [üßë‚Äçüè´](https://youtu.be/QZsguRbcOBM?si=xS9S0YBpN4AtVNiA) |
| 02 | Tensor in PyTorch | [üìí](https://www.kaggle.com/code/rudraprasadbhuyan/tensors-in-pytorch/) | [üßë‚Äçüè´](https://youtu.be/mDsFsnw3SK4?si=7UXsevk1lyPfcM9B)|
| 03 |PyTorch Autograd | [üìí](https://www.kaggle.com/code/rudraprasadbhuyan/pytorch-autograd/) | [üßë‚Äçüè´](https://youtu.be/BECZ0UB5AR0?si=fnEYItyybv5nky2y) |
| 04 |PyTorch Training Pipeline | [üìí](https://www.kaggle.com/code/rudraprasadbhuyan/simple-pytorch-training-pipeline/) | [üßë‚Äçüè´](https://youtu.be/MKxEbbKpL5Q?si=xrUovfzRyXfvmM8E) |
| 05 | PyTorch NN Module | [üìí](https://www.kaggle.com/code/rudraprasadbhuyan/simple-pytorch-nn-module?scriptVersionId=289809301)[üìí](https://www.kaggle.com/code/rudraprasadbhuyan/pytorch-training-pipeline-using-nn-module?scriptVersionId=289814480) | [üßë‚Äçüè´](https://youtu.be/CAgWNxlmYsc?si=INKJ8VcdQgcYwxAe) |
|06 |Dataset & DataLoader Class | [üìí](https://www.kaggle.com/code/rudraprasadbhuyan/dataset-and-dataloader-class-in-pytorch)[üìí](https://www.kaggle.com/code/rudraprasadbhuyan/pytorch-training-pipeline-using-dataset-dataloader/) | [üßë‚Äçüè´](https://youtu.be/RH6DeE3bY6I?si=ac6IsukunPNN_dC9) |
| 07 | ANN using PyTorch | [üìí](https://www.kaggle.com/code/rudraprasadbhuyan/ann-fashion-mnist-pytorch) | [üßë‚Äçüè´](https://youtu.be/6EJaHBJhwDs?si=77w3QVjd2qRTgd3I) |
| 08 | Trining on GPU | [üìí](https://www.kaggle.com/code/rudraprasadbhuyan/ann-fashion-mnist-pytorch-training-on-gpu/) | [üßë‚Äçüè´](https://youtu.be/CabHrf9eOVs?si=1jETgquzn5gl8Srs) |
| 09 | Optimizing the Neural Network | [üìí](https://www.kaggle.com/code/rudraprasadbhuyan/ann-fashion-mnist-pytorch-gpu-optimized/) | [üßë‚Äçüè´](https://youtu.be/7smLlJ8oj4o?si=j6N8JIl1_RHwti-2) |
| 10 | Optuna X PyTorch | [üìí](https://www.kaggle.com/code/rudraprasadbhuyan/ann-fashion-mnist-pytorch-gpu-optimized-optuna/) | [üßë‚Äçüè´](https://youtu.be/Y3s-wBBLj_o?si=sZVcZBWMyTZbSGM2) |
| 11 | CNN using PyTorch | [üìí](https://www.kaggle.com/code/rudraprasadbhuyan/cnn-using-pytorch-fashion-mnist-gpu/) | [üßë‚Äçüè´](https://youtu.be/hkiBZLRFvO4?si=pbJB9XAWJn2Tu3uM) |
| 12 | Transfer Learning | [üìí](https://www.kaggle.com/code/rudraprasadbhuyan/transfer-learning-fashion-mnist-pytorch-gpu/) | [üßë‚Äçüè´](https://youtu.be/hkiBZLRFvO4?si=B2SSffNEwdijajhk) |
| 13 | RNN using PyTorch | [üìí](https://www.kaggle.com/code/rudraprasadbhuyan/rnn-using-pytorch/) | [üßë‚Äçüè´](https://youtu.be/xjzWrPQ66VQ?si=Ngoef4KL9CDM-4lk) |

- Kaggle Dataset: https://www.kaggle.com/datasets/aadarshvani/100-unique-qa-dataset
- Drive Dataset Link: https://drive.google.com/file/d/1X4Hcj72NK7J2JYvgjICFj0R1XwUq1w0a/view
- Repo: https://github.com/Rudra-G-23/deep-learning-using-pytorch

In [275]:
import numpy as np
import pandas as pd

import torch 
import torch.nn as nn
from torchinfo import summary
from torch.utils.data import Dataset, DataLoader

import warnings
warnings.filterwarnings('ignore')

In [276]:
path = "/kaggle/input/100-unique-qa-dataset/100_Unique_QA_Dataset.csv"

In [277]:
df = pd.read_csv(path)
df.head(3)

Unnamed: 0,question,answer
0,What is the capital of France?,Paris
1,What is the capital of Germany?,Berlin
2,Who wrote 'To Kill a Mockingbird'?,Harper-Lee


# Tokenize

In [278]:
def tokenize(text):
    text = text.lower()
    text = text.replace("?", "")
    text = text.replace("''", "")
    return text.split()

In [279]:
tokenize("Who wrote 'To Kill a Mockingbird'?")

['who', 'wrote', "'to", 'kill', 'a', "mockingbird'"]

# Vocab

In [280]:
vocab = {'<UNK>':0}

In [281]:
def build_vocab(row):
    tokenized_q = tokenize(row["question"])
    tokenized_ans = tokenize(row["answer"])

    merged_tokens = tokenized_q + tokenized_ans

    for token in merged_tokens:

        if token not in vocab:
            vocab[token] = len(vocab)
            
    print(merged_tokens)

In [282]:
# now see the vocabulary 
vocab

{'<UNK>': 0}

In [283]:
df.apply(build_vocab, axis=1)

['what', 'is', 'the', 'capital', 'of', 'france', 'paris']
['what', 'is', 'the', 'capital', 'of', 'germany', 'berlin']
['who', 'wrote', "'to", 'kill', 'a', "mockingbird'", 'harper-lee']
['what', 'is', 'the', 'largest', 'planet', 'in', 'our', 'solar', 'system', 'jupiter']
['what', 'is', 'the', 'boiling', 'point', 'of', 'water', 'in', 'celsius', '100']
['who', 'painted', 'the', 'mona', 'lisa', 'leonardo-da-vinci']
['what', 'is', 'the', 'square', 'root', 'of', '64', '8']
['what', 'is', 'the', 'chemical', 'symbol', 'for', 'gold', 'au']
['which', 'year', 'did', 'world', 'war', 'ii', 'end', '1945']
['what', 'is', 'the', 'longest', 'river', 'in', 'the', 'world', 'nile']
['what', 'is', 'the', 'capital', 'of', 'japan', 'tokyo']
['who', 'developed', 'the', 'theory', 'of', 'relativity', 'albert-einstein']
['what', 'is', 'the', 'freezing', 'point', 'of', 'water', 'in', 'fahrenheit', '32']
['which', 'planet', 'is', 'known', 'as', 'the', 'red', 'planet', 'mars']
['who', 'is', 'the', 'author', 'of', "

0     None
1     None
2     None
3     None
4     None
      ... 
85    None
86    None
87    None
88    None
89    None
Length: 90, dtype: object

# Word -> Numerical Inices 

In [284]:
def text_to_indices(text, vocab):

    indexed_text = []

    for token in tokenize(text):

        if token in vocab:
            indexed_text.append(vocab[token])
        else:
            indexed_text.append(vocab['<UNK>'])
    
    return indexed_text

In [285]:
# see example
text_to_indices("Who is Rudra?", vocab)

[10, 2, 0]

In [286]:
# Check the vocab
for t in ["who", "is", "rudra"]:
    print(vocab.get(t))

10
2
None


# Custom Dataset Class

In [287]:
class QADataset(Dataset):

    def __init__(self, df, vocab):
        self.df = df
        self.vocab = vocab

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):
        numeric_q = text_to_indices(self.df.iloc[idx]["question"], self.vocab)
        numeric_ans = text_to_indices(self.df.iloc[idx]["answer"], self.vocab)

        return torch.tensor(numeric_q), torch.tensor(numeric_ans)

In [288]:
# instance
dataset = QADataset(df, vocab)

In [289]:
# check we can load data properly with our requirement 
dataset[10]

(tensor([ 1,  2,  3,  4,  5, 53]), tensor([54]))

# Data Loader Class

In [290]:
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

In [291]:
dataloader

<torch.utils.data.dataloader.DataLoader at 0x7b7c4a691e10>

In [292]:
for q, ans in dataloader:
    print(q, ans)

tensor([[  1,   2,   3, 147, 148,  19, 149]]) tensor([[150]])
tensor([[  1,   2,   3, 164, 165, 166,  83,  84]]) tensor([[167]])
tensor([[10,  2,  3, 66,  5, 67]]) tensor([[68]])
tensor([[ 42, 117, 118,   3, 119,  94, 120]]) tensor([[121]])
tensor([[ 42, 137,   2, 227, 143,   3, 228, 229]]) tensor([[156]])
tensor([[10, 11, 12, 13, 14, 15]]) tensor([[16]])
tensor([[ 42, 168,   2,   3,  17, 169, 170]]) tensor([[171]])
tensor([[10, 96,  3, 97]]) tensor([[98]])
tensor([[ 10, 140,   3, 141, 272,  93, 273,   5,   3, 274]]) tensor([[275]])
tensor([[ 42, 314,   2, 315,  62,  63,   3, 316, 317]]) tensor([[318]])
tensor([[ 1,  2,  3, 69,  5,  3, 70, 71]]) tensor([[72]])
tensor([[  1,   2,   3,  37,  38,  39, 162]]) tensor([[163]])
tensor([[  1,  87, 230, 231, 232, 233]]) tensor([[234]])
tensor([[ 42, 217, 118, 218, 219,  19,  14, 220,  43]]) tensor([[221]])
tensor([[42, 18,  2, 62, 63,  3, 64, 18]]) tensor([[65]])
tensor([[ 42, 175,   2,  62,  39, 176, 177, 143, 178, 179]]) tensor([[180]])
tenso

# Simple NN class

Neural Network 

- One input layer (50 neurons)
- one hidden layer (64 neurons
- one output layer (324 neurons), which is vocab_size=324
- 50-dimensional embedding 

In [293]:
class SimpleRNN(nn.Module):

    def __init__(self, vocab_size):

        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim=50)
        self.rnn = nn.RNN(50, 64, batch_first=True)
        self.fc = nn.Linear(64, vocab_size)

    def forward(self, question):
        embeded_q = self.embedding(question)
        hidden, final = self.rnn(embeded_q)
        out = self.fc(final.squeeze(0))
        return out

# Check & RNN fundamentals 

## Embedding (Input Layer)

In [294]:
# first q&a fetch
dataset[45]

(tensor([ 10, 140,   3, 141, 172,   5,   3,  70, 173]), tensor([174]))

In [295]:
x = nn.Embedding(324, embedding_dim=50)
x

Embedding(324, 50)

In [296]:
# only question sent to embedding
a = x(dataset[45][0]) 
a

tensor([[-2.0004e+00,  7.5953e-02,  1.2621e+00, -3.2058e-01, -1.2299e+00,
         -5.1092e-01,  9.5349e-02, -2.6320e+00, -2.7914e-01,  1.9502e-01,
          3.0940e-01,  1.7999e+00, -3.9919e-01, -1.9983e-02, -7.4350e-01,
         -2.2623e-01,  4.2789e-01, -2.9628e-01, -6.9202e-01, -2.3869e-02,
          1.6062e+00, -2.5377e-01,  6.1512e-01,  8.8098e-01, -8.7469e-01,
         -1.4060e+00,  9.2733e-01, -6.0122e-01,  8.7368e-01,  2.5087e-01,
          9.8199e-01,  9.0094e-02, -5.2529e-01,  1.3630e+00,  2.1529e-01,
          6.2076e-02,  6.7904e-01,  1.0018e-01,  4.8726e-01,  3.3160e-01,
          4.4323e-01, -4.4627e-01, -2.5194e-01, -1.2440e+00,  3.2140e-01,
          7.1988e-01, -4.1474e-01,  2.0010e+00,  4.5588e-01, -1.8904e+00],
        [-5.5239e-01,  1.2640e+00, -1.8220e-01,  5.1918e-03, -4.4131e-01,
          2.8270e-01, -1.5224e+00,  3.7523e-01,  1.1971e+00, -2.1619e+00,
          2.0662e-01,  9.1449e-01, -7.5858e-01, -1.2834e+00,  1.1329e+00,
         -1.7198e-01,  4.7598e-01,  1

In [297]:
print(a.shape)

torch.Size([9, 50])


## RNN example (Hidden Layer)

In [298]:
# now we see the RNN 
# What RNN does with this embedding 
# 50 dim and 64 outputs
y = nn.RNN(50, 64)

![](https://i.pinimg.com/736x/c6/14/db/c614dbf3fc040cc1f2a7161f4b224228.jpg)

In [299]:
# two tupple
y(a)

(tensor([[-0.2486, -0.4983, -0.1773, -0.1265,  0.1133,  0.6968,  0.7148, -0.1298,
          -0.1708, -0.4045,  0.4692, -0.1653, -0.3188,  0.5821,  0.0809,  0.0675,
          -0.2989,  0.1629,  0.2005, -0.1664,  0.7893, -0.1634, -0.3210, -0.2256,
          -0.8517,  0.1115, -0.1994, -0.1522, -0.5058,  0.6796,  0.2720, -0.2281,
          -0.6166,  0.1463, -0.5640,  0.5867, -0.0148,  0.1488,  0.4264,  0.0836,
          -0.1851,  0.2303, -0.1853,  0.0345, -0.5174, -0.1076,  0.0730,  0.2008,
           0.3194, -0.2464,  0.1246, -0.1831,  0.4843,  0.1261,  0.3453, -0.3822,
          -0.3025,  0.0804, -0.0257, -0.2708,  0.1592,  0.2884,  0.1749, -0.2183],
         [-0.3591,  0.4443,  0.3611, -0.4099, -0.0385, -0.1709, -0.0205, -0.8223,
          -0.7038, -0.8054,  0.6134, -0.4221,  0.0130, -0.3907,  0.8548, -0.4736,
           0.3261, -0.4955,  0.8987,  0.6830,  0.3870, -0.6213,  0.5105,  0.3291,
           0.7046, -0.4227,  0.5148,  0.2049, -0.0371, -0.3230,  0.1148,  0.0255,
           0.42

In [300]:

len(y(a))

2

**What and Why**

- Here for one tupple for the o1, o2, o3 ...
- and another triple for the final output
- So we can't use the rRNNduring the sequential module
- because the module expert ,the output  is given to another layer input

In [301]:
# hidden output of the RNN
y(a)[0]

tensor([[-0.2486, -0.4983, -0.1773, -0.1265,  0.1133,  0.6968,  0.7148, -0.1298,
         -0.1708, -0.4045,  0.4692, -0.1653, -0.3188,  0.5821,  0.0809,  0.0675,
         -0.2989,  0.1629,  0.2005, -0.1664,  0.7893, -0.1634, -0.3210, -0.2256,
         -0.8517,  0.1115, -0.1994, -0.1522, -0.5058,  0.6796,  0.2720, -0.2281,
         -0.6166,  0.1463, -0.5640,  0.5867, -0.0148,  0.1488,  0.4264,  0.0836,
         -0.1851,  0.2303, -0.1853,  0.0345, -0.5174, -0.1076,  0.0730,  0.2008,
          0.3194, -0.2464,  0.1246, -0.1831,  0.4843,  0.1261,  0.3453, -0.3822,
         -0.3025,  0.0804, -0.0257, -0.2708,  0.1592,  0.2884,  0.1749, -0.2183],
        [-0.3591,  0.4443,  0.3611, -0.4099, -0.0385, -0.1709, -0.0205, -0.8223,
         -0.7038, -0.8054,  0.6134, -0.4221,  0.0130, -0.3907,  0.8548, -0.4736,
          0.3261, -0.4955,  0.8987,  0.6830,  0.3870, -0.6213,  0.5105,  0.3291,
          0.7046, -0.4227,  0.5148,  0.2049, -0.0371, -0.3230,  0.1148,  0.0255,
          0.4231,  0.4875, 

In [302]:
len(y(a))

2

In [303]:
# Output of the RNN 
y(a)[1]

tensor([[-0.3034, -0.5160, -0.2045, -0.8402,  0.7579,  0.0772, -0.6557, -0.2713,
          0.0483,  0.2883,  0.2842, -0.2808,  0.1182, -0.2878,  0.1416, -0.8131,
          0.7248, -0.7479,  0.6531, -0.1562, -0.6657,  0.5062, -0.3834,  0.7393,
         -0.6115, -0.5548,  0.1293,  0.7187, -0.8316,  0.5431, -0.3831, -0.5027,
         -0.3177,  0.0965, -0.5309,  0.0849,  0.3113, -0.3324,  0.9437,  0.0462,
         -0.1140,  0.7515, -0.7265,  0.3265,  0.4789,  0.0845, -0.5812,  0.2474,
          0.0346, -0.0660,  0.0970, -0.4096, -0.5952, -0.0939, -0.1458, -0.8204,
          0.2580, -0.0789, -0.1157, -0.4085, -0.5181,  0.3242,  0.0118, -0.3160]],
       grad_fn=<SqueezeBackward1>)

In [304]:
# final output of the rnn
y(a)[1].shape

torch.Size([1, 64])

## Output Layer

In [305]:
# final output
b = y(a)[1]

In [306]:
# send to the linear 
# 64 input form RNN and 324 output
z = nn.Linear(64, 324)

In [307]:
z(b)

tensor([[-3.8864e-02,  9.2004e-02, -5.9362e-02, -7.2805e-03, -5.4006e-01,
         -8.9106e-02,  2.0007e-01,  2.3395e-01, -3.4819e-01,  1.0696e-01,
          4.3824e-01,  3.0460e-02, -5.3667e-01, -1.8324e-01,  1.1028e-01,
          2.9386e-01,  5.7324e-01,  3.1396e-01, -3.4214e-02, -2.7798e-01,
         -4.5682e-01, -1.2937e-01,  3.2449e-03, -4.5834e-01,  6.3422e-02,
          5.8191e-01,  3.3166e-01,  2.9784e-01,  7.0271e-01, -3.4590e-01,
          2.8458e-01, -7.7505e-02,  7.1869e-02, -1.4535e-01,  3.2216e-01,
          7.3568e-01, -1.4576e-01,  9.3595e-02, -1.3700e-01,  2.4875e-01,
          3.6027e-01,  1.1423e-01,  2.6750e-01,  2.1785e-01, -2.3492e-01,
         -4.6785e-01, -3.7714e-01, -6.2081e-01,  5.5224e-01,  1.5887e-01,
          8.3219e-01,  2.6210e-02,  2.2872e-01, -5.0000e-03,  1.5053e-01,
         -4.0717e-02,  1.1372e-01,  1.9209e-01,  4.1949e-01,  2.1791e-01,
         -1.4416e-01, -3.3569e-01,  2.6504e-01,  4.9893e-01,  5.0909e-01,
         -2.0499e-02, -6.0669e-02, -1.

In [308]:
# for each vocabulary word we have probability 
z(b).shape

torch.Size([1, 324])

- That means we need to write the manual forward function

## Debugging the size 

In [309]:
x = nn.Embedding(324, embedding_dim=50)
y = nn.RNN(50, 64)
z = nn.Linear(64, 324)

# input value shape
a = dataset[0][0].reshape(1, 6)
print(a.shape)

# embedding shape
b = x(a)
print(b.shape)

# hidden, output of the rnn layer
c, d = y(b)
print(c.shape)
print(d.shape)

# output layer shape
e = z(d)
print(e.shape)

torch.Size([1, 6])
torch.Size([1, 6, 50])
torch.Size([1, 6, 64])
torch.Size([1, 6, 64])
torch.Size([1, 6, 324])


**1st impovement**

- By adding this parameter
- `batch_first=Ture`
- Notice the d and e


**2nd Impovement**
- squeeze the first 1 value
- so the shape would be 1, 324 always
- despite all the changing question sizes

In [310]:
x = nn.Embedding(324, embedding_dim=50)
y = nn.RNN(50, 64, batch_first=True)
z = nn.Linear(64, 324)

# input value shape
a = dataset[0][0].reshape(1, 6)
print(a.shape)

# embedding shape
b = x(a)
print(b.shape)

# hidden, output of the rnn layer
c, d = y(b)
print(c.shape)
print(d.shape)

# output layer shape
e = z(d.squeeze(0))
print(e.shape)

torch.Size([1, 6])
torch.Size([1, 6, 50])
torch.Size([1, 6, 64])
torch.Size([1, 1, 64])
torch.Size([1, 324])


# Imp Parameter

In [311]:
LR = 0.001
EPOCHS = 20

# Loss function & Optimizer

In [312]:
model = SimpleRNN(len(vocab))

In [313]:
summary(model)

Layer (type:depth-idx)                   Param #
SimpleRNN                                --
‚îú‚îÄEmbedding: 1-1                         16,300
‚îú‚îÄRNN: 1-2                               7,424
‚îú‚îÄLinear: 1-3                            21,190
Total params: 44,914
Trainable params: 44,914
Non-trainable params: 0

In [314]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

# Training Loop

In [315]:
for epoch in range(EPOCHS):
    total_loss = 0

    for q, ans in dataloader:
        
        optimizer.zero_grad()
        output = model(q)
        
        loss = criterion(output, ans[0])
        loss.backward()
        optimizer.step()
        
        total_loss = total_loss + loss.sum().item()

    print(f" {epoch + 1} | Total Loss: {total_loss:.4f} ")

 1 | Total Loss: 526.3420 
 2 | Total Loss: 460.3975 
 3 | Total Loss: 381.1847 
 4 | Total Loss: 317.0698 
 5 | Total Loss: 266.1316 
 6 | Total Loss: 218.1204 
 7 | Total Loss: 174.2367 
 8 | Total Loss: 136.6992 
 9 | Total Loss: 105.3345 
 10 | Total Loss: 80.9902 
 11 | Total Loss: 62.4904 
 12 | Total Loss: 49.1301 
 13 | Total Loss: 39.1850 
 14 | Total Loss: 31.3143 
 15 | Total Loss: 25.8721 
 16 | Total Loss: 21.2192 
 17 | Total Loss: 17.7880 
 18 | Total Loss: 14.9592 
 19 | Total Loss: 13.0095 
 20 | Total Loss: 11.1675 


# Predict 

In [316]:
idx_to_word = {v: k for k, v in vocab.items()}
idx_to_word

{0: '<UNK>',
 1: 'what',
 2: 'is',
 3: 'the',
 4: 'capital',
 5: 'of',
 6: 'france',
 7: 'paris',
 8: 'germany',
 9: 'berlin',
 10: 'who',
 11: 'wrote',
 12: "'to",
 13: 'kill',
 14: 'a',
 15: "mockingbird'",
 16: 'harper-lee',
 17: 'largest',
 18: 'planet',
 19: 'in',
 20: 'our',
 21: 'solar',
 22: 'system',
 23: 'jupiter',
 24: 'boiling',
 25: 'point',
 26: 'water',
 27: 'celsius',
 28: '100',
 29: 'painted',
 30: 'mona',
 31: 'lisa',
 32: 'leonardo-da-vinci',
 33: 'square',
 34: 'root',
 35: '64',
 36: '8',
 37: 'chemical',
 38: 'symbol',
 39: 'for',
 40: 'gold',
 41: 'au',
 42: 'which',
 43: 'year',
 44: 'did',
 45: 'world',
 46: 'war',
 47: 'ii',
 48: 'end',
 49: '1945',
 50: 'longest',
 51: 'river',
 52: 'nile',
 53: 'japan',
 54: 'tokyo',
 55: 'developed',
 56: 'theory',
 57: 'relativity',
 58: 'albert-einstein',
 59: 'freezing',
 60: 'fahrenheit',
 61: '32',
 62: 'known',
 63: 'as',
 64: 'red',
 65: 'mars',
 66: 'author',
 67: "'1984'",
 68: 'george-orwell',
 69: 'currency',
 7

In [317]:
def predict(question, model, vocab, th=0.5):
    
    model.eval()

    # Convert text ‚Üí indices
    numerical_question = text_to_indices(question, vocab)
    q_tensor = torch.tensor(numerical_question).unsqueeze(0)  # (1, seq_len)

    with torch.no_grad():
        output = model(q_tensor)   # (1, vocab_size)
        probs = torch.softmax(output, dim=1)
        value, idx = torch.max(probs, dim=1)

    if value.item() < th:
        print("Give me more data...")
        return

    idx_to_word = {v: k for k, v in vocab.items()}

    print("Answer is ..")
    print(idx_to_word[idx.item()])


In [318]:
predict("What is the largest planet in our solar system?", model, vocab)

Answer is ..
jupiter


In [319]:
predict("What is the boiling point of water in Celsius?", model, vocab)

Answer is ..
100


In [320]:
predict("What is the square root of 64?", model, vocab)

Answer is ..
8


#  What an ans üòÜ

![](https://i.pinimg.com/736x/1e/98/8f/1e988f4c4d7df3c13df8a628e1ed6d5f.jpg)

In [322]:
predict("Who is rudra", model, vocab)

Give me more data...
