In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from collections import Counter
from torch.utils.data import Dataset, DataLoader
from nltk.tokenize import word_tokenize
import nltk

In [4]:
document = """Kashmir, often referred to as “Paradise on Earth,” is one of the most breathtaking regions in the Indian subcontinent. Nestled in the northern part of India, it is renowned for its snow-capped mountains, lush valleys, pristine lakes, blooming gardens, and rich cultural heritage. From ancient temples and Mughal gardens to houseboats floating on Dal Lake, Kashmir offers a blend of natural beauty and historical charm that attracts travelers from all over the world.

Geography and Climate

Kashmir is surrounded by the majestic Himalayas and the Pir Panjal mountain range. The region is divided into three major parts: Jammu, the Kashmir Valley, and Ladakh. Each part has a distinct climate and landscape.

Jammu has a subtropical climate with hot summers and mild winters.

Kashmir Valley experiences cool summers and cold, snowy winters.

Ladakh is a cold desert with extremely low temperatures during winter.

Spring and autumn are considered the best seasons to visit due to pleasant weather and scenic beauty. Summer months are ideal for sightseeing and trekking, while winter attracts snow lovers and adventure sports enthusiasts.

Culture and Traditions

Kashmiri culture is a beautiful blend of Persian, Central Asian, and Indian influences. The region is known for its warm hospitality, traditional music, dance, and festivals.

Language: Kashmiri, Urdu, Hindi, and English are widely spoken.

Traditional Attire: Pheran (a long woolen gown) is commonly worn in winter.

Festivals: Eid, Diwali, Baisakhi, and Navroz are celebrated with great enthusiasm.

The art of storytelling, poetry, and folk music plays a vital role in preserving Kashmir’s cultural identity.

Food and Cuisine

Kashmiri cuisine is rich, aromatic, and flavorful. The famous Wazwan is a traditional multi-course meal prepared on special occasions.

Popular dishes include:

Rogan Josh

Yakhni

Dum Aloo

Gushtaba

Kahwa (traditional saffron tea)

Sheermal and Lavasa breads

Spices like saffron, cardamom, fennel, and dry ginger give Kashmiri food its distinctive taste.

Tourism and Attractions

1. Srinagar
The summer capital of Jammu and Kashmir, Srinagar is famous for Dal Lake, Mughal Gardens (Shalimar Bagh, Nishat Bagh), and houseboats.

2. Gulmarg
Known for skiing and snowboarding, Gulmarg is a winter sports paradise. It also has one of the world’s highest cable cars, the Gulmarg Gondola.

3. Pahalgam
A peaceful hill town, Pahalgam is known for lush meadows, river rafting, and scenic trekking routes.

4. Sonamarg
Often called the “Meadow of Gold,” Sonamarg is the gateway to several Himalayan trekking trails.

5. Leh-Ladakh
Famous for monasteries, high-altitude lakes like Pangong Tso, and rugged mountain landscapes.

Adventure and Outdoor Activities

Kashmir is a dream destination for adventure lovers.

Trekking and hiking in the Himalayas

River rafting in Lidder and Zanskar rivers

Skiing and snowboarding in Gulmarg

Paragliding and mountain biking

Camping and nature photography

Handicrafts and Shopping

Kashmir is world-famous for its handicrafts, which reflect the artistic skills passed down through generations.

Pashmina shawls

Kashmiri carpets

Walnut wood carvings

Papier-mâché artifacts

Saffron and dry fruits

Local markets like Lal Chowk in Srinagar are popular shopping destinations.

Education and Economy

The economy of Kashmir largely depends on tourism, agriculture, and handicrafts.

Agriculture: Apples, cherries, almonds, and saffron are major crops.

Handicrafts: A significant source of employment and exports.

Tourism: A growing contributor to the local economy.

Educational institutions in the region focus on arts, sciences, medicine, and engineering.

Safety and Travel Tips

Always check weather conditions before planning your trip.

Carry warm clothes even during summer nights.

Respect local customs and traditions.

Avoid traveling to remote areas without a guide.

Keep necessary documents handy while traveling.

Connectivity and Transport

Air: Srinagar Airport connects Kashmir to major Indian cities.

Road: National highways link Kashmir with Jammu and Ladakh.

Rail: The railway network is expanding to improve connectivity.

Local Transport: Taxis, buses, and shikaras (boats) are commonly used.

Why Visit Kashmir?

Kashmir offers a unique combination of serenity, adventure, spirituality, and cultural richness. Whether you are a nature lover, an adventure seeker, or someone looking for peace and relaxation, Kashmir has something for everyone. The mesmerizing landscapes, warm hospitality, and timeless traditions make it a destination worth exploring at least once in a lifetime.
"""


In [5]:
import nltk
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [6]:
tokens= word_tokenize(document)

In [7]:
tokens


['Kashmir',
 ',',
 'often',
 'referred',
 'to',
 'as',
 '“',
 'Paradise',
 'on',
 'Earth',
 ',',
 '”',
 'is',
 'one',
 'of',
 'the',
 'most',
 'breathtaking',
 'regions',
 'in',
 'the',
 'Indian',
 'subcontinent',
 '.',
 'Nestled',
 'in',
 'the',
 'northern',
 'part',
 'of',
 'India',
 ',',
 'it',
 'is',
 'renowned',
 'for',
 'its',
 'snow-capped',
 'mountains',
 ',',
 'lush',
 'valleys',
 ',',
 'pristine',
 'lakes',
 ',',
 'blooming',
 'gardens',
 ',',
 'and',
 'rich',
 'cultural',
 'heritage',
 '.',
 'From',
 'ancient',
 'temples',
 'and',
 'Mughal',
 'gardens',
 'to',
 'houseboats',
 'floating',
 'on',
 'Dal',
 'Lake',
 ',',
 'Kashmir',
 'offers',
 'a',
 'blend',
 'of',
 'natural',
 'beauty',
 'and',
 'historical',
 'charm',
 'that',
 'attracts',
 'travelers',
 'from',
 'all',
 'over',
 'the',
 'world',
 '.',
 'Geography',
 'and',
 'Climate',
 'Kashmir',
 'is',
 'surrounded',
 'by',
 'the',
 'majestic',
 'Himalayas',
 'and',
 'the',
 'Pir',
 'Panjal',
 'mountain',
 'range',
 '.',
 '

In [8]:
# Build the token -> integer-id lookup. Id 0 is reserved for "<UNK>"; every
# distinct token then receives the next free id, in order of first appearance.
vocab = {"<UNK>": 0}

for token in dict.fromkeys(tokens):
  # setdefault leaves an already-registered token (e.g. "<UNK>") untouched.
  vocab.setdefault(token, len(vocab))

In [9]:
len(vocab)

417

In [10]:
len(document.split("\n")) # number of pieces obtained by splitting the document at every line break

131

In [11]:
input_sentence= document.split("\n")

In [12]:
def text_to_indicies(sentence,vocab):
  """Translate a sequence of tokens into their vocabulary ids.

  Args:
    sentence: iterable of tokens (for example the list produced by a tokenizer).
    vocab: mapping from token to integer id; it must contain "<UNK>" whenever
      `sentence` holds a token that is missing from the mapping.

  Returns:
    A list with one id per token, in the original order. Tokens that are not
    keys of `vocab` are replaced by the id stored under "<UNK>".
  """
  return [
    vocab[token] if token in vocab else vocab["<UNK>"]
    for token in sentence
  ]


In [13]:

# Tokenize every line of the document and convert it to vocabulary ids,
# keeping one list of ids per line.
numerical_sentence = [
  text_to_indicies(word_tokenize(sentence), vocab)
  for sentence in input_sentence
]

In [14]:
numerical_sentence

[[1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  2,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  15,
  20,
  21,
  22,
  23,
  19,
  15,
  24,
  25,
  14,
  26,
  2,
  27,
  12,
  28,
  29,
  30,
  31,
  32,
  2,
  33,
  34,
  2,
  35,
  36,
  2,
  37,
  38,
  2,
  39,
  40,
  41,
  42,
  22,
  43,
  44,
  45,
  39,
  46,
  38,
  5,
  47,
  48,
  9,
  49,
  50,
  2,
  1,
  51,
  52,
  53,
  14,
  54,
  55,
  39,
  56,
  57,
  58,
  59,
  60,
  61,
  62,
  63,
  15,
  64,
  22],
 [],
 [65, 39, 66],
 [],
 [1,
  12,
  67,
  68,
  15,
  69,
  70,
  39,
  15,
  71,
  72,
  73,
  74,
  22,
  75,
  76,
  12,
  77,
  78,
  79,
  80,
  81,
  82,
  83,
  2,
  15,
  1,
  84,
  2,
  39,
  85,
  22,
  86,
  25,
  87,
  52,
  88,
  89,
  39,
  90,
  22],
 [],
 [83, 87, 52, 91, 89, 92, 93, 94, 39, 95, 96, 22],
 [],
 [1, 84, 97, 98, 94, 39, 99, 2, 100, 96, 22],
 [],
 [85, 12, 52, 99, 101, 92, 102, 103, 104, 105, 106, 22],
 [],
 [107,
  39,
  108,
  109,
  110,
  15,
  111,
  112,
  5,
 

In [15]:
# For every encoded line, emit each prefix holding at least two ids.
# Lines with fewer than two ids contribute nothing.
training_sequence = [
  sentence[:end]
  for sentence in numerical_sentence
  for end in range(2, len(sentence) + 1)
]

In [16]:
len(training_sequence)

746

In [17]:

# Length of every training sequence; the cell displays the largest one.
length = [len(sequence) for sequence in training_sequence]
max(length)



86

In [18]:
# Padding: left-pad every training sequence with 0 so that all of them share
# the length of the longest sequence.
# The target width does not change inside the loop, so it is computed once
# rather than once per sequence.
max_length = max(length)

paddeded_sequence = [
  [0] * (max_length - len(sequence)) + sequence
  for sequence in training_sequence
]

len(paddeded_sequence[0])

86

In [19]:
torch_padded_Sequence= torch.tensor(paddeded_sequence)

In [20]:
torch_padded_Sequence

tensor([[  0,   0,   0,  ...,   0,   1,   2],
        [  0,   0,   0,  ...,   1,   2,   3],
        [  0,   0,   0,  ...,   2,   3,   4],
        ...,
        [  0,   0,   0,  ..., 415,  19,  52],
        [  0,   0,   0,  ...,  19,  52, 416],
        [  0,   0,   0,  ...,  52, 416,  22]])

In [21]:
# Inputs: every column except the last one of each padded row.
X=torch_padded_Sequence[:,:-1]
# Targets: the last column of each padded row, i.e. the id that follows the input ids.
Y=torch_padded_Sequence[:,-1]

In [22]:
X[0]

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])

In [23]:
Y[0]

tensor(2)

In [24]:
class CoustomDataset(Dataset):
  """Dataset that pairs each row of the inputs with the matching target.

  Args:
    X: indexable inputs exposing `.shape`; the first dimension is the
      number of samples.
    Y: indexable targets, addressed with the same index as `X`.
  """

  def __init__(self,X,Y):
    self.x, self.y = X, Y

  def __len__(self):
    # The sample count is the size of the first dimension of the inputs.
    sample_count = self.x.shape[0]
    return sample_count

  def __getitem__(self,index):
    # Return the (input, target) pair stored at `index`.
    features = self.x[index]
    target = self.y[index]
    return features, target


In [25]:
dataset=CoustomDataset(X,Y)

In [26]:
dataset[0]

(tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
 tensor(2))

In [27]:
dataloader=DataLoader(dataset, batch_size=32, shuffle=True)

In [28]:
len(vocab)

417

In [29]:
class LstmModel(nn.Module):
  """Next-token classifier: embedding -> single-layer LSTM -> linear layer.

  Args:
    vocab_size: number of distinct token ids; also the number of scores
      produced for every input sequence.
    embedding_dim: size of the vector each token id is embedded into
      (default 100).
    hidden_dim: size of the LSTM hidden state (default 150).
  """

  def __init__(self,vocab_size,embedding_dim=100,hidden_dim=150):
    super().__init__()

    # Each token id is looked up as an `embedding_dim`-dimensional vector.
    self.embeded= nn.Embedding(vocab_size,embedding_dim)
    self.lstm= nn.LSTM(embedding_dim,hidden_dim, batch_first=True)
    # One raw score per vocabulary entry; no softmax is applied in this model.
    self.fc= nn.Linear(hidden_dim,vocab_size)

  def forward(self,x):
    """Score every vocabulary entry as the continuation of each sequence.

    Args:
      x: integer token ids, batch dimension first (the LSTM is built with
        batch_first=True).

    Returns:
      Raw scores with one row per sequence and `vocab_size` columns.
    """
    embedded_x= self.embeded(x)
    # The LSTM returns the per-step outputs plus a (hidden, cell) pair holding
    # the final states; only the final hidden state is used here.
    _, (final_hidden_state, _) = self.lstm(embedded_x)
    # Index the last entry along the leading (layer) dimension; for this
    # single-layer LSTM it is the same tensor the leading-dim squeeze produced.
    output=self.fc(final_hidden_state[-1])
    return output

In [30]:
model= LstmModel(len(vocab))

In [31]:
device= torch.device("cuda")

In [32]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [33]:
model.to(device)

LstmModel(
  (embeded): Embedding(417, 100)
  (lstm): LSTM(100, 150, batch_first=True)
  (fc): Linear(in_features=150, out_features=417, bias=True)
)

In [34]:
# Training configuration: number of passes over the data and the Adam step size.
epochs, learning_rate = 50, 0.001

# Loss comparing the model's scores with the integer target ids.
criterion = nn.CrossEntropyLoss()

# Adam updates every parameter of the model.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [35]:
# Training loop: one optimizer step per batch, `epochs` passes over the data.
# The value printed per epoch is the sum of the per-batch losses.

for epoch in range(epochs):
  # Make sure the model is in training mode; calculate_accuracy() switches it
  # to eval mode, so re-running this cell afterwards would otherwise train a
  # model that is still in eval mode.
  model.train()

  total_loss=0

  for batch_x, batch_y in dataloader:

    batch_x, batch_y = batch_x.to(device), batch_y.to(device)

    optimizer.zero_grad()

    output = model(batch_x)

    # Pass the scores as returned by the model (one row per sample). Calling
    # .squeeze() here would also drop the batch dimension whenever a batch
    # holds a single sample, and the loss would then reject the shapes.
    loss = criterion(output, batch_y)

    loss.backward()

    total_loss=total_loss+loss.item()

    optimizer.step()

  print(f"Epoch: {epoch + 1}, Loss: {total_loss:.4f}")





Epoch: 1, Loss: 142.9160
Epoch: 2, Loss: 127.0375
Epoch: 3, Loss: 118.7506
Epoch: 4, Loss: 111.6318
Epoch: 5, Loss: 104.9348
Epoch: 6, Loss: 97.3510
Epoch: 7, Loss: 90.1499
Epoch: 8, Loss: 82.3938
Epoch: 9, Loss: 74.7893
Epoch: 10, Loss: 67.9024
Epoch: 11, Loss: 61.7557
Epoch: 12, Loss: 54.9874
Epoch: 13, Loss: 49.4794
Epoch: 14, Loss: 43.5878
Epoch: 15, Loss: 38.6379
Epoch: 16, Loss: 34.2959
Epoch: 17, Loss: 30.0234
Epoch: 18, Loss: 26.6056
Epoch: 19, Loss: 23.5328
Epoch: 20, Loss: 20.7239
Epoch: 21, Loss: 18.1190
Epoch: 22, Loss: 16.3020
Epoch: 23, Loss: 14.4514
Epoch: 24, Loss: 13.1538
Epoch: 25, Loss: 11.5267
Epoch: 26, Loss: 10.4509
Epoch: 27, Loss: 9.3808
Epoch: 28, Loss: 8.6645
Epoch: 29, Loss: 7.9277
Epoch: 30, Loss: 7.2605
Epoch: 31, Loss: 6.6220
Epoch: 32, Loss: 6.0605
Epoch: 33, Loss: 5.6287
Epoch: 34, Loss: 5.2273
Epoch: 35, Loss: 4.9439
Epoch: 36, Loss: 4.5469
Epoch: 37, Loss: 4.2645
Epoch: 38, Loss: 4.1691
Epoch: 39, Loss: 3.8579
Epoch: 40, Loss: 3.6576
Epoch: 41, Loss: 3

In [36]:
def prediction(model,sentence,vocab):
  """Append the model's most likely next token to `sentence`.

  Args:
    model: module that maps a batch of token ids to one score per
      vocabulary entry.
    sentence: text to continue; it is tokenized with word_tokenize.
    vocab: token -> id mapping. The predicted index is turned back into a
      token by position in the mapping's key order.

  Returns:
    `sentence`, a single space, and the predicted token.

  Relies on the notebook-level `length` list for the padding width.
  """
  numerical_sentence= text_to_indicies(word_tokenize(sentence),vocab)

  # Left-pad with 0 up to the longest training sequence. When the input is
  # already longer, the multiplier is negative and no padding is added.
  # NOTE(review): the training inputs X drop the last column, so they are one
  # id shorter than max(length) - confirm whether this width should match.
  pad_width = max(length)-len(numerical_sentence)
  padded_text= torch.tensor([0]*pad_width+numerical_sentence,dtype=torch.long)

  # Add the batch dimension and run on the same device as the model's
  # parameters; a CPU tensor fed to a CUDA model raises a device mismatch.
  batch = padded_text.unsqueeze(0)
  first_param = next(model.parameters(), None)
  if first_param is not None:
    batch = batch.to(first_param.device)

  # Inference only: no gradients are needed.
  with torch.no_grad():
    output=model(batch)

  value,index= torch.max(output, dim=1)

  return sentence+ " " + list(vocab.keys())[index.item()]

In [37]:
prediction(model,"Yes, just select past week doubt ", vocab)

'Yes, just select past week doubt  and'

In [45]:
import time

# Greedy generation: start from a seed text and repeatedly append the token
# returned by prediction(), printing the running text after every step.
input_text = "Kashmir, often referred "
num_tokens = 100

for i in range(num_tokens):
  # The extended text becomes the input of the next step.
  input_text = output_text = prediction(model, input_text, vocab)

  print(output_text)

  # Pause for one second between tokens.
  time.sleep(1)

Kashmir, often referred  to
Kashmir, often referred  to as
Kashmir, often referred  to as “
Kashmir, often referred  to as “ Paradise
Kashmir, often referred  to as “ Paradise on
Kashmir, often referred  to as “ Paradise on Earth
Kashmir, often referred  to as “ Paradise on Earth ,
Kashmir, often referred  to as “ Paradise on Earth , ”
Kashmir, often referred  to as “ Paradise on Earth , ” is
Kashmir, often referred  to as “ Paradise on Earth , ” is one
Kashmir, often referred  to as “ Paradise on Earth , ” is one of
Kashmir, often referred  to as “ Paradise on Earth , ” is one of the
Kashmir, often referred  to as “ Paradise on Earth , ” is one of the most
Kashmir, often referred  to as “ Paradise on Earth , ” is one of the most breathtaking
Kashmir, often referred  to as “ Paradise on Earth , ” is one of the most breathtaking regions
Kashmir, often referred  to as “ Paradise on Earth , ” is one of the most breathtaking regions in
Kashmir, often referred  to as “ Paradise on Earth , ”

In [39]:
# Function to calculate accuracy
def calculate_accuracy(model, dataloader, device):
    """Return the percentage of samples whose top-scoring class equals the label.

    Args:
        model: module called as ``model(batch_x)``; its output must have the
            class scores along dimension 1.
        dataloader: iterable yielding ``(batch_x, batch_y)`` tensor pairs.
        device: device every batch is moved to before the forward pass.

    Returns:
        The accuracy as a percentage, or ``0.0`` when the dataloader yields
        no samples.

    The model is evaluated in eval mode and is switched back to the mode it
    was in before the call, so a later training step is not affected.
    """
    was_training = model.training
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0

    try:
        with torch.no_grad():  # No need to compute gradients
            for batch_x, batch_y in dataloader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)

                # Get model predictions
                outputs = model(batch_x)

                # Get the predicted word indices
                _, predicted = torch.max(outputs, dim=1)

                # Compare with actual labels
                correct += (predicted == batch_y).sum().item()
                total += batch_y.size(0)
    finally:
        # Restore the mode the caller had set, even if evaluation failed.
        model.train(was_training)

    # Nothing was evaluated: report 0.0 instead of dividing by zero.
    if total == 0:
        return 0.0

    accuracy = correct / total * 100
    return accuracy

# Compute accuracy
accuracy = calculate_accuracy(model, dataloader, device)
print(f"Model Accuracy: {accuracy:.2f}%")

Model Accuracy: 98.39%
