In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# 1. Data Preparation
paragraph = "PyTorch makes building neural networks simple. LSTM is useful for sequence prediction tasks. Predicting the next word is a fun application."
words = paragraph.lower().replace('.', '').replace(',', '').split()
vocab = sorted(set(words))
word_to_idx = {w: i for i, w in enumerate(vocab)}
idx_to_word = {i: w for w, i in word_to_idx.items()}

# Create input-output pairs (sequence length = 3)
seq_length = 3
inputs, targets = [], []
for i in range(len(words) - seq_length):
    inputs.append([word_to_idx[w] for w in words[i:i+seq_length]])
    targets.append(word_to_idx[words[i+seq_length]])
inputs = torch.tensor(inputs)
targets = torch.tensor(targets)

# 2. Model
class LSTMNextWord(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)
    def forward(self, x):
        x = self.embedding(x)
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])  # Only last output for next-word
        return out

# 3. Training
vocab_size = len(vocab)
embed_size = 10
hidden_size = 32
model = LSTMNextWord(vocab_size, embed_size, hidden_size)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

n_epochs = 200
for epoch in range(n_epochs):
    model.train()
    optimizer.zero_grad()
    output = model(inputs)
    loss = loss_fn(output, targets)
    loss.backward()
    optimizer.step()
    if epoch % 40 == 0:
        print(f"Epoch {epoch} Loss: {loss.item():.4f}")

# 4. Prediction
def predict_next(model, text, word_to_idx, idx_to_word):
    model.eval()
    words = text.lower().replace('.', '').replace(',', '').split()
    seq = [word_to_idx.get(w, 0) for w in words[-seq_length:]]
    x = torch.tensor(seq).unsqueeze(0)
    with torch.no_grad():
        out = model(x)
        pred_idx = out.argmax(dim=1).item()
        return idx_to_word[pred_idx]

# Example usage:
test_text = "lstm is useful for"
next_word = predict_next(model, test_text, word_to_idx, idx_to_word)
print(f"Input: '{test_text}' -> Predicted next word: '{next_word}'")

Epoch 0 Loss: 3.0028
Epoch 40 Loss: 0.0339
Epoch 80 Loss: 0.0043
Epoch 120 Loss: 0.0026
Epoch 160 Loss: 0.0018
Input: 'lstm is useful for' -> Predicted next word: 'sequence'
