In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/rnn-char/alice.txt


# RNN

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Load and preprocess data
with open("/kaggle/input/rnn-char/alice.txt", 'r', encoding='utf-8') as f:
    text = f.read().lower()

# Create character to integer and integer to character mappings
chars = sorted(set(text))
char_to_int = {c: i for i, c in enumerate(chars)}
int_to_char = {i: c for i, c in enumerate(chars)}

# Encode text as integers
encoded_text = [char_to_int[c] for c in text]

# Hyperparameters
seq_length = 100
batch_size = 64
n_epochs = 50
learning_rate = 0.003

# Create sequences
def create_sequences(data, seq_length):
    sequences = []
    targets = []
    for i in range(0, len(data) - seq_length):
        sequences.append(data[i:i + seq_length])
        targets.append(data[i + seq_length])
    return np.array(sequences), np.array(targets)

X, y = create_sequences(encoded_text, seq_length)

# Convert to tensors
X = torch.tensor(X, dtype=torch.long)
y = torch.tensor(y, dtype=torch.long)

# DataLoader
dataset = torch.utils.data.TensorDataset(X, y)
loader = torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=batch_size)


In [None]:
# Define the RNN Model
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        x = self.embedding(x)
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out[:, -1])
        return out, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(self.n_layers, batch_size, self.hidden_size)

# Model parameters
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
n_layers = 2

# Initialize the model
model = CharRNN(input_size, hidden_size, output_size, n_layers)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()  # CrossEntropy for character prediction
optimizer = optim.Adam(model.parameters(), lr=learning_rate)  # Adam optimizer


In [None]:
# Training function
def train_model(model, loader, n_epochs=n_epochs):
    model.train()
    for epoch in range(n_epochs):
        epoch_loss = 0
        
        for X_batch, y_batch in loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            
            # Initialize hidden state with the current batch size
            batch_size = X_batch.size(0)
            hidden = model.init_hidden(batch_size).to(device)
            
            # Detach hidden state to prevent backprop through previous steps
            hidden = hidden.detach()
            
            # Zero the gradients
            optimizer.zero_grad()
            
            # Forward pass
            output, hidden = model(X_batch, hidden)
            
            # Compute loss
            loss = criterion(output, y_batch)
            
            # Backpropagation and optimization
            loss.backward()
            optimizer.step()
            
            # Accumulate loss
            epoch_loss += loss.item()

        print(f'Epoch {epoch+1}/{n_epochs}, Loss: {epoch_loss / len(loader):.4f}')

# Train the model
train_model(model, loader, n_epochs=n_epochs)


Epoch 1/50, Loss: 1.9705
Epoch 2/50, Loss: 1.8259
Epoch 3/50, Loss: 1.8048
Epoch 4/50, Loss: 1.7981
Epoch 5/50, Loss: 1.7975
Epoch 6/50, Loss: 1.7986
Epoch 7/50, Loss: 1.8114
Epoch 8/50, Loss: 1.8168
Epoch 9/50, Loss: 1.8272
Epoch 10/50, Loss: 1.8243
Epoch 11/50, Loss: 1.8222
Epoch 12/50, Loss: 1.8303
Epoch 13/50, Loss: 1.8352
Epoch 14/50, Loss: 1.8469
Epoch 15/50, Loss: 1.8581
Epoch 16/50, Loss: 1.8720
Epoch 17/50, Loss: 1.8762
Epoch 18/50, Loss: 1.8898
Epoch 19/50, Loss: 1.8808
Epoch 20/50, Loss: 1.8962
Epoch 21/50, Loss: 1.9345
Epoch 22/50, Loss: 1.9200
Epoch 23/50, Loss: 1.9079
Epoch 24/50, Loss: 1.9027
Epoch 25/50, Loss: 1.9122
Epoch 26/50, Loss: 1.9215
Epoch 27/50, Loss: 1.9256
Epoch 28/50, Loss: 1.9348
Epoch 29/50, Loss: 1.9213
Epoch 30/50, Loss: 1.9255
Epoch 31/50, Loss: 1.9357
Epoch 32/50, Loss: 1.9122
Epoch 33/50, Loss: 1.9201
Epoch 34/50, Loss: 1.9418
Epoch 35/50, Loss: 1.9327
Epoch 36/50, Loss: 1.9307
Epoch 37/50, Loss: 1.9347
Epoch 38/50, Loss: 1.9537
Epoch 39/50, Loss: 2.

In [10]:
# Text generation with temperature
def generate_text(model, start_str, gen_length=1000, temperature=0.01):
    model.eval()
    hidden = model.init_hidden(1).to(device)
    input = torch.tensor([char_to_int[ch] for ch in start_str], dtype=torch.long).unsqueeze(0).to(device)
    generated_text = start_str

    for _ in range(gen_length):
        output, hidden = model(input, hidden)
        output = output.div(temperature).exp()
        char_idx = torch.multinomial(output, 1).item()
        char = int_to_char[char_idx]
        generated_text += char
        input = torch.tensor([[char_idx]], dtype=torch.long).to(device)
    
    return generated_text

# Generate text from a seed
seed_text = "alice was beginning to get very tired"
print(generate_text(model, seed_text, gen_length=1000, temperature=0.654321))


alice was beginning to get very tired of cours lirhat the waster qurt, and voin a casenber.

the use fighin.

the had mistent therum of sork this grown alice offer the courd that could a she exritle of court the rards of the she con’t the cound this the out that she rermire ontion she cours at sorge was of that of this to frong thee. shis the from this from that it and and thing alice. “as of spat it and far a lith arrice, agair of the or list and ter ond a she like caser my shis a lith
of such that lith in a casth a the mat sithire liked of thoudn’t the elor as ond the juring her offure the the right this grick i thrill you fastory a lirher havent the reav asrise of thill the casting varch you as rarrioustion. and the roest ass very of making the waster with and si a stlar you mare mi wast a roes a ver lird of the was of this office office, and the can the from she quout lik sit a core, on she she on sith the fithive of or the cound its alice rut lith very offs down withit it thourter