In [None]:
import os
import string
import random
import torch
import torch.nn as nn
import torch.optim as optim

# Character set definition
# Vocabulary used for one-hot encoding: the ASCII letters followed by a space
# and a handful of punctuation marks. Any character outside this string
# (digits, accented letters, ...) has no entry in `char_to_index`.
all_characters = string.ascii_letters + " .,;'-!?()"
# Size of the vocabulary, i.e. the width of each one-hot vector.
n_characters = len(all_characters)
# Maps each character to its position in `all_characters`.
char_to_index = {ch: i for i, ch in enumerate(all_characters)}

def line_to_tensor(line):
    """One-hot encode a string as a (1, len(line), n_characters) tensor.

    Each position along the sequence axis gets a 1 in the column given by
    `char_to_index`. Characters that are not in `char_to_index` leave their
    row filled with zeros.
    """
    encoded = torch.zeros(1, len(line), n_characters)
    for position, symbol in enumerate(line):
        column = char_to_index.get(symbol)
        if column is not None:
            encoded[0, position, column] = 1
    return encoded

class RNNModel(nn.Module):
    """Single-layer RNN sequence classifier.

    The recurrent layer runs over the whole sequence, the output of the last
    time step is projected to `output_size` scores, and a log-softmax turns
    them into log-probabilities (suitable for `nn.NLLLoss`).

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent hidden state.
        output_size: Number of classes.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x, hidden):
        """Classify a batch of sequences.

        Args:
            x: Input of shape (batch, seq, feature), as required by
                `batch_first=True`.
            hidden: Initial hidden state, e.g. from `init_hidden`.

        Returns:
            A tuple `(log_probs, hidden)` where `log_probs` has shape
            (batch, output_size) and `hidden` is the final RNN state.
        """
        out, hidden = self.rnn(x, hidden)
        # Only the last time step feeds the classifier head.
        out = self.fc(out[:, -1, :])
        out = self.softmax(out)
        return out, hidden

    def init_hidden(self, batch_size=1):
        """Return a zero hidden state of shape (1, batch_size, hidden_size).

        The state is created with the dtype and device of the model's own
        weights, so it stays usable after `.to(device)` or `.double()`
        instead of always being a CPU float32 tensor.
        """
        reference = self.fc.weight
        # Shape: (num_layers * num_directions, batch, hidden_size)
        return torch.zeros(
            1, batch_size, self.hidden_size,
            dtype=reference.dtype, device=reference.device,
        )

def load_eng_fra_data(file_path):
    """Read tab-separated English/French sentence pairs into labelled samples.

    Every line that splits into exactly two tab-separated fields contributes
    two `(text, label)` tuples: the first field labelled 'English' and the
    second labelled 'French'. Other lines are ignored.

    Returns:
        The list of samples, or an empty list (after printing an error) when
        the file does not exist.
    """
    samples = []
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            for raw_line in handle:
                columns = raw_line.strip().split('\t')
                if len(columns) != 2:
                    continue
                # Pair each column with its language label, in file order.
                samples.extend(zip(columns, ('English', 'French')))
    except FileNotFoundError:
        print(f"Error: Could not find file {file_path}")
        return []
    return samples

def load_names_data(folder_path):
    """Load one-name-per-line `.txt` files from a folder as labelled samples.

    Each `<category>.txt` file in `folder_path` defines one category (the
    file name without its extension); every non-blank line in it becomes a
    `(name, category)` sample. Files with other extensions are ignored.

    Args:
        folder_path: Directory containing the category files.

    Returns:
        A tuple `(data, categories)`. `categories` is in sorted file-name
        order so that class indices are reproducible between runs. Returns
        `([], [])` (after printing an error) when the folder does not exist.
    """
    data = []
    categories = []

    try:
        # os.listdir returns entries in arbitrary order; sort so that the
        # category -> index mapping is deterministic.
        for filename in sorted(os.listdir(folder_path)):
            if not filename.endswith(".txt"):
                continue
            category = os.path.splitext(filename)[0]
            categories.append(category)

            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as f:
                for raw_name in f:
                    name = raw_name.strip()
                    # Skip blank lines: an empty name would encode to a
                    # zero-length sequence that the models cannot process.
                    if name:
                        data.append((name, category))
    except FileNotFoundError:
        print(f"Error: Could not find folder {folder_path}")
        return [], []

    return data, categories

def random_training_example(data, category_indices):
    """Draw one random `(line, category)` sample and encode it for training.

    Returns:
        `(category, line, category_tensor, line_tensor)` where
        `category_tensor` is a one-element long tensor holding the position
        of `category` in `category_indices`, and `line_tensor` is the
        `line_to_tensor` encoding of `line`.
    """
    sample = random.choice(data)
    line, category = sample
    label = category_indices.index(category)
    return (
        category,
        line,
        torch.tensor([label], dtype=torch.long),
        line_to_tensor(line),
    )

def train(model, data, category_indices, n_iters=1000, lr=0.005, print_every=100):
    """Train `model` with Adam on randomly drawn single examples.

    Each iteration draws one sample via `random_training_example`, runs a
    forward pass from a fresh hidden state, and takes one optimizer step on
    the NLL loss.

    Args:
        model: Module exposing `init_hidden(batch_size=...)` and returning
            `(log_probs, hidden)` from its forward pass.
        data: Sequence of `(line, category)` samples.
        category_indices: List of categories; a category's position is its
            class index.
        n_iters: Number of training iterations.
        lr: Adam learning rate.
        print_every: Report (and record) the mean loss every this many
            iterations.

    Returns:
        List of mean losses, one per completed `print_every` window. On
        `KeyboardInterrupt` the losses recorded so far are returned.

    Raises:
        Exception: Any other error raised during training is printed and
            re-raised.
    """
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.NLLLoss()

    window_loss = 0.0
    all_losses = []

    try:
        # `step` rather than `iter`, which would shadow the builtin.
        for step in range(1, n_iters + 1):
            category, line, category_tensor, line_tensor = random_training_example(data, category_indices)

            # Initialize hidden state with correct batch size
            hidden = model.init_hidden(batch_size=line_tensor.size(0))

            model.zero_grad()
            output, hidden = model(line_tensor, hidden)

            loss = criterion(output, category_tensor)
            loss.backward()
            optimizer.step()

            # Read the scalar once per step.
            window_loss += loss.item()

            if step % print_every == 0:
                avg_loss = window_loss / print_every
                print(f'Iteration: {step}/{n_iters} | Loss: {avg_loss:.4f} | Example: {line} -> {category}')
                all_losses.append(avg_loss)
                window_loss = 0.0

    except KeyboardInterrupt:
        print("\nTraining interrupted by user")
    except Exception as e:
        print(f"Error during training: {str(e)}")
        raise  # Re-raise the exception for debugging

    return all_losses

def evaluate(model, line, category_indices):
    """Classify a single string and report the model's confidence.

    The model is switched to eval mode and run without gradient tracking.

    Returns:
        `(predicted_category, prob)`: the entry of `category_indices` with
        the highest log-probability, and that log-probability exponentiated.
    """
    model.eval()
    with torch.no_grad():
        encoded = line_to_tensor(line)
        initial_state = model.init_hidden(batch_size=encoded.size(0))
        log_probs, _ = model(encoded, initial_state)

        # Index of the single best class.
        best_index = log_probs.topk(1)[1].item()
        label = category_indices[best_index]

        # The model emits log-probabilities; exp() recovers the probability.
        confidence = torch.exp(log_probs[0][best_index]).item()

        return label, confidence

def main():
    """Train and spot-check the RNN on both classification tasks.

    Loads the English/French sentence data and the names data, trains one
    `RNNModel` per dataset that loaded successfully, and prints predictions
    for a few hand-picked test strings.
    """
    # Hyperparameters
    hidden_size, n_iters, learning_rate = 128, 10000, 0.005

    def report(model, samples, labels):
        # Print the prediction and confidence for each sample string.
        for sample in samples:
            pred_category, confidence = evaluate(model, sample, labels)
            print(f"'{sample}' -> {pred_category} ({confidence:.2%} confident)")

    # Load data
    binary_data = load_eng_fra_data('./data/eng-fra.txt')
    names_data, categories = load_names_data('./data/names')

    if not (binary_data or names_data):
        print("No data loaded. Please check file paths.")
        return

    # Binary classification
    if binary_data:
        print("\nTraining binary classifier (English-French)...")
        binary_categories = ['English', 'French']
        model_binary = RNNModel(n_characters, hidden_size, len(binary_categories))
        train(model_binary, binary_data, binary_categories, n_iters, learning_rate)

        print("\nTesting binary classifier:")
        report(model_binary, ['hello', 'bonjour', 'world', 'monde'], binary_categories)

    # Multiclass classification
    if names_data:
        print("\nTraining multiclass classifier (Names)...")
        model_multiclass = RNNModel(n_characters, hidden_size, len(categories))
        train(model_multiclass, names_data, categories, n_iters, learning_rate)

        print("\nTesting multiclass classifier:")
        report(model_multiclass, ['Mary', 'Giovanni', 'Chen', 'Satoshi'], categories)

# Run the experiment only when this code is executed as a script.
if __name__ == "__main__":
    main()


Training binary classifier (English-French)...
Iteration: 100/10000 | Loss: 0.9020 | Example: Peuvent-elles me voir ? -> French
Iteration: 200/10000 | Loss: 0.9808 | Example: I am glad that you have succeeded. -> English
Iteration: 300/10000 | Loss: 0.8548 | Example: The light has turned green. -> English
Iteration: 400/10000 | Loss: 0.9540 | Example: Let him go. -> English
Iteration: 500/10000 | Loss: 0.8260 | Example: A revolution broke out in Mexico. -> English
Iteration: 600/10000 | Loss: 0.8481 | Example: My wife is cooking. -> English
Iteration: 700/10000 | Loss: 0.8973 | Example: Je voulais simplement m'en assurer. -> French
Iteration: 800/10000 | Loss: 0.9348 | Example: I'm stubborn. -> English
Iteration: 900/10000 | Loss: 0.8466 | Example: Tom is the one that doesn't like me. -> English
Iteration: 1000/10000 | Loss: 0.8746 | Example: La fermeture Éclair de ton pantalon est baissée. -> French
Iteration: 1100/10000 | Loss: 0.9640 | Example: Il y a toujours une première fois pou

In [None]:
import os
import string
import random
import torch
import torch.nn as nn
import torch.optim as optim
import time

# Character set definition
# Vocabulary used for one-hot encoding: the ASCII letters followed by a space
# and a handful of punctuation marks. Any character outside this string
# (digits, accented letters, ...) has no entry in `char_to_index`.
all_characters = string.ascii_letters + " .,;'-!?()"
# Size of the vocabulary, i.e. the width of each one-hot vector.
n_characters = len(all_characters)
# Maps each character to its position in `all_characters`.
char_to_index = {ch: i for i, ch in enumerate(all_characters)}

def line_to_tensor(line):
    """One-hot encode a string as a (1, len(line), n_characters) tensor.

    Characters that are not keys of `char_to_index` leave their row at zero.
    """
    one_hot = torch.zeros(1, len(line), n_characters)  # (batch, seq, features)
    known = (
        (step, char_to_index[ch])
        for step, ch in enumerate(line)
        if ch in char_to_index
    )
    for step, column in known:
        one_hot[0, step, column] = 1
    return one_hot

class LSTMClassifier(nn.Module):
    """LSTM sequence classifier.

    The LSTM runs over the whole sequence; the top layer's output at the last
    time step goes through dropout and a linear layer, and a log-softmax
    yields per-class log-probabilities (suitable for `nn.NLLLoss`).

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the hidden and cell states.
        output_size: Number of classes.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied before the linear layer and,
            when `num_layers > 1`, between LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout=0.2):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            # Inter-layer dropout is only passed for stacked LSTMs.
            dropout=dropout if num_layers > 1 else 0
        )
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x, hidden):
        """Classify a batch of sequences.

        Args:
            x: Input of shape (batch, seq, feature).
            hidden: Tuple `(h_0, c_0)`, e.g. from `init_hidden`.

        Returns:
            `(log_probs, (h_n, c_n))` with `log_probs` of shape
            (batch, output_size).
        """
        out, (hidden, cell) = self.lstm(x, hidden)
        out = self.dropout(out[:, -1, :])  # Use last output
        out = self.fc(out)
        out = self.softmax(out)
        return out, (hidden, cell)

    def init_hidden(self, batch_size=1):
        """Return zero `(h_0, c_0)` states, each (num_layers, batch_size, hidden_size).

        The states are created with the dtype and device of the model's own
        weights, so they stay usable after `.to(device)` or `.double()`
        instead of always being CPU float32 tensors.
        """
        reference = self.fc.weight
        shape = (self.num_layers, batch_size, self.hidden_size)
        return (torch.zeros(shape, dtype=reference.dtype, device=reference.device),
                torch.zeros(shape, dtype=reference.dtype, device=reference.device))

class GRUClassifier(nn.Module):
    """GRU sequence classifier.

    The GRU runs over the whole sequence; the top layer's output at the last
    time step goes through dropout and a linear layer, and a log-softmax
    yields per-class log-probabilities (suitable for `nn.NLLLoss`).

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the hidden state.
        output_size: Number of classes.
        num_layers: Number of stacked GRU layers.
        dropout: Dropout probability applied before the linear layer and,
            when `num_layers > 1`, between GRU layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout=0.2):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            # Inter-layer dropout is only passed for stacked GRUs.
            dropout=dropout if num_layers > 1 else 0
        )
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x, hidden):
        """Classify a batch of sequences.

        Args:
            x: Input of shape (batch, seq, feature).
            hidden: Initial hidden state, e.g. from `init_hidden`.

        Returns:
            `(log_probs, hidden)` with `log_probs` of shape
            (batch, output_size) and `hidden` the final GRU state.
        """
        out, hidden = self.gru(x, hidden)
        out = self.dropout(out[:, -1, :])  # Use last output
        out = self.fc(out)
        out = self.softmax(out)
        return out, hidden

    def init_hidden(self, batch_size=1):
        """Return a zero hidden state of shape (num_layers, batch_size, hidden_size).

        The state is created with the dtype and device of the model's own
        weights, so it stays usable after `.to(device)` or `.double()`
        instead of always being a CPU float32 tensor.
        """
        reference = self.fc.weight
        return torch.zeros(
            self.num_layers, batch_size, self.hidden_size,
            dtype=reference.dtype, device=reference.device,
        )

def load_data(data_path, data_type='names'):
    """Load either the names dataset or the English/French sentence pairs.

    Args:
        data_path: For `data_type='names'`, a directory of `<category>.txt`
            files with one name per line. For any other `data_type`, a file
            of tab-separated English/French sentence pairs.
        data_type: `'names'` selects the names loader; every other value
            selects the sentence-pair loader.

    Returns:
        `(data, categories)` where `data` is a list of `(text, category)`
        tuples. For names, `categories` follows sorted file-name order so
        class indices are reproducible; for sentence pairs it is
        `['English', 'French']`.
    """
    if data_type == 'names':
        data = []
        categories = []
        # os.listdir returns entries in arbitrary order; sort so that the
        # category -> index mapping is deterministic.
        for filename in sorted(os.listdir(data_path)):
            if not filename.endswith(".txt"):
                continue
            category = os.path.splitext(filename)[0]
            categories.append(category)
            with open(os.path.join(data_path, filename), 'r', encoding='utf-8') as f:
                for raw_name in f:
                    name = raw_name.strip()
                    # Skip blank lines: an empty name would encode to a
                    # zero-length sequence that the models cannot process.
                    if name:
                        data.append((name, category))
        return data, categories

    # eng-fra
    data = []
    with open(data_path, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split('\t')
            # Only lines with exactly two fields are sentence pairs.
            if len(parts) == 2:
                data.append((parts[0], 'English'))
                data.append((parts[1], 'French'))
    return data, ['English', 'French']

def train_model(model_class, data, categories, model_params, train_params):
    """Build a model and train it with Adam on randomly drawn single examples.

    Args:
        model_class: Class instantiated as `model_class(**model_params)`; the
            instance must expose `init_hidden()` and return
            `(log_probs, hidden)` from its forward pass.
        data: Sequence of `(line, category)` samples.
        categories: List of categories; a category's position is its class
            index.
        model_params: Keyword arguments for `model_class`.
        train_params: Dict with keys `'learning_rate'`, `'n_iterations'` and
            `'print_every'`.

    Returns:
        `(model, all_losses)`: the trained model (left in training mode) and
        the mean loss of each completed `print_every` window.
    """
    # Create model
    model = model_class(**model_params)
    model.train()

    # Training setup
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=train_params['learning_rate'])

    # Loop-invariant settings, read once instead of on every iteration.
    n_iterations = train_params['n_iterations']
    print_every = train_params['print_every']

    # Training loop
    start_time = time.time()
    current_loss = 0
    all_losses = []

    # `step` rather than `iter`, which would shadow the builtin.
    for step in range(1, n_iterations + 1):
        # Get random training example
        line, category = random.choice(data)
        category_tensor = torch.tensor([categories.index(category)], dtype=torch.long)
        line_tensor = line_to_tensor(line)

        # Training step: every example starts from a fresh hidden state.
        hidden = model.init_hidden()
        model.zero_grad()

        output, hidden = model(line_tensor, hidden)
        loss = criterion(output, category_tensor)

        loss.backward()
        optimizer.step()

        current_loss += loss.item()

        # Print progress
        if step % print_every == 0:
            avg_loss = current_loss / print_every
            elapsed = time.time() - start_time
            print(f'Iteration: {step}/{n_iterations} | '
                  f'Loss: {avg_loss:.4f} | Time: {elapsed:.1f}s | '
                  f'Example: {line} -> {category}')
            current_loss = 0
            all_losses.append(avg_loss)

    return model, all_losses

def evaluate(model, line, categories):
    """Classify a single string and report the model's confidence.

    The model is switched to eval mode and run without gradient tracking.

    Returns:
        `(predicted_category, prob)`: the entry of `categories` with the
        highest log-probability, and that log-probability exponentiated.
    """
    model.eval()
    with torch.no_grad():
        encoded = line_to_tensor(line)
        log_probs, _ = model(encoded, model.init_hidden())

        # Index of the single best class.
        best_index = log_probs.topk(1)[1].item()
        label = categories[best_index]

        # The model emits log-probabilities; exp() recovers the probability.
        confidence = torch.exp(log_probs[0][best_index]).item()

        return label, confidence

def main():
    """Train an LSTM and a GRU on both tasks and compare their predictions.

    For the binary (English/French) task and then the multiclass (names)
    task: load the data, train one `LSTMClassifier` and one `GRUClassifier`
    with the same settings, and print both models' predictions for a few
    hand-picked test strings.
    """
    # Model parameters
    model_params = dict(
        input_size=n_characters,
        hidden_size=256,
        output_size=None,  # Will be set based on data
        num_layers=2,
        dropout=0.2,
    )

    # Training parameters
    train_params = dict(n_iterations=10000, learning_rate=0.001, print_every=500)

    # (task name, data path, loader kind, strings used for the spot check)
    tasks = (
        ('binary', './data/eng-fra.txt', 'eng-fra',
         ['hello', 'bonjour', 'world', 'monde']),
        ('multiclass', './data/names', 'names',
         ['Mary', 'Giovanni', 'Chen', 'Satoshi']),
    )

    for data_type, data_path, loader_kind, test_words in tasks:
        print(f"\nTraining on {data_type} classification task...")

        data, categories = load_data(data_path, loader_kind)
        model_params['output_size'] = len(categories)

        # Train LSTM
        print("\nTraining LSTM model...")
        lstm_model, _ = train_model(
            LSTMClassifier, data, categories, model_params, train_params)

        # Train GRU
        print("\nTraining GRU model...")
        gru_model, _ = train_model(
            GRUClassifier, data, categories, model_params, train_params)

        # Test both models
        print("\nTesting models:")
        for word in test_words:
            lstm_pred, lstm_conf = evaluate(lstm_model, word, categories)
            gru_pred, gru_conf = evaluate(gru_model, word, categories)

            print(f"\nInput: '{word}'")
            print(f"LSTM: {lstm_pred} ({lstm_conf:.2%} confident)")
            print(f"GRU:  {gru_pred} ({gru_conf:.2%} confident)")

# Run the experiment only when this code is executed as a script.
if __name__ == "__main__":
    main()


Training on binary classification task...

Training LSTM model...
Iteration: 500/10000 | Loss: 0.7373 | Time: 9.8s | Example: Her father works at the bank. -> English
Iteration: 1000/10000 | Loss: 0.7046 | Time: 19.8s | Example: He will wait. -> English
Iteration: 1500/10000 | Loss: 0.7033 | Time: 30.2s | Example: Je ne peux rien faire. -> French
Iteration: 2000/10000 | Loss: 0.6919 | Time: 40.4s | Example: Je pense que c'est bien pour toi que tu lises ce livre. -> French
Iteration: 2500/10000 | Loss: 0.7000 | Time: 50.3s | Example: Ils se sont mis d'accord pour commencer tôt. -> French
Iteration: 3000/10000 | Loss: 0.6975 | Time: 60.0s | Example: I am a tourist. -> English
Iteration: 3500/10000 | Loss: 0.6980 | Time: 70.3s | Example: Combien de canettes de bière avez-vous bu ? -> French
Iteration: 4000/10000 | Loss: 0.7021 | Time: 80.6s | Example: J'étais seul sans elle. -> French
Iteration: 4500/10000 | Loss: 0.6982 | Time: 91.0s | Example: Il y a quelque chose que je dois te dire a