In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
import json

# Configuration - SIMPLIFIED for better learning
CONFIG = {
    "data_path": r"C:\Users\Admin\Desktop\3dstatisticallearning\Use Cases\Use Cases Machine Learning\Sebastian_Zahlenreihe\Predict Next Number\Predict Next Number\Mappe_nagelneu_new.xlsx",
    "sequence_length": 10,  
    "hidden_dim": 256,  # Reduced
    "num_heads": 8,
    "num_layers": 3,  # Reduced
    "dropout": 0.2,
    "learning_rate": 0.001,  # INCREASED - was too low!
    "batch_size": 32,  # Increased
    "epochs": 200,
    "patience": 25,
}

print("="*70)
print("STARTING TRAINING")
print("="*70)

print("\n[1/6] Loading dataset...")
data = pd.read_excel(CONFIG["data_path"], header=None)
dataset = data.values.astype(np.int64)

print(f"✓ Dataset loaded: {dataset.shape} (rows × columns)")
print(f"  Value range: [{dataset.min()}, {dataset.max()}]")

# Get unique values
unique_values = np.unique(dataset)
num_classes = len(unique_values)
value_to_idx = {int(val): idx for idx, val in enumerate(unique_values)}
idx_to_value = {idx: int(val) for idx, val in enumerate(unique_values)}

print(f"  Unique values: {num_classes}")

# Save mappings immediately
np.save("value_mappings.npy", unique_values)
print("✓ Value mappings saved")

# Convert dataset to indices
print("\n[2/6] Converting data to model format...")
dataset_indices = np.zeros_like(dataset, dtype=np.int64)
for i in range(dataset.shape[0]):
    for j in range(dataset.shape[1]):
        dataset_indices[i, j] = value_to_idx[int(dataset[i, j])]

print(f"✓ Data converted to indices (range: 0-{num_classes-1})")

# Save statistics
stats = {
    "min": int(dataset.min()),
    "max": int(dataset.max()),
    "mean": float(dataset.mean()),
    "std": float(dataset.std()),
    "num_columns": int(dataset.shape[1]),
    "num_classes": int(num_classes),
    "unique_values": unique_values.tolist()
}

with open("model_stats.json", "w") as f:
    json.dump(stats, f, indent=2)
print("✓ Statistics saved")

# Dataset class
class SequenceDataset(Dataset):
    def __init__(self, data, sequence_length):
        self.data = data
        self.sequence_length = sequence_length
        
    def __len__(self):
        return len(self.data) - self.sequence_length
    
    def __getitem__(self, idx):
        x = self.data[idx:idx + self.sequence_length]
        y = self.data[idx + self.sequence_length]
        return torch.LongTensor(x), torch.LongTensor(y)

# Simplified Model
class NumberPredictor(nn.Module):
    def __init__(self, num_classes, num_positions, hidden_dim, num_heads, num_layers, dropout=0.2):
        super(NumberPredictor, self).__init__()
        
        self.num_classes = num_classes
        self.num_positions = num_positions
        
        # Embeddings
        self.value_embedding = nn.Embedding(num_classes, hidden_dim)
        self.position_embedding = nn.Embedding(100, hidden_dim)
        
        # Transformer
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=num_heads,
            dim_feedforward=hidden_dim * 2,
            dropout=dropout,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)
        
        # Output heads - one per position
        self.output_heads = nn.ModuleList([
            nn.Linear(hidden_dim, num_classes) for _ in range(num_positions)
        ])
        
    def forward(self, x):
        # x: (batch, seq_len, num_positions)
        batch_size, seq_len, num_pos = x.shape
        
        # Embed each value
        x_flat = x.reshape(batch_size * seq_len * num_pos)
        embedded = self.value_embedding(x_flat)
        embedded = embedded.reshape(batch_size, seq_len, num_pos, -1)
        
        # Average across positions to get sequence representation
        x = embedded.mean(dim=2)  # (batch, seq_len, hidden_dim)
        
        # Add positional encoding
        positions = torch.arange(seq_len, device=x.device)
        pos_emb = self.position_embedding(positions).unsqueeze(0)
        x = x + pos_emb
        
        # Transformer
        x = self.transformer(x)
        
        # Use last timestep
        x = x[:, -1, :]  # (batch, hidden_dim)
        
        # Predict each position
        outputs = [head(x) for head in self.output_heads]
        return torch.stack(outputs, dim=1)  # (batch, num_positions, num_classes)

# Prepare data
print("\n[3/6] Preparing training and validation sets...")
sequence_length = CONFIG["sequence_length"]
dataset_obj = SequenceDataset(dataset_indices, sequence_length)

train_size = int(0.85 * len(dataset_obj))
val_size = len(dataset_obj) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset_obj, [train_size, val_size]
)

train_loader = DataLoader(train_dataset, batch_size=CONFIG["batch_size"], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=CONFIG["batch_size"], shuffle=False)

print(f"✓ Training samples: {len(train_dataset)}")
print(f"✓ Validation samples: {len(val_dataset)}")

# Initialize model
print("\n[4/6] Initializing model...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"✓ Using device: {device}")

model = NumberPredictor(
    num_classes=num_classes,
    num_positions=dataset.shape[1],
    hidden_dim=CONFIG["hidden_dim"],
    num_heads=CONFIG["num_heads"],
    num_layers=CONFIG["num_layers"],
    dropout=CONFIG["dropout"]
).to(device)

total_params = sum(p.numel() for p in model.parameters())
print(f"✓ Model created with {total_params:,} parameters")

# Training setup
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"])
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10, verbose=True)

# Accuracy calculation
def calculate_accuracy(outputs, targets):
    predictions = torch.argmax(outputs, dim=2)
    correct = (predictions == targets).float()
    return correct.mean().item() * 100

# Training loop
print("\n[5/6] Starting training...")
print("="*70)

best_val_loss = float('inf')
best_val_acc = 0.0
patience_counter = 0

for epoch in range(CONFIG["epochs"]):
    # Training
    model.train()
    train_loss = 0.0
    train_acc = 0.0
    
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        
        optimizer.zero_grad()
        outputs = model(batch_x)
        
        # Calculate loss for all positions
        loss = 0
        for i in range(outputs.shape[1]):
            loss += criterion(outputs[:, i, :], batch_y[:, i])
        loss = loss / outputs.shape[1]
        
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        
        train_loss += loss.item()
        train_acc += calculate_accuracy(outputs, batch_y)
    
    train_loss /= len(train_loader)
    train_acc /= len(train_loader)
    
    # Validation
    model.eval()
    val_loss = 0.0
    val_acc = 0.0
    
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            outputs = model(batch_x)
            
            loss = 0
            for i in range(outputs.shape[1]):
                loss += criterion(outputs[:, i, :], batch_y[:, i])
            loss = loss / outputs.shape[1]
            
            val_loss += loss.item()
            val_acc += calculate_accuracy(outputs, batch_y)
    
    val_loss /= len(val_loader)
    val_acc /= len(val_loader)
    
    # Scheduler step
    scheduler.step(val_loss)
    
    # Print progress
    if (epoch + 1) % 5 == 0 or epoch == 0:
        print(f"Epoch {epoch+1:3d}/{CONFIG['epochs']} | "
              f"Loss: {train_loss:.4f}/{val_loss:.4f} | "
              f"Acc: {train_acc:.2f}%/{val_acc:.2f}%")
    
    # Save best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_val_acc = val_acc
        patience_counter = 0
        
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': train_loss,
            'val_loss': val_loss,
            'val_acc': val_acc,
        }, "model.pth")
        
        print(f"  ✓ Model saved! Val Acc: {val_acc:.2f}%")
    else:
        patience_counter += 1
        if patience_counter >= CONFIG["patience"]:
            print(f"\nEarly stopping at epoch {epoch+1}")
            break

print("\n" + "="*70)
print("[6/6] Training completed!")
print(f"✓ Best validation accuracy: {best_val_acc:.2f}%")
print(f"✓ Best validation loss: {best_val_loss:.4f}")
print("="*70)

# Save config
CONFIG["best_val_acc"] = best_val_acc
CONFIG["best_val_loss"] = best_val_loss
with open("model_config.json", "w") as f:
    json.dump(CONFIG, f, indent=2)

print("\n✓ All files saved:")
print("  - model.pth")
print("  - model_stats.json")
print("  - model_config.json")
print("  - value_mappings.npy")
print("\nReady to run: python app.py")
print("="*70)

STARTING TRAINING

[1/6] Loading dataset...
✓ Dataset loaded: (7199, 20) (rows × columns)
  Value range: [1, 70]
  Unique values: 70
✓ Value mappings saved

[2/6] Converting data to model format...
✓ Data converted to indices (range: 0-69)
✓ Statistics saved

[3/6] Preparing training and validation sets...
✓ Training samples: 6110
✓ Validation samples: 1079

[4/6] Initializing model...
✓ Using device: cpu
✓ Model created with 1,984,632 parameters





[5/6] Starting training...
Epoch   1/200 | Loss: 4.3183/4.2799 | Acc: 1.44%/1.41%
  ✓ Model saved! Val Acc: 1.41%
  ✓ Model saved! Val Acc: 1.52%
  ✓ Model saved! Val Acc: 1.40%
  ✓ Model saved! Val Acc: 1.48%
Epoch   5/200 | Loss: 4.2515/4.2538 | Acc: 1.58%/1.39%
  ✓ Model saved! Val Acc: 1.39%
  ✓ Model saved! Val Acc: 1.55%
Epoch  10/200 | Loss: 4.2467/4.2525 | Acc: 1.65%/1.64%
  ✓ Model saved! Val Acc: 1.46%
Epoch  15/200 | Loss: 4.2458/4.2531 | Acc: 1.68%/1.49%
Epoch  20/200 | Loss: 4.2442/4.2530 | Acc: 1.72%/1.44%
Epoch  25/200 | Loss: 4.2442/4.2528 | Acc: 1.77%/1.53%
Epoch  30/200 | Loss: 4.2436/4.2530 | Acc: 1.80%/1.57%
Epoch  35/200 | Loss: 4.2436/4.2534 | Acc: 1.77%/1.53%

Early stopping at epoch 36

[6/6] Training completed!
✓ Best validation accuracy: 1.46%
✓ Best validation loss: 4.2518

✓ All files saved:
  - model.pth
  - model_stats.json
  - model_config.json
  - value_mappings.npy

Ready to run: python app.py
