In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import pickle
import os

In [None]:
class VehicleNeuralNetwork(nn.Module):
    """Fully connected feed-forward network built from Linear/ReLU/Dropout stages.

    Every entry of ``hidden_layers`` contributes ``Linear -> ReLU -> Dropout``;
    a final ``Linear`` maps the last hidden width to ``output_dim`` with no
    activation after it.

    Args:
        input_dim: Number of features in each input row.
        output_dim: Number of values produced for each input row.
        hidden_layers: Widths of the hidden layers, in order. An empty
            sequence yields a single ``Linear(input_dim, output_dim)``.
        dropout: Dropout probability used after every hidden activation.
    """

    def __init__(self, input_dim, output_dim, hidden_layers=(128, 64, 32), dropout=0.2):
        super().__init__()

        layers = []
        prev_dim = input_dim

        for hidden_dim in hidden_layers:
            layers.append(nn.Linear(prev_dim, hidden_dim))
            layers.append(nn.ReLU())
            # The Dropout module is inserted even when dropout == 0.0 so that the
            # positions of the Linear layers inside the Sequential (and therefore
            # the state_dict keys) do not depend on the dropout value.
            layers.append(nn.Dropout(dropout))
            prev_dim = hidden_dim

        # Output projection: no activation, the raw values are returned.
        layers.append(nn.Linear(prev_dim, output_dim))

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Pass ``x`` through the stacked layers and return the result."""
        return self.network(x)


In [None]:

class VehicleDataProcessor:
    """Turn a logged CSV into scaled (current row -> next row) training tensors.

    Attributes:
        csv_path: Location of the CSV file read by ``preprocess_dataset``.
        scaler_input: ``StandardScaler`` fitted on the training inputs by
            ``prepare_training_data``.
        scaler_output: ``StandardScaler`` fitted on the training targets by
            ``prepare_training_data``.
        input_columns: ``*_curr`` column names chosen by ``preprocess_dataset``.
        output_columns: ``*_next`` column names chosen by ``preprocess_dataset``.
    """

    def __init__(self, csv_path):
        self.csv_path = csv_path
        self.scaler_input = StandardScaler()
        self.scaler_output = StandardScaler()
        self.input_columns = []
        self.output_columns = []

    def preprocess_dataset(self):
        """Preprocess the CSV dataset for supervised learning.

        Reads ``self.csv_path``, treats +/-inf as missing, and builds one
        training row per pair of consecutive CSV rows (k, k+1) in which both
        rows are free of missing values. Columns of row k get the suffix
        ``_curr`` and columns of row k+1 the suffix ``_next``. Columns whose
        name contains ``timestamp`` are dropped, as are the ``_next`` versions
        of ``throttle_pwm``, ``steering_pwm`` and ``sampling_time``.

        Side effects:
            Sets ``self.input_columns`` and ``self.output_columns``.

        Returns:
            The combined DataFrame (``*_curr`` columns followed by ``*_next``
            columns).
        """
        print("Loading dataset...")
        df = pd.read_csv(self.csv_path)

        # Infinite values are treated exactly like missing ones.
        df = df.replace([np.inf, -np.inf], np.nan)

        # True for rows without any missing value.
        row_ok = df.notna().all(axis=1)
        print(f"Dataset shape after cleaning: {(int(row_ok.sum()), df.shape[1])}")

        # A (k, k+1) pair is usable only when BOTH rows are clean. The pairing is
        # done on the original row order, before any row is discarded, so that a
        # removed row never causes row k to be matched with row k+2. The last
        # row has no successor and is excluded through fill_value=False.
        pair_ok = row_ok & row_ok.shift(-1, fill_value=False).astype(bool)

        # Store original column names
        original_cols = df.columns.tolist()

        df_current = df[pair_ok].copy()
        df_next = df.shift(-1)[pair_ok].copy()

        # Rename columns to distinguish current and next
        df_current.columns = [col + '_curr' for col in original_cols]
        df_next.columns = [col + '_next' for col in original_cols]

        # Combine current and next dataframes (both share the same index)
        df_combined = pd.concat([df_current, df_next], axis=1)

        # Remove timestamp columns
        timestamp_cols = [col for col in df_combined.columns if 'timestamp' in col]
        df_combined = df_combined.drop(columns=timestamp_cols)

        # Remove k+1 control inputs (throttle_pwm, steering_pwm, sampling_time);
        # errors='ignore' tolerates logs that lack some of these columns.
        control_cols_next = ['throttle_pwm_next', 'steering_pwm_next', 'sampling_time_next']
        df_combined = df_combined.drop(columns=control_cols_next, errors='ignore')

        # Define input and output columns
        self.input_columns = [col for col in df_combined.columns if col.endswith('_curr')]
        self.output_columns = [col for col in df_combined.columns if col.endswith('_next')]

        print(f"Input features: {len(self.input_columns)}")
        print(f"Output features: {len(self.output_columns)}")

        return df_combined

    def prepare_training_data(self, df_processed, test_size=0.2, random_state=42):
        """Prepare training and validation datasets.

        Args:
            df_processed: DataFrame containing ``self.input_columns`` and
                ``self.output_columns`` (as returned by ``preprocess_dataset``).
            test_size: Passed to ``train_test_split``.
            random_state: Seed passed to ``train_test_split``.

        Returns:
            Tuple ``(X_train, X_test, y_train, y_test)`` of float32 tensors,
            standardised with scalers fitted on the training split only.

        Raises:
            ValueError: If no input or output columns have been selected.
        """
        if not self.input_columns or not self.output_columns:
            raise ValueError(
                "input_columns/output_columns are empty; "
                "run preprocess_dataset() before prepare_training_data()."
            )

        # Extract input and output data
        X = df_processed[self.input_columns].values.astype(np.float32)
        y = df_processed[self.output_columns].values.astype(np.float32)

        # Split into train and test sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

        # Fit the scalers on the training split only, then reuse them on the
        # held-out split so no statistics leak from it.
        X_train_scaled = self.scaler_input.fit_transform(X_train)
        X_test_scaled = self.scaler_input.transform(X_test)

        y_train_scaled = self.scaler_output.fit_transform(y_train)
        y_test_scaled = self.scaler_output.transform(y_test)

        # Convert to PyTorch tensors
        X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
        X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
        y_train_tensor = torch.tensor(y_train_scaled, dtype=torch.float32)
        y_test_tensor = torch.tensor(y_test_scaled, dtype=torch.float32)

        return (X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor)


In [None]:

class VehicleNNTrainer:
    """Training loop, loss bookkeeping and loss plotting for a PyTorch model.

    Attributes:
        model: The model being trained, moved to ``device`` on construction.
        device: Device the model and every batch are moved to.
        train_losses: Mean training loss of each completed epoch.
        val_losses: Mean validation loss of each completed epoch.
    """

    def __init__(self, model, device='cpu'):
        self.model = model.to(device)
        self.device = device
        self.train_losses = []
        self.val_losses = []

    def train_model(self, X_train, X_val, y_train, y_val,
                   epochs=100, batch_size=64, learning_rate=0.001):
        """Train the neural network model.

        Uses MSE loss, Adam (weight decay 1e-5) and a ``ReduceLROnPlateau``
        scheduler driven by the validation loss. After every epoch the mean
        training and validation losses are appended to ``self.train_losses``
        and ``self.val_losses``; progress is printed every 10th epoch.

        Args:
            X_train, y_train: Training input and target tensors.
            X_val, y_val: Validation input and target tensors.
            epochs: Number of passes over the training data.
            batch_size: Batch size for both data loaders.
            learning_rate: Initial Adam learning rate.

        Raises:
            ValueError: If the training or validation set has no samples.
        """
        # Create data loaders
        train_dataset = TensorDataset(X_train, y_train)
        val_dataset = TensorDataset(X_val, y_val)

        n_train = len(train_dataset)
        n_val = len(val_dataset)
        if n_train == 0 or n_val == 0:
            raise ValueError(
                "Training and validation sets must each contain at least one sample."
            )

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        # Define loss function and optimizer
        criterion = nn.MSELoss()
        optimizer = optim.Adam(self.model.parameters(), lr=learning_rate, weight_decay=1e-5)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10, factor=0.5)

        print("Starting training...")

        for epoch in range(epochs):
            # Training phase
            self.model.train()
            train_loss_sum = 0.0

            for batch_x, batch_y in train_loader:
                batch_x, batch_y = batch_x.to(self.device), batch_y.to(self.device)

                optimizer.zero_grad()
                outputs = self.model(batch_x)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

                # criterion returns the mean over the batch; weight it by the
                # batch size so a short final batch is not over-represented in
                # the epoch average.
                train_loss_sum += loss.item() * batch_x.size(0)

            # Validation phase
            self.model.eval()
            val_loss_sum = 0.0

            with torch.no_grad():
                for batch_x, batch_y in val_loader:
                    batch_x, batch_y = batch_x.to(self.device), batch_y.to(self.device)
                    outputs = self.model(batch_x)
                    loss = criterion(outputs, batch_y)
                    val_loss_sum += loss.item() * batch_x.size(0)

            # Per-sample average losses for this epoch
            avg_train_loss = train_loss_sum / n_train
            avg_val_loss = val_loss_sum / n_val

            self.train_losses.append(avg_train_loss)
            self.val_losses.append(avg_val_loss)

            # Learning rate scheduling
            scheduler.step(avg_val_loss)

            if epoch % 10 == 0:
                print(f"Epoch {epoch+1}/{epochs}")
                print(f"Train Loss: {avg_train_loss:.6f}, Val Loss: {avg_val_loss:.6f}")
                print(f"Learning Rate: {optimizer.param_groups[0]['lr']:.8f}")
                print("-" * 50)

    def plot_training_history(self, save_path='/tmp/training_history.png'):
        """Plot training and validation loss per epoch and save the figure.

        Args:
            save_path: File the figure is written to.
        """
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.plot(self.train_losses, label='Training Loss')
        ax.plot(self.val_losses, label='Validation Loss')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Loss')
        ax.set_title('Training History')
        ax.legend()
        ax.grid(True)
        fig.savefig(save_path)
        # Close explicitly so repeated calls do not accumulate open figures.
        plt.close(fig)
        print(f"Training history plot saved to {save_path}")


In [None]:

def main(csv_path='/tmp/vehicle_data_log.csv',
         model_save_path='/tmp/vehicle_nn_model.pth',
         config_save_path='/tmp/vehicle_model_config.pkl',
         epochs=100,
         seed=42):
    """Run the full pipeline: preprocess the CSV, train, and save the artefacts.

    Args:
        csv_path: CSV log to train on. If it does not exist, a message is
            printed and the function returns without doing anything else.
        model_save_path: Destination of the model ``state_dict``.
        config_save_path: Destination of the pickled configuration dict
            (dimensions, column names and the two fitted scalers).
        epochs: Number of training epochs.
        seed: Seed for torch's global RNG (weight initialisation, batch
            shuffling, dropout). Pass ``None`` to leave the RNG untouched.

    Returns:
        None.
    """
    # Check if CSV file exists
    if not os.path.exists(csv_path):
        print(f"CSV file not found: {csv_path}")
        print("Please run the data logger first to collect training data.")
        return

    # Seed torch so repeated runs on the same data are reproducible.
    if seed is not None:
        torch.manual_seed(seed)

    # Process data
    processor = VehicleDataProcessor(csv_path)
    df_processed = processor.preprocess_dataset()

    # Prepare training data
    X_train, X_test, y_train, y_test = processor.prepare_training_data(df_processed)

    print(f"Training data shape: {X_train.shape}")
    print(f"Training labels shape: {y_train.shape}")

    # Create model
    input_dim = X_train.shape[1]
    output_dim = y_train.shape[1]

    model = VehicleNeuralNetwork(input_dim, output_dim)

    # Train model
    trainer = VehicleNNTrainer(model)
    trainer.train_model(X_train, X_test, y_train, y_test, epochs=epochs)

    # Save model
    torch.save(model.state_dict(), model_save_path)
    print(f"Model saved to {model_save_path}")

    # Save scalers and configuration together in one file.
    model_config = {
        'input_dim': input_dim,
        'output_dim': output_dim,
        'input_columns': processor.input_columns,
        'output_columns': processor.output_columns,
        'scaler_input': processor.scaler_input,
        'scaler_output': processor.scaler_output
    }

    # NOTE: the file is a pickle; it should only ever be loaded from a trusted
    # location, since unpickling can execute arbitrary code.
    with open(config_save_path, 'wb') as f:
        pickle.dump(model_config, f)

    print(f"Model configuration saved to {config_save_path}")

    # Plot training history
    trainer.plot_training_history()

    print("Training completed successfully!")

if __name__ == '__main__':
    main()


In [None]:
# Read 'vehicle_data_log.csv' from the current working directory into `df`.
# NOTE(review): this is a relative path, whereas main() reads
# '/tmp/vehicle_data_log.csv' -- confirm which file is intended.
df = pd.read_csv('vehicle_data_log.csv')