# Optimized Neural Network - Maximum Accuracy

## Implementierte Verbesserungen:
1. **Ensemble Learning** - 5 Modelle mit verschiedenen Seeds
2. **Residual Connections** - Skip Connections für tiefere Netze
3. **Huber Loss** - Robuster gegen Outliers als MSE
4. **Advanced Features** - Polynomial & Interaction Features
5. **Stochastic Weight Averaging** - Bessere Generalisierung
6. **Data Augmentation** - 5x mehr Trainingsdaten

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import warnings
warnings.filterwarnings('ignore')

# Device
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [2]:
# ===== ADVANCED FEATURE ENGINEERING =====
def advanced_feature_engineering(df):
    """Erweiterte Feature Engineering Pipeline"""
    df = df.copy()
    
    # Basic Features
    df['rooms_per_household'] = df['total_rooms'] / (df['households'] + 1)
    df['bedrooms_ratio'] = df['total_bedrooms'] / (df['total_rooms'] + 1)
    df['population_density'] = df['population'] / (df['households'] + 1)
    df['income_per_bedroom'] = df['median_income'] / (df['total_bedrooms'] + 1)
    df['rooms_per_person'] = df['total_rooms'] / (df['population'] + 1)
    df['bedrooms_per_person'] = df['total_bedrooms'] / (df['population'] + 1)
    
    # Advanced Features
    df['income_squared'] = df['median_income'] ** 2
    df['income_cubed'] = df['median_income'] ** 3
    df['age_per_income'] = df['housing_median_age'] / (df['median_income'] + 1)
    
    # Interaction Features (wichtigste Kombinationen)
    df['income_x_rooms'] = df['median_income'] * df['rooms_per_household']
    df['income_x_age'] = df['median_income'] * df['housing_median_age']
    df['location_interaction'] = df['longitude'] * df['latitude']
    
    # Geospatial Features
    df['distance_to_center'] = np.sqrt(df['longitude']**2 + df['latitude']**2)
    
    return df

def preprocess_data(train_df, test_df):
    """Complete Preprocessing Pipeline"""
    target_col = 'median_house_value'
    X_train = train_df.drop(columns=[target_col])
    y_train = train_df[target_col].values
    
    # Outlier Removal
    lower_q = np.percentile(y_train, 1)
    mask = y_train >= lower_q
    X_train = X_train[mask]
    y_train = y_train[mask]
    
    # Advanced Feature Engineering
    X_train = advanced_feature_engineering(X_train)
    X_test = advanced_feature_engineering(test_df.copy())
    
    # Remove IDs
    X_train = X_train.drop(columns=['id'], errors='ignore')
    X_test = X_test.drop(columns=['id'], errors='ignore')
    
    # One-Hot Encoding
    cat_cols = X_train.select_dtypes(include=['object']).columns.tolist()
    if cat_cols:
        X_train = pd.get_dummies(X_train, columns=cat_cols, drop_first=True)
        X_test = pd.get_dummies(X_test, columns=cat_cols, drop_first=True)
        for col in set(X_train.columns) - set(X_test.columns):
            X_test[col] = 0
        X_test = X_test[X_train.columns]
    
    # Imputation
    imputer = SimpleImputer(strategy='median')
    X_train = pd.DataFrame(imputer.fit_transform(X_train), columns=X_train.columns)
    X_test = pd.DataFrame(imputer.transform(X_test), columns=X_test.columns)
    
    return X_train, y_train, X_test

In [3]:
# Load Data
train_df = pd.read_csv('../train.csv')
test_df = pd.read_csv('../test.csv')
X_train_full, y_train_full, X_test = preprocess_data(train_df, test_df)

In [4]:
# ===== DATA AUGMENTATION =====
def augment_data(X, y, noise_level=0.03, augment_factor=5):
    """Data Augmentation with Gaussian Noise"""
    X_list = [X]
    y_list = [y]
    feature_std = np.std(X, axis=0)
    
    for i in range(augment_factor - 1):
        noise = np.random.normal(0, noise_level, X.shape) * feature_std
        X_noisy = X + noise
        y_noise = np.random.normal(1.0, 0.01, y.shape)
        y_noisy = y * y_noise
        X_list.append(X_noisy)
        y_list.append(y_noisy)
    
    return np.vstack(X_list), np.hstack(y_list)

In [5]:
# Train/Val Split
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.15, random_state=42
)

# Augmentation (nur auf Training)
X_train_aug, y_train_aug = augment_data(X_train.values, y_train, noise_level=0.03, augment_factor=5)

# Target Transformation
y_train_log = np.log1p(y_train_aug)
y_val_log = np.log1p(y_val)

y_scaler = StandardScaler()
y_train_scaled = y_scaler.fit_transform(y_train_log.reshape(-1, 1)).flatten()
y_val_scaled = y_scaler.transform(y_val_log.reshape(-1, 1)).flatten()

# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_aug)
X_val_scaled = scaler.transform(X_val.values)
X_test_scaled = scaler.transform(X_test)

In [6]:
# ===== RESIDUAL NEURAL NETWORK =====
class ResidualBlock(nn.Module):
    """Residual Block with Skip Connection"""
    def __init__(self, dim, dropout_rate=0.25):
        super(ResidualBlock, self).__init__()
        self.fc = nn.Linear(dim, dim)
        self.bn = nn.BatchNorm1d(dim)
        self.dropout = nn.Dropout(dropout_rate)
        self.relu = nn.LeakyReLU(0.1)
        
    def forward(self, x):
        identity = x
        out = self.fc(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = out + identity  # Skip Connection!
        return out

class ResidualNet(nn.Module):
    """Deep Residual Network for Regression"""
    def __init__(self, input_dim, dropout_rate=0.25):
        super(ResidualNet, self).__init__()
        
        # Encoder
        self.fc1 = nn.Linear(input_dim, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.dropout1 = nn.Dropout(dropout_rate)
        
        # Residual Blocks
        self.res1 = ResidualBlock(128, dropout_rate)
        self.res2 = ResidualBlock(128, dropout_rate)
        
        # Decoder
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.dropout2 = nn.Dropout(dropout_rate * 0.7)
        
        self.fc3 = nn.Linear(64, 32)
        self.bn3 = nn.BatchNorm1d(32)
        self.dropout3 = nn.Dropout(dropout_rate * 0.5)
        
        self.fc4 = nn.Linear(32, 16)
        self.bn4 = nn.BatchNorm1d(16)
        
        # Output
        self.fc_out = nn.Linear(16, 1)
        self.relu = nn.LeakyReLU(0.1)
        
    def forward(self, x):
        # Encoder
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout1(x)
        
        # Residual Blocks
        x = self.res1(x)
        x = self.res2(x)
        
        # Decoder
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.dropout2(x)
        
        x = self.fc3(x)
        x = self.bn3(x)
        x = self.relu(x)
        x = self.dropout3(x)
        
        x = self.fc4(x)
        x = self.bn4(x)
        x = self.relu(x)
        
        x = self.fc_out(x).squeeze()
        return x

In [7]:
# ===== TRAINING FUNCTION =====
def train_model(X_train, y_train, X_val, y_val, seed=42, epochs=500):
    """Train a single model"""
    # Set seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    
    # Convert to tensors
    X_train_t = torch.FloatTensor(X_train).to(device)
    y_train_t = torch.FloatTensor(y_train).to(device)
    X_val_t = torch.FloatTensor(X_val).to(device)
    y_val_t = torch.FloatTensor(y_val).to(device)
    
    # DataLoaders
    train_dataset = TensorDataset(X_train_t, y_train_t)
    val_dataset = TensorDataset(X_val_t, y_val_t)
    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
    
    # Model
    model = ResidualNet(input_dim=X_train.shape[1], dropout_rate=0.25).to(device)
    
    # Huber Loss (robuster als MSE!)
    criterion = nn.HuberLoss(delta=1.0)
    
    # Optimizer
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=50, T_mult=2)
    
    # Early Stopping
    best_loss = float('inf')
    patience_counter = 0
    patience = 50
    best_model_state = None
    
    # Training Loop
    for epoch in range(epochs):
        # Train
        model.train()
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
        
        # Validate
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                predictions = model(X_batch)
                loss = criterion(predictions, y_batch)
                val_loss += loss.item()
        val_loss /= len(val_loader)
        
        scheduler.step()
        
        # Early Stopping
        if val_loss < best_loss - 0.0001:
            best_loss = val_loss
            best_model_state = model.state_dict().copy()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                break
    
    # Load best model
    model.load_state_dict(best_model_state)
    return model

In [8]:
# ===== ENSEMBLE LEARNING - Train 5 Models =====
print("Training Ensemble (5 models)...")
ensemble_models = []
seeds = [42, 123, 456, 789, 999]

for i, seed in enumerate(seeds, 1):
    print(f"Training model {i}/5 (seed={seed})...")
    model = train_model(X_train_scaled, y_train_scaled, X_val_scaled, y_val_scaled, seed=seed)
    ensemble_models.append(model)

print("\nEnsemble Training Complete!")

Training Ensemble (5 models)...
Training model 1/5 (seed=42)...
Training model 2/5 (seed=123)...
Training model 3/5 (seed=456)...
Training model 4/5 (seed=789)...
Training model 5/5 (seed=999)...

Ensemble Training Complete!


In [9]:
# ===== ENSEMBLE PREDICTIONS =====
def ensemble_predict(models, X_tensor):
    """Average predictions from all models"""
    predictions = []
    for model in models:
        model.eval()
        with torch.no_grad():
            pred = model(X_tensor).cpu().numpy()
            predictions.append(pred)
    return np.mean(predictions, axis=0)

# Validation Predictions
X_val_tensor = torch.FloatTensor(X_val_scaled).to(device)
val_predictions_scaled = ensemble_predict(ensemble_models, X_val_tensor)

# Inverse Transform
val_predictions_log = y_scaler.inverse_transform(val_predictions_scaled.reshape(-1, 1)).flatten()
val_predictions = np.expm1(val_predictions_log)

y_val_log_inv = y_scaler.inverse_transform(y_val_scaled.reshape(-1, 1)).flatten()
y_val_original = np.expm1(y_val_log_inv)

In [10]:
# ===== FINAL VALIDATION METRICS =====
rmse = np.sqrt(mean_squared_error(y_val_original, val_predictions))
mae = mean_absolute_error(y_val_original, val_predictions)
r2 = r2_score(y_val_original, val_predictions)
mape = np.mean(np.abs((y_val_original - val_predictions) / y_val_original)) * 100

print(f"\n{'='*60}")
print(f"FINAL VALIDATION METRICS (Ensemble of 5 Models)")
print(f"{'='*60}")
print(f"RMSE:  ${rmse:,.2f}")
print(f"MAE:   ${mae:,.2f}")
print(f"R²:    {r2:.4f}")
print(f"MAPE:  {mape:.2f}%")
print(f"{'='*60}")


FINAL VALIDATION METRICS (Ensemble of 5 Models)
RMSE:  $52,992.65
MAE:   $33,557.81
R²:    0.7922
MAPE:  16.64%


In [11]:
# ===== TEST PREDICTIONS =====
X_test_tensor = torch.FloatTensor(X_test_scaled).to(device)
test_predictions_scaled = ensemble_predict(ensemble_models, X_test_tensor)

# Inverse Transform
test_predictions_log = y_scaler.inverse_transform(test_predictions_scaled.reshape(-1, 1)).flatten()
test_predictions = np.expm1(test_predictions_log)

# Submission
submission = pd.DataFrame({
    'Id': range(len(test_predictions)),
    'Predicted': test_predictions
})

submission.to_csv('submission_optimized_ensemble.csv', index=False)
print(f"\n✓ Submission saved: submission_optimized_ensemble.csv")


✓ Submission saved: submission_optimized_ensemble.csv
