In [27]:
import os
import sys
import pickle
import glob
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
import matplotlib.pyplot as plt

class AdvancedDisasterRiskPreprocessor:
    """
    Comprehensive data preprocessing for disaster risk analysis
    """
    def __init__(self, dataframes):
        """
        Initialize preprocessor with multiple dataframes
        
        Args:
            dataframes (list): List of input dataframes
        """
        self.dataframes = dataframes
        self.processed_features = None
        self.risk_labels = None
    
    def select_and_engineer_features(self):
        """
        Advanced feature selection and engineering across multiple datasets
        
        Returns:
            tuple: Processed features and risk labels
        """
        # Comprehensive risk-related features with fallback columns
        RISK_FEATURE_GROUPS = [
            ['magnitude', 'eq_magnitude', 'Magnitude'],
            ['depth', 'eq_depth', 'Depth'],
            ['wind', 'wind_speed', 'Wind'],
            ['tsunami', 'ts_intensity', 'Tsunami'],
            ['sig', 'Significance'],
            ['mmi', 'MMI'],
            ['pressure', 'Pressure'],
            ['temp', 'Temperature'],
            ['area', 'Area'],
            ['category', 'Category']
        ]
        
        # Damage and impact features with fallback columns
        DAMAGE_FEATURE_GROUPS = [
            ['total_damage_($mil)', 'damage_($mil)', 'Total Damage'],
            ['houses_destroyed', 'Houses Destroyed'],
            ['deaths', 'Total Deaths'],
            ['injuries', 'Total Injuries']
        ]
        
        # Preprocessing steps
        def safe_feature_extraction(row, feature_group):
            """
            Safely extract numeric value from multiple possible columns
            
            Args:
                row (pd.Series): DataFrame row
                feature_group (list): Possible column names
            
            Returns:
                float: Extracted numeric value
            """
            for feature in feature_group:
                # Try to extract value
                value = row.get(feature, np.nan)
                
                # Convert to numeric
                try:
                    numeric_value = pd.to_numeric(value, errors='coerce')
                    if not pd.isna(numeric_value):
                        return numeric_value
                except:
                    continue
            
            return 0.0
        
        # Prepare feature matrix
        feature_matrix = []
        risk_labels = []
        
        # Process each dataframe
        for df in self.dataframes:
            # Process each row in the dataframe
            for _, row in df.iterrows():
                # Extract risk-related features
                features = []
                
                # Extract features from groups
                for feature_group in RISK_FEATURE_GROUPS + DAMAGE_FEATURE_GROUPS:
                    features.append(safe_feature_extraction(row, feature_group))
                
                # Risk scoring mechanism
                def calculate_risk_score(features):
                    """
                    Comprehensive risk scoring
                    
                    Args:
                        features (list): Input features
                    
                    Returns:
                        int: Risk category
                    """
                    # Weighted risk calculation
                    risk_weights = {
                        'magnitude': 0.2,
                        'depth': 0.1,
                        'wind': 0.1,
                        'tsunami': 0.1,
                        'sig': 0.1,
                        'mmi': 0.1,
                        'pressure': 0.1,
                        'temp': 0.1,
                        'area': 0.05,
                        'category': 0.05,
                        'damage': 0.1
                    }
                    
                    # Calculate total risk
                    total_risk = 0
                    for i, (feature_group, weight_key) in enumerate(
                        zip(RISK_FEATURE_GROUPS + DAMAGE_FEATURE_GROUPS, 
                            list(risk_weights.keys()))
                    ):
                        # Normalize feature
                        if len(features) > i:
                            normalized_value = min(max(features[i] / (max(features[i], 1)), 0), 1)
                            feature_weight = risk_weights.get(weight_key, 0.05)
                            total_risk += normalized_value * feature_weight
                    
                    # Risk categorization
                    if total_risk <= 0.2:
                        return 0  # Very Low Risk
                    elif total_risk <= 0.4:
                        return 1  # Low Risk
                    elif total_risk <= 0.6:
                        return 2  # Moderate Risk
                    elif total_risk <= 0.8:
                        return 3  # High Risk
                    else:
                        return 4  # Extreme Risk
                
                # Calculate risk label
                risk_label = calculate_risk_score(features)
                
                # Append features and label
                feature_matrix.append(features)
                risk_labels.append(risk_label)
        
        # Convert to numpy arrays
        X = np.array(feature_matrix, dtype=np.float32)
        y = np.array(risk_labels, dtype=np.int32)
        
        # Impute any remaining missing values
        numeric_imputer = SimpleImputer(strategy='median')
        X = numeric_imputer.fit_transform(X)
        
        # Store processed data
        self.processed_features = X
        self.risk_labels = y
        
        # Logging
        print("\n--- Feature Engineering Diagnostics ---")
        print(f"Feature Matrix Shape: {X.shape}")
        print("Feature Columns:", 
              [col for group in RISK_FEATURE_GROUPS + DAMAGE_FEATURE_GROUPS for col in group])
        print("Risk Level Distribution:")
        unique, counts = np.unique(y, return_counts=True)
        for label, count in zip(unique, counts):
            print(f"  Risk Level {label}: {count} samples")
        
        return X, y

class DisasterRiskNetwork(nn.Module):
    """
    Sophisticated Neural Network for Disaster Risk Prediction
    """
    def __init__(self, input_dim, num_classes):
        super().__init__()
        
        # Feature extraction with adaptive architecture
        self.feature_extractor = nn.Sequential(
            nn.Linear(input_dim, max(64, input_dim * 2)),
            nn.BatchNorm1d(max(64, input_dim * 2)),
            nn.ReLU(),
            nn.Dropout(0.3),
            
            nn.Linear(max(64, input_dim * 2), max(64, input_dim * 2)),
            nn.BatchNorm1d(max(64, input_dim * 2)),
            nn.ReLU(),
            
            nn.Linear(max(64, input_dim * 2), 32),
            nn.BatchNorm1d(32),
            nn.ReLU()
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(16, num_classes)
        )
    
    def forward(self, x):
        features = self.feature_extractor(x)
        return self.classifier(features)

def train_advanced_disaster_model(X, y, epochs=500):
    """
    Comprehensive training pipeline with advanced techniques
    
    Args:
        X (np.array): Input features
        y (np.array): Target labels
        epochs (int): Number of training epochs
    
    Returns:
        tuple: Model, scaler, label encoder, and training diagnostics
    """
    # Preprocessing
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Label encoding
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
    
    # Train-test split with stratification
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y_encoded, 
        test_size=0.2, 
        random_state=42, 
        stratify=y_encoded
    )
    
    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train)
    X_test_tensor = torch.FloatTensor(X_test)
    y_train_tensor = torch.LongTensor(y_train)
    y_test_tensor = torch.LongTensor(y_test)
    
    # Model initialization
    num_classes = len(np.unique(y_encoded))
    model = DisasterRiskNetwork(input_dim=X.shape[1], num_classes=num_classes)
    
    # Loss and Optimizer with gradient clipping
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # Learning rate scheduler
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 
        mode='min', 
        factor=0.5, 
        patience=20
    )
    
    # Training diagnostics
    diagnostics = {
        'train_loss': [], 'test_loss': [],
        'train_accuracy': [], 'test_accuracy': []
    }
    
    # Training loop with advanced error handling
    for epoch in range(epochs):
        try:
            # Training phase
            model.train()
            optimizer.zero_grad()
            train_outputs = model(X_train_tensor)
            train_loss = criterion(train_outputs, y_train_tensor)
            
            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            train_loss.backward()
            optimizer.step()
            
            # Validation phase
            model.eval()
            with torch.no_grad():
                test_outputs = model(X_test_tensor)
                test_loss = criterion(test_outputs, y_test_tensor)
            
            # Calculate accuracies
            train_pred = torch.argmax(train_outputs, dim=1)
            test_pred = torch.argmax(test_outputs, dim=1)
            
            train_accuracy = (train_pred == y_train_tensor).float().mean()
            test_accuracy = (test_pred == y_test_tensor).float().mean()
            
            # Update learning rate
            scheduler.step(test_loss)
            
            # Record diagnostics
            diagnostics['train_loss'].append(train_loss.item())
            diagnostics['test_loss'].append(test_loss.item())
            diagnostics['train_accuracy'].append(train_accuracy.item())
            diagnostics['test_accuracy'].append(test_accuracy.item())
            
            # Periodic logging
            if epoch % 50 == 0:
                print(
                    f"Epoch {epoch}: "
                    f"Train Loss {train_loss.item():.4f}, "
                    f"Train Acc {train_accuracy.item():.4f}, "
                    f"Test Loss {test_loss.item():.4f}, "
                    f"Test Acc {test_accuracy.item():.4f}"
                )
        
        except Exception as e:
            print(f"Error in epoch {epoch}: {e}")
    
    return model, scaler, label_encoder, diagnostics

def save_model_components(model, scaler, label_encoder, save_path='disaster_risk_model.pkl'):
    """
    Save model components for deployment
    
    Args:
        model (nn.Module): Trained neural network
        scaler (StandardScaler): Feature scaler
        label_encoder (LabelEncoder): Label encoder
        save_path (str): Path to save model components
    """
    # Prepare model for saving
    model.eval()  # Set to evaluation mode
    
    # Create saving dictionary
    save_dict = {
        'model_state': model.state_dict(),
        'model_class': model.__class__,
        'input_dim': model.feature_extractor[0].in_features,
        'num_classes': model.classifier[-1].out_features,
        'scaler': scaler,
        'label_encoder': label_encoder,
        'feature_names': [
            # Risk-related features
            'Magnitude', 'Depth', 'Wind Speed', 'Tsunami Intensity', 
            'Significance', 'MMI', 'Pressure', 'Temperature', 
            'Area', 'Category', 
            
            # Damage features
            'Total Damage ($mil)', 'Houses Destroyed', 
            'Deaths', 'Injuries'
        ]
    }
    
    # Save to pickle
    with open(save_path, 'wb') as f:
        pickle.dump(save_dict, f)
    
    print(f"Model components saved to {save_path}")

def load_all_datasets(directory='.'):
    """
    Load all CSV and Excel files from a directory
    
    Args:
        directory (str): Directory to search for datasets
    
    Returns:
        list: List of loaded dataframes
    """
    # Find all CSV and Excel files
    csv_files = glob.glob(os.path.join(directory, '*.csv'))
    excel_files = glob.glob(os.path.join(directory, '*.xlsx'))
    
    # Combine file lists
    all_files = csv_files + excel_files
    
    # Load datasets
    dataframes = []
    for file_path in all_files:
        try:
            # Skip certain files
            if 'diagnostics' in file_path.lower():
                continue
            
            # Load based on file extension
            if file_path.endswith('.csv'):
                df = pd.read_csv(file_path)
            else:
                df = pd.read_excel(file_path)
            
            print(f"Loaded dataset from {file_path}: {len(df)} rows")
            dataframes.append(df)
        
        except Exception as e:
            print(f"Error loading {file_path}: {e}")
    
    return dataframes

def main():
    """
    Comprehensive disaster risk prediction pipeline
    """
    try:
        # Load all datasets from current directory
        dataframes = load_all_datasets()
        
        # Check if datasets were loaded
        if not dataframes:
            raise ValueError("No datasets found. Please ensure CSV or XLSX files are present.")
        
        # Initialize preprocessor with all datasets
        preprocessor = AdvancedDisasterRiskPreprocessor(dataframes)
        
        # Extract and engineer features
        X, y = preprocessor.select_and_engineer_features()
        
        # Train advanced model
        model, scaler, label_encoder, training_diagnostics = train_advanced_disaster_model(X, y)
        
        # Save model components for deployment
        save_model_components(
            model, 
            scaler, 
            label_encoder, 
            save_path='disaster_risk_model.pkl'
        )
    
    except Exception as e:
        import traceback
        print("An error occurred:")
        traceback.print_exc()

if __name__ == "__main__":
    main()

Loaded dataset from .\earthquake.csv: 1000 rows
Loaded dataset from .\flood.csv: 50000 rows
Loaded dataset from .\forestfires.csv: 517 rows
Loaded dataset from .\landslide.csv: 28 rows
Loaded dataset from .\storms.csv: 12230 rows
Loaded dataset from .\stroms.csv: 59228 rows
Loaded dataset from .\tsunami.csv: 2259 rows
Loaded dataset from .\volcano.csv: 877 rows
Loaded dataset from .\mix_all.xlsx: 26955 rows

--- Feature Engineering Diagnostics ---
Feature Matrix Shape: (153094, 14)
Feature Columns: ['magnitude', 'eq_magnitude', 'Magnitude', 'depth', 'eq_depth', 'Depth', 'wind', 'wind_speed', 'Wind', 'tsunami', 'ts_intensity', 'Tsunami', 'sig', 'Significance', 'mmi', 'MMI', 'pressure', 'Pressure', 'temp', 'Temperature', 'area', 'Area', 'category', 'Category', 'total_damage_($mil)', 'damage_($mil)', 'Total Damage', 'houses_destroyed', 'Houses Destroyed', 'deaths', 'Total Deaths', 'injuries', 'Total Injuries']
Risk Level Distribution:
  Risk Level 0: 148104 samples
  Risk Level 1: 3990 sa

In [30]:
import os
import sys
import glob
import pickle
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import (
    confusion_matrix, 
    classification_report, 
    precision_recall_curve, 
    roc_curve, 
    auc
)

class ComprehensiveDisasterRiskPreprocessor:
    """
    Advanced data preprocessing for multi-disaster risk analysis
    """
    def __init__(self, dataframes):
        """
        Initialize preprocessor with multiple dataframes
        
        Args:
            dataframes (list): List of input dataframes
        """
        self.dataframes = dataframes
        self.processed_features = None
        self.risk_labels = None
        self.feature_names = []
    
    def _get_comprehensive_feature_groups(self):
        """
        Generate comprehensive feature groups across all datasets
        
        Returns:
            tuple: Risk and damage feature groups
        """
        # Comprehensive risk-related features with fallback columns
        RISK_FEATURE_GROUPS = [
            # Earthquake features
            ['magnitude', 'eq_magnitude', 'Magnitude', 'Mag'],
            ['depth', 'eq_depth', 'Depth'],
            
            # Wind and storm features
            ['wind', 'wind_speed', 'Wind Speed', 'WindSpeed'],
            ['hurricane_category', 'Category'],
            
            # Tsunami features
            ['tsunami', 'ts_intensity', 'Tsunami Intensity', 'TsunamiHeight'],
            
            # General disaster indicators
            ['sig', 'Significance', 'SignificanceLevel'],
            ['mmi', 'MMI', 'Modified Mercalli Intensity'],
            
            # Environmental features
            ['pressure', 'Pressure', 'AtmosphericPressure'],
            ['temp', 'Temperature', 'AirTemperature'],
            ['area', 'Area', 'AffectedArea'],
            
            # Location and geographic features
            ['latitude', 'Latitude'],
            ['longitude', 'Longitude']
        ]
        
        # Damage and impact features with fallback columns
        DAMAGE_FEATURE_GROUPS = [
            ['total_damage_($mil)', 'damage_($mil)', 'Total Damage', 'TotalDamage'],
            ['houses_destroyed', 'Houses Destroyed', 'BuildingsDestroyed'],
            ['deaths', 'Total Deaths', 'Fatalities'],
            ['injuries', 'Total Injuries', 'Wounded'],
            ['economic_loss', 'Economic Loss', 'EconomicImpact']
        ]
        
        return RISK_FEATURE_GROUPS, DAMAGE_FEATURE_GROUPS
    
    def select_and_engineer_features(self):
        """
        Advanced feature selection and engineering across multiple datasets
        
        Returns:
            tuple: Processed features and risk labels
        """
        # Get feature groups
        RISK_FEATURE_GROUPS, DAMAGE_FEATURE_GROUPS = self._get_comprehensive_feature_groups()
        
        # Preprocessing steps
        def safe_feature_extraction(row, feature_group):
            """
            Safely extract numeric value from multiple possible columns
            
            Args:
                row (pd.Series): DataFrame row
                feature_group (list): Possible column names
            
            Returns:
                float: Extracted numeric value
            """
            for feature in feature_group:
                # Try to extract value
                value = row.get(feature, np.nan)
                
                # Convert to numeric
                try:
                    numeric_value = pd.to_numeric(value, errors='coerce')
                    if not pd.isna(numeric_value):
                        return numeric_value
                except:
                    continue
            
            return 0.0
        
        # Prepare feature matrix
        feature_matrix = []
        risk_labels = []
        
        # Process each dataframe
        for df in self.dataframes:
            # Process each row in the dataframe
            for _, row in df.iterrows():
                # Extract risk-related features
                features = []
                
                # Extract features from groups
                feature_groups = RISK_FEATURE_GROUPS + DAMAGE_FEATURE_GROUPS
                for feature_group in feature_groups:
                    features.append(safe_feature_extraction(row, feature_group))
                
                # Risk scoring mechanism
                def calculate_risk_score(features):
                    """
                    Comprehensive risk scoring
                    
                    Args:
                        features (list): Input features
                    
                    Returns:
                        int: Risk category
                    """
                    # Weighted risk calculation
                    risk_weights = {
                        'magnitude': 0.15,
                        'depth': 0.1,
                        'wind': 0.1,
                        'hurricane': 0.1,
                        'tsunami': 0.1,
                        'sig': 0.1,
                        'mmi': 0.1,
                        'pressure': 0.05,
                        'temp': 0.05,
                        'area': 0.05,
                        'location': 0.05,
                        'damage': 0.1,
                        'economic': 0.05
                    }
                    
                    # Calculate total risk
                    total_risk = 0
                    for i, (feature_group, weight_key) in enumerate(
                        zip(feature_groups, 
                            list(risk_weights.keys()))
                    ):
                        # Normalize feature
                        if len(features) > i:
                            normalized_value = min(max(features[i] / (max(features[i], 1)), 0), 1)
                            feature_weight = risk_weights.get(weight_key, 0.05)
                            total_risk += normalized_value * feature_weight
                    
                    # Risk categorization
                    if total_risk <= 0.2:
                        return 0  # Very Low Risk
                    elif total_risk <= 0.4:
                        return 1  # Low Risk
                    elif total_risk <= 0.6:
                        return 2  # Moderate Risk
                    elif total_risk <= 0.8:
                        return 3  # High Risk
                    else:
                        return 4  # Extreme Risk
                
                # Calculate risk label
                risk_label = calculate_risk_score(features)
                
                # Append features and label
                feature_matrix.append(features)
                risk_labels.append(risk_label)
        
        # Convert to numpy arrays
        X = np.array(feature_matrix, dtype=np.float32)
        y = np.array(risk_labels, dtype=np.int32)
        
        # Impute any remaining missing values
        numeric_imputer = SimpleImputer(strategy='median')
        X = numeric_imputer.fit_transform(X)
        
        # Store processed data
        self.processed_features = X
        self.risk_labels = y
        
        # Generate feature names
        self.feature_names = [
            col for group in RISK_FEATURE_GROUPS + DAMAGE_FEATURE_GROUPS 
            for col in group
        ][:X.shape[1]]
        
        # Logging
        print("\n--- Feature Engineering Diagnostics ---")
        print(f"Feature Matrix Shape: {X.shape}")
        print("Feature Columns:", self.feature_names)
        print("Risk Level Distribution:")
        unique, counts = np.unique(y, return_counts=True)
        for label, count in zip(unique, counts):
            print(f"  Risk Level {label}: {count} samples")
        
        return X, y

class AdvancedDisasterRiskNetwork(nn.Module):
    """
    Sophisticated Neural Network for Comprehensive Disaster Risk Prediction
    """
    def __init__(self, input_dim, num_classes):
        super().__init__()
        
        # Feature extraction with adaptive architecture
        self.feature_extractor = nn.Sequential(
            nn.Linear(input_dim, max(64, input_dim * 2)),
            nn.BatchNorm1d(max(64, input_dim * 2)),
            nn.ReLU(),
            nn.Dropout(0.3),
            
            nn.Linear(max(64, input_dim * 2), max(64, input_dim * 2)),
            nn.BatchNorm1d(max(64, input_dim * 2)),
            nn.ReLU(),
            
            nn.Linear(max(64, input_dim * 2), 32),
            nn.BatchNorm1d(32),
            nn.ReLU()
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(16, num_classes)
        )
    
    def forward(self, x):
        features = self.feature_extractor(x)
        return self.classifier(features)

def train_advanced_disaster_model(X, y, epochs=500):
    """
    Comprehensive training pipeline with advanced techniques
    
    Args:
        X (np.array): Input features
        y (np.array): Target labels
        epochs (int): Number of training epochs
    
    Returns:
        tuple: Model, scaler, label encoder, and training diagnostics
    """
    # Preprocessing
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Label encoding
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
    
    # Train-test split with stratification
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y_encoded, 
        test_size=0.2, 
        random_state=42, 
        stratify=y_encoded
    )
    
    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train)
    X_test_tensor = torch.FloatTensor(X_test)
    y_train_tensor = torch.LongTensor(y_train)
    y_test_tensor = torch.LongTensor(y_test)
    
    # Model initialization
    num_classes = len(np.unique(y_encoded))
    model = AdvancedDisasterRiskNetwork(input_dim=X.shape[1], num_classes=num_classes)
    
    # Loss and Optimizer with gradient clipping
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # Learning rate scheduler
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 
        mode='min', 
        factor=0.5, 
        patience=20
    )
    
    # Training diagnostics
    diagnostics = {
        'train_loss': [], 'test_loss': [],
        'train_accuracy': [], 'test_accuracy': [],
        'y_true': [], 'y_pred': []
    }
    
    # Training loop with advanced error handling
    for epoch in range(epochs):
        try:
            # Training phase
            model.train()
            optimizer.zero_grad()
            train_outputs = model(X_train_tensor)
            train_loss = criterion(train_outputs, y_train_tensor)
            
            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            train_loss.backward()
            optimizer.step()
            
            # Validation phase
            model.eval()
            with torch.no_grad():
                test_outputs = model(X_test_tensor)
                test_loss = criterion(test_outputs, y_test_tensor)
            
            # Calculate accuracies
            train_pred = torch.argmax(train_outputs, dim=1)
            test_pred = torch.argmax(test_outputs, dim=1)
            
            train_accuracy = (train_pred == y_train_tensor).float().mean()
            test_accuracy = (test_pred == y_test_tensor).float().mean()
            
            # Update learning rate
            scheduler.step(test_loss)
            
            # Record diagnostics
            diagnostics['train_loss'].append(train_loss.item())
            diagnostics['test_loss'].append(test_loss.item())
            diagnostics['train_accuracy'].append(train_accuracy.item())
            diagnostics['test_accuracy'].append(test_accuracy.item())
            
            # Store predictions for final analysis
            if epoch == epochs - 1:
                diagnostics['y_true'] = y_test_tensor.numpy()
                diagnostics['y_pred'] = test_pred.numpy()
            
            # Periodic logging
            if epoch % 50 == 0:
                print(
                    f"Epoch {epoch}: "
                    f"Train Loss {train_loss.item():.4f}, "
                    f"Train Acc {train_accuracy.item():.4f}, "
                    f"Test Loss {test_loss.item():.4f}, "
                    f"Test Acc {test_accuracy.item():.4f}"
                )
        
        except Exception as e:
            print(f"Error in epoch {epoch}: {e}")
    
    return model, scaler, label_encoder, diagnostics

def generate_comprehensive_plots(diagnostics, preprocessor, model, X_test, y_test, label_encoder):
    """
    Generate comprehensive visualization plots
    
    Args:
        diagnostics (dict): Training diagnostics
        preprocessor (ComprehensiveDisasterRiskPreprocessor): Feature preprocessor
        model (nn.Module): Trained neural network
        X_test (np.array): Test features
        y_test (np.array): Test labels
        label_encoder (LabelEncoder): Label encoder
    """
    # Create a figure with multiple subplots
    plt.figure(figsize=(20, 15))
    
    # 1. Training and Test Loss
    plt.subplot(2, 3, 1)
    plt.plot(diagnostics['train_loss'], label='Train Loss')
    plt.plot(diagnostics['test_loss'], label='Test Loss')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    
    # 2. Training and Test Accuracy
    plt.subplot(2, 3, 2)
    plt.plot(diagnostics['train_accuracy'], label='Train Accuracy')
    plt.plot(diagnostics['test_accuracy'], label='Test Accuracy')
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    
    # 3. Confusion Matrix
    plt.subplot(2, 3, 3)
    y_true = diagnostics['y_true']
    y_pred = diagnostics['y_pred']
    cm = confusion_matrix(y_true, y_pred)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    
    # 4. Feature Importance Visualization
    plt.subplot(2, 3, 4)
    feature_importance = np.abs(model.feature_extractor[0].weight.detach().numpy()).mean(axis=0)
    feature_names = preprocessor.feature_names[:len(feature_importance)]
    plt.bar(feature_names, feature_importance)
    plt.title('Feature Importance')
    plt.xticks(rotation=90)
    plt.xlabel('Features')
    plt.ylabel('Importance')
    
    # 5. ROC Curve with One-vs-Rest approach
    plt.subplot(2, 3, 5)
    X_test_tensor = torch.FloatTensor(X_test)
    y_pred_proba = torch.softmax(model(X_test_tensor), dim=1).detach().numpy()
    n_classes = y_pred_proba.shape[1]
    
    # One-vs-Rest ROC
    plt.plot([0, 1], [0, 1], 'k--')
    for i in range(n_classes):
        # Binarize the output for this class
        y_true_bin = (y_test == i).astype(int)
        fpr, tpr, _ = roc_curve(y_true_bin, y_pred_proba[:, i])
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, 
                 label=f'ROC (class {label_encoder.inverse_transform([i])[0]}, AUC = {roc_auc:.2f})')
    
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic')
    plt.legend(loc="lower right")
    
    # 6. Precision-Recall Curve with One-vs-Rest approach
    plt.subplot(2, 3, 6)
    for i in range(n_classes):
        # Binarize the output for this class
        y_true_bin = (y_test == i).astype(int)
        precision, recall, _ = precision_recall_curve(y_true_bin, y_pred_proba[:, i])
        plt.plot(recall, precision, 
                 label=f'PR (class {label_encoder.inverse_transform([i])[0]})')
    
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    plt.legend(loc="lower right")
    
    plt.tight_layout()
    plt.savefig('disaster_risk_analysis_plots.png')
    plt.close()
    
    # Print classification report
    print("\n--- Classification Report ---")
    print(classification_report(
        y_true, 
        y_pred, 
        target_names=[str(label) for label in label_encoder.classes_]
    ))

def save_model_components(model, scaler, label_encoder, preprocessor, save_path='disaster_risk_model.pkl'):
    """
    Save model components for deployment
    
    Args:
        model (nn.Module): Trained neural network
        scaler (StandardScaler): Feature scaler
        label_encoder (LabelEncoder): Label encoder
        preprocessor (ComprehensiveDisasterRiskPreprocessor): Feature preprocessor
        save_path (str): Path to save model components
    """
    # Prepare model for saving
    model.eval()  # Set to evaluation mode
    
    # Create saving dictionary
    save_dict = {
        'model_state': model.state_dict(),
        'model_class': model.__class__,
        'input_dim': model.feature_extractor[0].in_features,
        'num_classes': model.classifier[-1].out_features,
        'scaler': scaler,
        'label_encoder': label_encoder,
        'feature_names': preprocessor.feature_names
    }
    
    # Save to pickle
    with open(save_path, 'wb') as f:
        pickle.dump(save_dict, f)
    
    print(f"Model components saved to {save_path}")

def load_all_datasets(directory='.'):
    """
    Load all CSV and Excel files from a directory
    
    Args:
        directory (str): Directory to search for datasets
    
    Returns:
        list: List of loaded dataframes
    """
    # Find all CSV and Excel files
    csv_files = glob.glob(os.path.join(directory, '*.csv'))
    excel_files = glob.glob(os.path.join(directory, '*.xlsx'))
    
    # Combine file lists
    all_files = csv_files + excel_files
    
    # Load datasets
    dataframes = []
    for file_path in all_files:
        try:
            # Skip certain files
            if 'diagnostics' in file_path.lower():
                continue
            
            # Load based on file extension
            if file_path.endswith('.csv'):
                df = pd.read_csv(file_path)
            else:
                df = pd.read_excel(file_path)
            
            print(f"Loaded dataset from {file_path}: {len(df)} rows")
            dataframes.append(df)
        
        except Exception as e:
            print(f"Error loading {file_path}: {e}")
    
    return dataframes

def main():
    """
    Comprehensive disaster risk prediction pipeline
    """
    try:
        # Load all datasets from current directory
        dataframes = load_all_datasets()
        
        # Check if datasets were loaded
        if not dataframes:
            raise ValueError("No datasets found. Please ensure CSV or XLSX files are present.")
        
        # Initialize preprocessor with all datasets
        preprocessor = ComprehensiveDisasterRiskPreprocessor(dataframes)
        
        # Extract and engineer features
        X, y = preprocessor.select_and_engineer_features()
        
        # Train advanced model
        model, scaler, label_encoder, training_diagnostics = train_advanced_disaster_model(X, y)
        
        # Split data for final evaluation
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, 
            test_size=0.2, 
            random_state=42, 
            stratify=y
        )
        
        # Generate comprehensive plots
        generate_comprehensive_plots(
            training_diagnostics, 
            preprocessor, 
            model, 
            scaler.transform(X_test), 
            y_test, 
            label_encoder
        )
        
        # Save model components for deployment
        save_model_components(
            model, 
            scaler, 
            label_encoder, 
            preprocessor,
            save_path='risk_model.pkl'
        )
    
    except Exception as e:
        import traceback
        print("An error occurred:")
        traceback.print_exc()

if __name__ == "__main__":
    main()

Loaded dataset from .\earthquake.csv: 1000 rows
Loaded dataset from .\flood.csv: 50000 rows
Loaded dataset from .\forestfires.csv: 517 rows
Loaded dataset from .\landslide.csv: 28 rows
Loaded dataset from .\storms.csv: 12230 rows
Loaded dataset from .\stroms.csv: 59228 rows
Loaded dataset from .\tsunami.csv: 2259 rows
Loaded dataset from .\volcano.csv: 877 rows
Loaded dataset from .\mix_all.xlsx: 26955 rows

--- Feature Engineering Diagnostics ---
Feature Matrix Shape: (153094, 17)
Feature Columns: ['magnitude', 'eq_magnitude', 'Magnitude', 'Mag', 'depth', 'eq_depth', 'Depth', 'wind', 'wind_speed', 'Wind Speed', 'WindSpeed', 'hurricane_category', 'Category', 'tsunami', 'ts_intensity', 'Tsunami Intensity', 'TsunamiHeight']
Risk Level Distribution:
  Risk Level 0: 150237 samples
  Risk Level 1: 1857 samples
  Risk Level 2: 826 samples
  Risk Level 3: 174 samples
Epoch 0: Train Loss 1.2750, Train Acc 0.9448, Test Loss 1.2572, Test Acc 0.9820
Epoch 50: Train Loss 0.7233, Train Acc 0.9841, 