In [10]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
import joblib
import json
from pathlib import Path
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides deprecation and numerical warnings raised
# by pandas / scikit-learn / torch; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [11]:
# Single seed shared by NumPy and PyTorch here, and passed as random_state to the
# scikit-learn splits later in the notebook.
RANDOM_SEED = 42

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
if device.type == 'cuda':
    torch.cuda.manual_seed(RANDOM_SEED)

Using device: cuda


In [12]:
# Both locations are resolved relative to the notebook's working directory.
data_path = Path('..') / 'data' / 'processed' / 'processed.csv'
model_dir = Path('..') / 'models'

# Create the output folder on first run; an already existing folder is left untouched.
model_dir.mkdir(exist_ok=True)

In [13]:
# Read the preprocessed dataset from disk and report its (rows, columns) shape.
df = pd.read_csv(data_path)
print(f"Loaded dataset: {df.shape}")

Loaded dataset: (503, 61)


In [14]:
# Column the classifier is trained to predict.
target_col = 'ESG Risk Level'

# Columns that are never offered to the model as inputs (identifiers, free text,
# categorical descriptors, and the target itself).
exclude_cols = [
    'Symbol', 'Name', 'Address', 'Description', 'Sector', 'Industry',
    'Controversy Level', 'ESG Risk Level', 'ESG Risk Percentile', 'Employee Size',
]

# Candidate inputs: every numeric column, minus anything on the exclusion list.
# Column order follows the DataFrame's column order.
numeric_cols = list(df.select_dtypes(include=[np.number]).columns)
feature_cols = list(filter(lambda name: name not in exclude_cols, numeric_cols))
# NOTE(review): the recorded run of this notebook reports 100% validation and test
# accuracy, which usually means a numeric column that encodes the target (for example a
# risk score or an encoded copy of the label) is still in feature_cols. Confirm against
# the CSV's columns before trusting the metrics.

In [15]:
# Feature matrix: a fresh frame in which missing numeric values are replaced by 0.
X = df[feature_cols].fillna(0)

# Target series: rows without a label are assigned 'Medium'.
# NOTE(review): imputing the label fabricates ground truth for those rows; confirm that
# this is intended rather than dropping them.
y = df[target_col].fillna('Medium')

print(f"Features: {len(feature_cols)}")
print(f"Target distribution:\n{y.value_counts()}")

Features: 50
Target distribution:
Low           260
Medium        184
High           50
Negligible      6
Severe          3
Name: ESG Risk Level, dtype: int64


In [16]:
# Collapse the five raw risk levels into three classes:
# 0 = Low (+ Negligible), 1 = Medium, 2 = High (+ Severe).
label_mapping = {
    'Low': 0,
    'Negligible': 0,
    'Medium': 1,
    'High': 2,
    'Severe': 2,
}

# Labels absent from the mapping become NaN after map(); those rows fall back to
# class 1 (Medium) before the cast to integer.
y_encoded = y.map(label_mapping).fillna(1).astype(int)

# Number of distinct classes actually present in the encoded target.
num_classes = y_encoded.nunique()

print(f"Classes: {sorted(y_encoded.unique())}")
print(f"Class distribution:\n{y_encoded.value_counts().sort_index()}")

Classes: [0, 1, 2]
Class distribution:
0    266
1    184
2     53
Name: ESG Risk Level, dtype: int64


In [17]:
# Stage 1: hold out 15% of all rows as the test set, stratified on the encoded label so
# each split keeps the overall class proportions.
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y_encoded, test_size=0.15, random_state=RANDOM_SEED, stratify=y_encoded
)
# Stage 2: carve the validation set out of the remaining 85%. 0.176 * 0.85 is roughly
# 0.15, so validation ends up about the same size as the test set.
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=0.176, random_state=RANDOM_SEED, stratify=y_temp
)
print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

Train: (351, 50), Val: (76, 50), Test: (76, 50)


In [18]:
# Estimate the scaling statistics on the training split only, then apply the same
# transform to every split so validation/test rows never influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled, X_test_scaled = (scaler.transform(split) for split in (X_val, X_test))

In [19]:
# Scaled features become float32 tensors (the dtype nn.Linear uses by default).
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

# Class indices become int64 tensors, as required for CrossEntropyLoss targets.
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.long)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

In [20]:
# Pair each split's feature tensor with its label tensor so a DataLoader can batch them.
train_dataset, val_dataset, test_dataset = (
    TensorDataset(features, labels)
    for features, labels in (
        (X_train_tensor, y_train_tensor),
        (X_val_tensor, y_val_tensor),
        (X_test_tensor, y_test_tensor),
    )
)

In [21]:
class ESGRiskClassifier(nn.Module):
    """Feed-forward classifier built from repeated Linear/BatchNorm/ReLU/Dropout stages.

    Args:
        input_dim: Number of input features per sample.
        hidden_dims: Iterable with the width of each hidden stage, in order.
        num_classes: Size of the output layer (one logit per class).
        dropout: Dropout probability applied after every hidden stage.
    """

    def __init__(self, input_dim, hidden_dims, num_classes, dropout=0.3):
        super().__init__()
        # Consecutive pairs of widths give (in_features, out_features) for each stage.
        widths = [input_dim, *hidden_dims]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [
                nn.Linear(fan_in, fan_out),
                nn.BatchNorm1d(fan_out),
                nn.ReLU(),
                nn.Dropout(dropout),
            ]
        # Output head: raw logits, no softmax.
        stack.append(nn.Linear(widths[-1], num_classes))
        self.network = nn.Sequential(*stack)

    def forward(self, x):
        """Return the class logits for a batch of feature vectors."""
        logits = self.network(x)
        return logits

In [22]:
# Network dimensions: the input width follows the scaled feature matrix, and the hidden
# widths are fixed for every model built in this notebook.
input_dim = X_train_scaled.shape[1]
hidden_dims = [256, 128, 64]

# Instantiate once to print the layer layout and parameter counts.
model = ESGRiskClassifier(input_dim, hidden_dims, num_classes, dropout=0.3).to(device)
print(f"Model architecture:\n{model}")

total_params = sum(map(torch.numel, model.parameters()))
trainable_params = sum(
    map(torch.numel, filter(lambda p: p.requires_grad, model.parameters()))
)
print(f"\nTotal parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

Model architecture:
ESGRiskClassifier(
  (network): Sequential(
    (0): Linear(in_features=50, out_features=256, bias=True)
    (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
    (4): Linear(in_features=256, out_features=128, bias=True)
    (5): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.3, inplace=False)
    (8): Linear(in_features=128, out_features=64, bias=True)
    (9): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): Dropout(p=0.3, inplace=False)
    (12): Linear(in_features=64, out_features=3, bias=True)
  )
)

Total parameters: 55,299
Trainable parameters: 55,299


In [23]:
# Inverse-frequency class weights computed from the training labels, rescaled so the
# weights sum to the number of classes, then moved to the training device.
class_counts = torch.bincount(y_train_tensor)
inverse_freq = 1.0 / class_counts.float()
class_weights = (inverse_freq / inverse_freq.sum() * len(inverse_freq)).to(device)

# Weighted cross-entropy: errors on rarer classes cost more.
criterion = nn.CrossEntropyLoss(weight=class_weights)

In [24]:
def train_with_config(batch_size, learning_rate, weight_decay, num_epochs):
    """Train a fresh ESGRiskClassifier with one hyperparameter setting.

    Uses the notebook-level globals ``train_dataset``, ``val_dataset``, ``input_dim``,
    ``hidden_dims``, ``num_classes``, ``device`` and ``criterion``.

    Training uses AdamW with gradient-norm clipping at 1.0, halves the learning rate
    when the validation loss has not improved for 5 epochs, and stops early after 15
    consecutive epochs without a new best validation loss.

    Args:
        batch_size: Mini-batch size for both the training and validation loaders.
        learning_rate: Initial AdamW learning rate.
        weight_decay: AdamW weight-decay coefficient.
        num_epochs: Maximum number of epochs to run.

    Returns:
        Tuple ``(best_val_loss, best_val_acc, model_instance)``. The loss and accuracy
        are those of the epoch with the lowest validation loss, and the returned model
        carries the weights from that same epoch. If no epoch improved on the initial
        ``inf`` (for example ``num_epochs == 0``), the model is returned as it stands.
    """
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    model_instance = ESGRiskClassifier(input_dim, hidden_dims, num_classes, dropout=0.3).to(device)
    optimizer = optim.AdamW(model_instance.parameters(), lr=learning_rate, weight_decay=weight_decay)
    # `verbose` is left at its default (silent); the argument is deprecated in recent
    # PyTorch releases.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

    best_val_loss = float('inf')
    best_val_acc = 0.0
    best_state = None  # snapshot of the weights at the best validation epoch
    patience_counter = 0
    early_stop_patience = 15

    for _ in range(num_epochs):
        # ---- training pass ----
        model_instance.train()
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            optimizer.zero_grad()
            loss = criterion(model_instance(batch_X), batch_y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model_instance.parameters(), max_norm=1.0)
            optimizer.step()

        # ---- validation pass ----
        model_instance.eval()
        val_loss_sum = 0.0
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model_instance(batch_X)
                batch_n = batch_y.size(0)
                # Weight each batch's mean loss by its size so a smaller last batch
                # does not count as much as a full one.
                val_loss_sum += criterion(outputs, batch_y).item() * batch_n
                val_correct += (outputs.argmax(dim=1) == batch_y).sum().item()
                val_total += batch_n

        val_loss = val_loss_sum / val_total
        val_acc = val_correct / val_total

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_val_acc = val_acc
            patience_counter = 0
            # state_dict() hands back references to the live tensors, which later
            # optimizer steps overwrite; clone them to freeze this epoch's weights.
            best_state = {
                key: value.detach().clone()
                for key, value in model_instance.state_dict().items()
            }
        else:
            patience_counter += 1
            if patience_counter >= early_stop_patience:
                break

    # Roll back to the best epoch so the returned model matches the returned metrics
    # instead of carrying the weights of the last (possibly worse) epoch.
    if best_state is not None:
        model_instance.load_state_dict(best_state)

    return best_val_loss, best_val_acc, model_instance

In [25]:
# Search space for the grid search: every combination of these values is trained once.
hyperparameter_grid = dict(
    batch_size=[32, 64],
    learning_rate=[1e-3, 5e-4],
    weight_decay=[1e-4, 1e-5],
    num_epochs=[100],  # upper bound on the number of epochs per configuration
)

In [26]:
# Exhaustive grid search: train one model per hyperparameter combination and keep the
# configuration (and weights) with the lowest validation loss.
best_config = None
best_val_loss = float('inf')
best_model_state = None
results = []

# Enumerate combinations in the same nesting order as the grid's keys:
# batch size outermost, number of epochs innermost.
grid_points = [
    (bs_choice, lr_choice, wd_choice, epoch_choice)
    for bs_choice in hyperparameter_grid['batch_size']
    for lr_choice in hyperparameter_grid['learning_rate']
    for wd_choice in hyperparameter_grid['weight_decay']
    for epoch_choice in hyperparameter_grid['num_epochs']
]

for batch_size, lr, wd, epochs in grid_points:
    print(f"\nTesting: bs={batch_size}, lr={lr}, wd={wd}, epochs={epochs}")
    val_loss, val_acc, trained_model = train_with_config(batch_size, lr, wd, epochs)
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

    config = {
        'batch_size': batch_size,
        'learning_rate': lr,
        'weight_decay': wd,
        'num_epochs': epochs
    }
    # One record per run: the configuration plus its validation metrics.
    results.append({**config, 'val_loss': val_loss, 'val_acc': val_acc})

    # Strict '<' means the earliest configuration wins when losses tie.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_config = config
        best_model_state = trained_model.state_dict()

print(f"\nBest configuration: {best_config}")
print(f"Best validation loss: {best_val_loss:.4f}")


Testing: bs=32, lr=0.001, wd=0.0001, epochs=100
Val Loss: 0.0001, Val Acc: 1.0000

Testing: bs=32, lr=0.001, wd=1e-05, epochs=100
Val Loss: 0.0001, Val Acc: 1.0000

Testing: bs=32, lr=0.0005, wd=0.0001, epochs=100
Val Loss: 0.0004, Val Acc: 1.0000

Testing: bs=32, lr=0.0005, wd=1e-05, epochs=100
Val Loss: 0.0003, Val Acc: 1.0000

Testing: bs=64, lr=0.001, wd=0.0001, epochs=100
Val Loss: 0.0004, Val Acc: 1.0000

Testing: bs=64, lr=0.001, wd=1e-05, epochs=100
Val Loss: 0.0033, Val Acc: 1.0000

Testing: bs=64, lr=0.0005, wd=0.0001, epochs=100
Val Loss: 0.0017, Val Acc: 1.0000

Testing: bs=64, lr=0.0005, wd=1e-05, epochs=100
Val Loss: 0.0013, Val Acc: 1.0000

Best configuration: {'batch_size': 32, 'learning_rate': 0.001, 'weight_decay': 1e-05, 'num_epochs': 100}
Best validation loss: 0.0001


In [27]:
# Rebuild the network with the same architecture used during the search and load the
# weights that were kept for the best-scoring configuration.
final_model = ESGRiskClassifier(input_dim, hidden_dims, num_classes, dropout=0.3).to(device)
final_model.load_state_dict(best_model_state)
# Put the Dropout and BatchNorm layers into inference mode. As the last expression of
# the cell, this also displays the module structure.
final_model.eval()

ESGRiskClassifier(
  (network): Sequential(
    (0): Linear(in_features=50, out_features=256, bias=True)
    (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
    (4): Linear(in_features=256, out_features=128, bias=True)
    (5): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.3, inplace=False)
    (8): Linear(in_features=128, out_features=64, bias=True)
    (9): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): Dropout(p=0.3, inplace=False)
    (12): Linear(in_features=64, out_features=3, bias=True)
  )
)

In [28]:
# Score the restored model on the held-out test split, collecting per-sample predictions
# and labels so scikit-learn can compute the metrics.
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
all_preds, all_labels = [], []

with torch.no_grad():
    for batch_X, batch_y in test_loader:
        batch_X = batch_X.to(device)
        outputs = final_model(batch_X)
        # Predicted class = index of the largest logit in each row.
        predicted = torch.max(outputs, dim=1).indices
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(batch_y.numpy())

test_accuracy = accuracy_score(all_labels, all_preds)
# Weighted F1: per-class F1 averaged by each class's support.
test_f1 = f1_score(all_labels, all_preds, average='weighted')

print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test F1 Score: {test_f1:.4f}")

Test Accuracy: 1.0000
Test F1 Score: 1.0000


In [29]:
# Display names for the encoded classes 0, 1, 2. Per label_mapping, 'Low' also contains
# the original 'Negligible' rows and 'High' also contains the original 'Severe' rows.
class_names = ['Low', 'Medium', 'High']
print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=class_names))
# Rows are the true classes and columns the predicted classes, both in index order.
print("\nConfusion Matrix:")
print(confusion_matrix(all_labels, all_preds))


Classification Report:
              precision    recall  f1-score   support

         Low       1.00      1.00      1.00        40
      Medium       1.00      1.00      1.00        28
        High       1.00      1.00      1.00         8

    accuracy                           1.00        76
   macro avg       1.00      1.00      1.00        76
weighted avg       1.00      1.00      1.00        76


Confusion Matrix:
[[40  0  0]
 [ 0 28  0]
 [ 0  0  8]]


In [30]:
# Bundle the weights with everything needed to rebuild the model and preprocess new
# rows: architecture, chosen hyperparameters, scaler statistics, feature order and the
# label mapping.
checkpoint_path = model_dir / 'esg_risk_model.pt'

torch.save({
    'model_state_dict': final_model.state_dict(),
    'model_architecture': {
        'input_dim': input_dim,
        'hidden_dims': hidden_dims,
        'num_classes': num_classes,
        'dropout': 0.3
    },
    'best_config': best_config,
    'scaler_params': {
        'mean': scaler.mean_.tolist(),
        'scale': scaler.scale_.tolist()
    },
    'feature_columns': feature_cols,
    'label_mapping': label_mapping,
    # Cast the metrics to plain Python floats, as the metadata JSON cell does. The
    # scikit-learn metric functions may return NumPy scalars, and pickling those into
    # the checkpoint stops it from loading under torch.load(..., weights_only=True).
    'test_accuracy': float(test_accuracy),
    'test_f1_score': float(test_f1)
}, checkpoint_path)

print(f"Model saved to {checkpoint_path}")

Model saved to ..\models\esg_risk_model.pt


In [31]:
# Persist the fitted StandardScaler next to the model so that inference code can apply
# the same feature scaling.
scaler_path = model_dir / 'scaler.pkl'
joblib.dump(scaler, scaler_path)
print(f"Scaler saved to {scaler_path}")

Scaler saved to ..\models\scaler.pkl


In [32]:
# Human-readable summary of the run, written as JSON alongside the checkpoint.
architecture = {
    'input_dim': input_dim,
    'hidden_dims': hidden_dims,
    'num_classes': num_classes,
    'dropout': 0.3
}

metadata = {
    'model_type': 'ESGRiskClassifier',
    'input_features': len(feature_cols),
    'num_classes': num_classes,
    'class_names': class_names,
    # Split sizes, for reference when reading the metrics.
    'training_samples': len(X_train),
    'validation_samples': len(X_val),
    'test_samples': len(X_test),
    # float() makes sure the metrics are plain Python numbers that json can encode.
    'test_accuracy': float(test_accuracy),
    'test_f1_score': float(test_f1),
    'best_hyperparameters': best_config,
    'feature_columns': feature_cols,
    'label_mapping': label_mapping,
    'architecture': architecture
}

metadata_path = model_dir / 'model_metadata.json'
with metadata_path.open('w') as f:
    json.dump(metadata, f, indent=2)

print(f"Metadata saved to {metadata_path}")

Metadata saved to ..\models\model_metadata.json


In [33]:
# Closing banner that recaps the headline metrics and where the artefacts were written.
banner = "=" * 60
print("\n" + banner)
print("TRAINING COMPLETE")
print(banner)
print(f"Final Test Accuracy: {test_accuracy:.4f}")
print(f"Final Test F1 Score: {test_f1:.4f}")
print(f"Best Configuration: {best_config}")
print(f"Model saved at: {model_dir / 'esg_risk_model.pt'}")
print(banner)


TRAINING COMPLETE
Final Test Accuracy: 1.0000
Final Test F1 Score: 1.0000
Best Configuration: {'batch_size': 32, 'learning_rate': 0.001, 'weight_decay': 1e-05, 'num_epochs': 100}
Model saved at: ..\models\esg_risk_model.pt
