# 🎯 Improved Attention-KAN - Clean Output Version
## 4 Strategies Combined - Minimal Logging

**Target:**
- Recall ≥ 85%
- Accuracy ≥ 70%
- Precision ≥ 40%
- F1 ≥ 50%

---

In [None]:
import os, glob, warnings
import numpy as np
import pandas as pd
from scipy.io import arff
from io import StringIO
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import *
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide setup: silence library warnings, pick the plotting theme and
# fix the random seeds so repeated runs draw the same random numbers.
warnings.filterwarnings('ignore')
sns.set_style('whitegrid')
# Single seed reused below for NumPy, PyTorch, the train/test splits and SMOTE.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(RANDOM_SEED)

print("‚úÖ Ready!")

In [None]:
# Attention-KAN Architecture
class FeatureAttention(nn.Module):
    """Per-feature gate: scales every input feature by a learned weight in (0, 1).

    The weights come from a small bottleneck MLP (Linear -> ReLU -> Dropout ->
    Linear -> Sigmoid) applied to the batch-normalised input.

    Args:
        in_features: Number of input features; the gate has the same width.
    """

    def __init__(self, in_features):
        super().__init__()
        # Bottleneck width: half the input width, but never narrower than 8.
        hidden = max(in_features // 2, 8)
        self.attention = nn.Sequential(
            nn.Linear(in_features, hidden),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden, in_features),
            nn.Sigmoid(),
        )
        self.bn = nn.BatchNorm1d(in_features)

    def forward(self, x):
        """Return ``(x * weights, weights)`` for a batch ``x``.

        The weights are computed exactly once. Evaluating the gate twice would
        draw two different dropout masks in training mode (so the returned
        weights would not be the ones actually applied to ``x``) and would
        update the BatchNorm running statistics twice per step.
        """
        weights = self.attention(self.bn(x))
        return x * weights, weights

class KANLinear(nn.Module):
    """Layer that sums learnable one-dimensional functions of each input plus a linear term.

    Every (output, input) pair owns ``grid_size`` Gaussian-shaped bumps
    ``exp(-(x - g)^2 / 0.5)`` whose centres ``g`` (``grid``) and amplitudes
    (``coef``) are trainable. ``base_weight`` adds an ordinary bias-free
    linear map on top.

    Args:
        in_features: Width of the input.
        out_features: Width of the output.
        grid_size: Number of bump centres per (output, input) pair.
    """

    def __init__(self, in_features, out_features, grid_size=5):
        super().__init__()
        # Centres start evenly spaced on [-1, 1], replicated for every pair.
        knots = torch.linspace(-1, 1, grid_size)
        tiled_knots = knots.unsqueeze(0).unsqueeze(0).repeat(out_features, in_features, 1)
        self.grid = nn.Parameter(tiled_knots)
        # Small random initial amplitudes and linear weights (std 0.1).
        self.coef = nn.Parameter(0.1 * torch.randn(out_features, in_features, grid_size))
        self.base_weight = nn.Parameter(0.1 * torch.randn(out_features, in_features))

    def forward(self, x):
        """Map ``x`` with in_features columns to out_features columns."""
        # Broadcast the inputs against every centre of every output unit.
        expanded = torch.unsqueeze(torch.unsqueeze(x, 1), -1)
        offset = expanded - torch.unsqueeze(self.grid, 0)
        basis = torch.exp(torch.abs(offset).pow(2).neg().div(0.5))
        # Weight the bumps, then collapse the centre axis and the input axis.
        weighted = basis * torch.unsqueeze(self.coef, 0)
        nonlinear_part = weighted.sum(dim=-1).sum(dim=-1)
        linear_part = torch.matmul(x, self.base_weight.t())
        return nonlinear_part + linear_part

class AttentionKAN(nn.Module):
    """Binary classifier: feature gate, two KAN layers, sigmoid output.

    Pipeline: FeatureAttention -> KANLinear -> BatchNorm -> ReLU -> Dropout
    -> KANLinear -> BatchNorm -> ReLU -> Dropout -> Linear -> Sigmoid.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of the first KAN layer; the second uses half of it.
        grid_size: Grid size forwarded to both KAN layers.
    """

    def __init__(self, input_dim, hidden_dim=64, grid_size=5):
        super().__init__()
        half_dim = hidden_dim // 2
        self.attention = FeatureAttention(input_dim)
        self.kan1 = KANLinear(input_dim, hidden_dim, grid_size)
        self.kan2 = KANLinear(hidden_dim, half_dim, grid_size)
        self.output = nn.Linear(half_dim, 1)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.bn2 = nn.BatchNorm1d(half_dim)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Return one probability per row of ``x`` (output has a single column)."""
        # The attention module returns (gated input, weights); only the
        # gated input is propagated through the network.
        gated, _ = self.attention(x)
        stage_one = self.dropout(torch.relu(self.bn1(self.kan1(gated))))
        stage_two = self.dropout(torch.relu(self.bn2(self.kan2(stage_one))))
        logits = self.output(stage_two)
        return torch.sigmoid(logits)

    def get_feature_importance(self, X):
        """Return the attention weight of each feature averaged over the rows of ``X``.

        ``X`` may be a tensor or anything ``torch.FloatTensor`` accepts. The
        module is switched to eval mode and is left in eval mode afterwards.
        """
        self.eval()
        inputs = X if isinstance(X, torch.Tensor) else torch.FloatTensor(X)
        # Run on whatever device the model's parameters live on.
        target_device = next(self.parameters()).device
        with torch.no_grad():
            weights = self.attention(inputs.to(target_device))[1]
        return weights.cpu().numpy().mean(axis=0)

print("‚úÖ Architecture ready!")

In [None]:
# Focal Loss
class FocalLoss(nn.Module):
    """Focal loss on probabilities with an extra multiplier for positive samples.

    Args:
        alpha: Weight of positive targets; negatives are weighted ``1 - alpha``.
        gamma: Exponent of the ``(1 - p_t)`` modulating factor.
        pos_weight: Additional factor applied to elements whose target equals 1.
    """

    def __init__(self, alpha=0.25, gamma=2.0, pos_weight=3.0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.pos_weight = pos_weight

    def forward(self, inputs, targets):
        """Return the mean focal loss; ``inputs`` are probabilities, not logits."""
        per_element = nn.functional.binary_cross_entropy(inputs, targets, reduction='none')
        # exp(-BCE) recovers the probability assigned to the true class.
        p_true = torch.exp(-per_element)
        class_weight = targets * self.alpha + (1 - targets) * (1 - self.alpha)
        loss = class_weight * (1 - p_true) ** self.gamma * per_element
        # Up-weight the positive class on top of the alpha balancing.
        loss = torch.where(targets == 1, loss * self.pos_weight, loss)
        return loss.mean()

print("‚úÖ Focal Loss ready!")

In [None]:
# Training (silent mode)
def train_model(model, X_train, y_train, X_val, y_val, lr=0.01, epochs=30, pos_weight=3.0,
                batch_size=32, patience=10):
    """Train ``model`` with focal loss and Adam, stopping early on validation recall.

    Args:
        model: Module mapping a float batch to one probability per row.
        X_train, y_train: Training features and 0/1 labels (array-like).
        X_val, y_val: Validation features and labels used for early stopping.
        lr: Adam learning rate.
        epochs: Maximum number of passes over the training data.
        pos_weight: Positive-class multiplier forwarded to ``FocalLoss``.
        batch_size: Mini-batch size (previously hard-coded to 32).
        patience: Epochs without a new best validation recall before stopping
            (previously hard-coded to 10).

    Returns:
        The model, on CUDA when available, holding the weights of the LAST
        epoch that ran. The best-recall weights are intentionally not
        restored: recall at a 0.5 cut-off peaks for models that predict
        almost everything positive.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    X_train_t = torch.FloatTensor(X_train).to(device)
    y_train_t = torch.FloatTensor(y_train).unsqueeze(1).to(device)
    X_val_t = torch.FloatTensor(X_val).to(device)

    # BatchNorm1d (used by AttentionKAN in this file) rejects a one-row batch
    # in training mode. When the data size leaves exactly one sample over,
    # drop that trailing batch instead of crashing mid-epoch.
    n_samples = X_train_t.shape[0]
    drop_last = n_samples > 1 and n_samples % batch_size == 1
    loader = DataLoader(
        TensorDataset(X_train_t, y_train_t),
        batch_size=batch_size,
        shuffle=True,
        drop_last=drop_last,
    )
    criterion = FocalLoss(alpha=0.25, gamma=2.0, pos_weight=pos_weight)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    best_recall = 0
    patience_counter = 0

    for epoch in range(epochs):
        model.train()
        for batch_X, batch_y in loader:
            optimizer.zero_grad()
            criterion(model(batch_X), batch_y).backward()
            optimizer.step()

        # Validation recall at a fixed 0.5 cut-off drives early stopping.
        model.eval()
        with torch.no_grad():
            val_pred = (model(X_val_t) > 0.5).float().cpu().numpy()
        val_recall = recall_score(y_val, val_pred, zero_division=0)

        if val_recall > best_recall:
            best_recall = val_recall
            patience_counter = 0
        else:
            patience_counter += 1
        if patience_counter >= patience:
            break

    return model

print("‚úÖ Training ready!")

In [None]:
# Threshold Optimization (silent)
def find_optimal_threshold(model, X_val, y_val, target_recall=0.85):
    """Pick the decision threshold with the best F1 among those meeting ``target_recall``.

    Candidate thresholds are 0.10, 0.15, ..., 0.65.

    Args:
        model: Trained module returning one probability per row.
        X_val: Validation features (array-like accepted by ``torch.FloatTensor``).
        y_val: Validation labels (0/1).
        target_recall: Minimum recall a threshold must reach to be eligible.

    Returns:
        The eligible threshold with the highest F1, or 0.5 when no candidate
        reaches ``target_recall`` (or every eligible candidate has F1 == 0).
    """
    # Score on the device the model actually lives on rather than guessing
    # from CUDA availability; this also works for a CPU model on a GPU host.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    model.eval()
    with torch.no_grad():
        y_prob = model(torch.FloatTensor(X_val).to(device)).cpu().numpy().flatten()

    best_threshold, best_f1 = 0.5, 0
    for threshold in np.arange(0.1, 0.7, 0.05):
        y_pred = (y_prob >= threshold).astype(int)
        recall = recall_score(y_val, y_pred, zero_division=0)
        f1 = f1_score(y_val, y_pred, zero_division=0)
        if recall >= target_recall and f1 > best_f1:
            best_threshold, best_f1 = threshold, f1
    return best_threshold

print("‚úÖ Threshold optimization ready!")

In [None]:
# Data Loading
def load_arff(file_path):
    """Load an ARFF file into a DataFrame, with a plain-text fallback.

    The file is first parsed with ``scipy.io.arff.loadarff``; byte-string
    columns are decoded to ``str``. If that fails for any reason, everything
    after the ``@data`` line is parsed as header-less CSV, with ``?`` (the
    ARFF missing-value marker) read as NaN.

    Args:
        file_path: Path of the ``.arff`` file.

    Returns:
        A DataFrame with named columns (scipy path) or integer column labels
        (fallback path).

    Raises:
        ValueError: If the fallback is needed but the file has no ``@data``
            line. Previously such a file was silently sliced at a
            meaningless offset.
        OSError: If the file cannot be opened.
    """
    import re

    try:
        data, _ = arff.loadarff(file_path)
        df = pd.DataFrame(data)
        for col in df.columns:
            if df[col].dtype == object:
                try:
                    df[col] = df[col].str.decode('utf-8')
                except (AttributeError, TypeError, UnicodeDecodeError):
                    # Best effort: leave the column untouched if it cannot be decoded.
                    pass
        return df
    except Exception as exc:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()
        # Anchor on a line that starts with @data so the marker is not
        # matched inside the relation name or an attribute declaration.
        marker = re.search(r'^[ \t]*@data\b[^\n]*', content, flags=re.IGNORECASE | re.MULTILINE)
        if marker is None:
            raise ValueError(f"No @data section found in {file_path!r}") from exc
        body = content[marker.end():].strip()
        return pd.read_csv(StringIO(body), header=None, na_values='?')

def prepare_data(df):
    """Turn a raw dataset frame into scaled, class-balanced train/test arrays.

    The last column is the label and every other column is a numeric feature.
    Steps: encode labels, stratified 80/20 split, median imputation,
    min-max scaling, then SMOTE on the training part only.

    Args:
        df: DataFrame whose last column holds the class label.

    Returns:
        ``(X_train, X_test, y_train, y_test)``. The training arrays are
        oversampled when SMOTE succeeds and returned unchanged otherwise.
    """
    X = df.iloc[:, :-1].values.astype(np.float32)
    y = df.iloc[:, -1].values
    if y.dtype == object or y.dtype.name.startswith('str'):
        y = LabelEncoder().fit_transform(y)
    else:
        y = y.astype(np.int32)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=RANDOM_SEED
    )

    # Impute AFTER the split using training medians only, so no test-set
    # statistic leaks into the model (the scaler below is fitted the same way).
    if np.isnan(X_train).any() or np.isnan(X_test).any():
        medians = np.nanmedian(X_train, axis=0)
        # A column that is entirely NaN in the training part has no median;
        # fall back to 0 so no NaN reaches the scaler, SMOTE or the network.
        medians = np.where(np.isnan(medians), 0.0, medians).astype(np.float32)
        X_train = np.where(np.isnan(X_train), medians, X_train)
        X_test = np.where(np.isnan(X_test), medians, X_test)

    scaler = MinMaxScaler()
    X_train, X_test = scaler.fit_transform(X_train), scaler.transform(X_test)

    # Oversampling is best effort: if SMOTE rejects the data, keep the
    # original training set, but say so instead of failing silently.
    try:
        X_train, y_train = SMOTE(sampling_strategy=0.8, random_state=RANDOM_SEED).fit_resample(X_train, y_train)
    except Exception as exc:
        print(f"SMOTE skipped: {exc}")

    return X_train, X_test, y_train, y_test

print("‚úÖ Data loading ready!")

In [None]:
# GWO (minimal logging)
class SimpleGWO:
    """Small Grey Wolf Optimizer that MAXIMISES ``fitness_func`` inside box bounds.

    Args:
        bounds: Sequence of ``(low, high)`` pairs, one per dimension.
        fitness_func: Callable taking a position vector and returning a score.
        n_wolves: Population size.
        n_iter: Number of iterations.
    """

    def __init__(self, bounds, fitness_func, n_wolves=6, n_iter=8):
        self.bounds = np.array(bounds)
        self.fitness_func = fitness_func
        self.n_wolves = n_wolves
        self.n_iter = n_iter
        self.dim = len(bounds)
        lower, upper = self.bounds[:, 0], self.bounds[:, 1]
        # Start the pack uniformly at random inside the search box.
        self.positions = np.random.uniform(lower, upper, size=(n_wolves, self.dim))
        # Three best solutions seen so far, best first: alpha, beta, delta.
        self.alpha_pos, self.alpha_score = np.zeros(self.dim), float('-inf')
        self.beta_pos, self.beta_score = np.zeros(self.dim), float('-inf')
        self.delta_pos, self.delta_score = np.zeros(self.dim), float('-inf')

    def _record(self, score, position):
        """Insert ``(score, position)`` into the alpha/beta/delta ranking if it qualifies."""
        if score > self.alpha_score:
            slot = 0
        elif score > self.beta_score:
            slot = 1
        elif score > self.delta_score:
            slot = 2
        else:
            return

        # Shift the lower-ranked leaders down before writing the new entry.
        if slot <= 1:
            self.delta_score, self.delta_pos = self.beta_score, self.beta_pos.copy()
        if slot == 0:
            self.beta_score, self.beta_pos = self.alpha_score, self.alpha_pos.copy()
            self.alpha_score, self.alpha_pos = score, position.copy()
        elif slot == 1:
            self.beta_score, self.beta_pos = score, position.copy()
        else:
            self.delta_score, self.delta_pos = score, position.copy()

    def optimize(self):
        """Run the search and return ``(best_position, best_score)``."""
        for step in range(self.n_iter):
            # Phase 1: score every wolf and refresh the three leaders.
            for wolf in range(self.n_wolves):
                self._record(self.fitness_func(self.positions[wolf]), self.positions[wolf])

            # Phase 2: move every coordinate towards the leaders. ``a`` decays
            # linearly from 2 towards 0 over the iterations.
            a = 2 - step * (2.0 / self.n_iter)
            for wolf in range(self.n_wolves):
                for axis in range(self.dim):
                    current = self.positions[wolf, axis]
                    pulls = []
                    for leader in (self.alpha_pos, self.beta_pos, self.delta_pos):
                        r1, r2 = np.random.random(2)
                        pulls.append(leader[axis] - (2 * a * r1 - a) * abs(2 * r2 * leader[axis] - current))
                    candidate = (pulls[0] + pulls[1] + pulls[2]) / 3.0
                    self.positions[wolf, axis] = np.clip(candidate, self.bounds[axis, 0], self.bounds[axis, 1])

        return self.alpha_pos, self.alpha_score

print("‚úÖ GWO ready!")

In [None]:
# Mini Ensemble
def train_ensemble(X_train, y_train, X_val, y_val, input_dim, hidden_dim, grid_size, lr, pos_weight,
                   seeds=(42, 123), epochs=30):
    """Train one AttentionKAN per seed and return the trained models as a list.

    Args:
        X_train, y_train: Training features and labels.
        X_val, y_val: Validation data used by ``train_model`` for early stopping.
        input_dim, hidden_dim, grid_size: Architecture settings for ``AttentionKAN``.
        lr: Learning rate passed to ``train_model``.
        pos_weight: Positive-class weight passed to ``train_model``.
        seeds: One ensemble member is trained per seed (previously fixed to
            ``[42, 123]``).
        epochs: Maximum epochs per member (previously fixed to 30).

    Returns:
        List of trained models, in the order of ``seeds``.
    """
    models = []
    for seed in seeds:
        # Re-seed before construction so each member has its own
        # initialisation and shuffling sequence.
        torch.manual_seed(seed)
        np.random.seed(seed)
        model = AttentionKAN(input_dim, hidden_dim, grid_size)
        models.append(train_model(model, X_train, y_train, X_val, y_val, lr, epochs, pos_weight))
    # Put the global generators back on the notebook-wide seed.
    torch.manual_seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    return models

def ensemble_predict(models, X_test, threshold=0.5):
    """Average the probabilities of several models and threshold the mean.

    Args:
        models: Non-empty iterable of modules returning one probability per row.
        X_test: Features (array-like accepted by ``torch.FloatTensor``).
        threshold: Rows whose mean probability is ``>= threshold`` are labelled 1.

    Returns:
        ``(labels, avg_prob)``: an int array of 0/1 labels and the float array
        of averaged probabilities, both one-dimensional.

    Raises:
        ValueError: If ``models`` is empty (the mean would otherwise be NaN).
    """
    models = list(models)
    if not models:
        raise ValueError("ensemble_predict requires at least one model")

    X_cpu = torch.FloatTensor(X_test)
    preds = []
    for model in models:
        # Feed each model on its own device instead of assuming that CUDA
        # availability implies the model was moved to the GPU.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')
        model.eval()
        with torch.no_grad():
            preds.append(model(X_cpu.to(device)).cpu().numpy().flatten())
    avg_prob = np.mean(preds, axis=0)
    return (avg_prob >= threshold).astype(int), avg_prob

print("‚úÖ Ensemble ready!")

In [None]:
# Visualization
def plot_importance(model, X_data, dataset_name, top_k=15):
    """Draw, save and show a horizontal bar chart of the strongest attention weights.

    Args:
        model: Object exposing ``get_feature_importance(X)``.
        X_data: Samples over which the attention weights are averaged.
        dataset_name: Used in the title and in the output file name
            ``<dataset_name>_clean.png``.
        top_k: Maximum number of features to display.
    """
    weights = model.get_feature_importance(X_data)
    # Feature indices ordered from the largest weight down, truncated to top_k.
    order = np.argsort(weights)[::-1][:top_k]
    values = weights[order]
    labels = [f'F{feature}' for feature in order]
    rows = range(len(values))

    fig, ax = plt.subplots(figsize=(10, 6))
    # Colour each bar by its weight relative to the largest displayed weight.
    bar_colors = plt.cm.viridis(values / values.max())
    ax.barh(rows, values, color=bar_colors)
    ax.set_yticks(rows)
    ax.set_yticklabels(labels)
    ax.set_xlabel('Attention Weight', fontweight='bold')
    ax.set_title(f'{dataset_name}: Top {top_k} Features', fontweight='bold')
    # Largest weight at the top of the chart.
    ax.invert_yaxis()
    ax.grid(axis='x', alpha=0.3)
    for row, value in zip(rows, values):
        ax.text(value + 0.01, row, f'{value:.3f}', va='center', fontsize=9)

    plt.tight_layout()
    plt.savefig(f'{dataset_name}_clean.png', dpi=300, bbox_inches='tight')
    plt.show()

print("‚úÖ Visualization ready!")

In [None]:
# Main Execution (CLEAN OUTPUT)
def run_experiment(dataset_dir='/content/drive/MyDrive/nasa-defect-gwo-kan/dataset'):
    """Run the full pipeline on the PC1 / CM1 / KC1 ARFF files in ``dataset_dir``.

    Per dataset: load and prepare the data, tune hyperparameters with GWO,
    train the seed ensemble, choose a decision threshold on the validation
    fold, evaluate on the test split and plot the attention weights.
    A failure in one dataset is printed and the loop continues.

    Args:
        dataset_dir: Directory searched (non-recursively) for ``*.arff`` files
            whose name contains PC1, CM1 or KC1 (case-insensitive).

    Returns:
        DataFrame with one row per processed dataset plus a final ``AVERAGE``
        row. When nothing was processed only the ``AVERAGE`` row is returned,
        filled with NaN, instead of raising ``KeyError``.
    """
    wanted = ('PC1', 'CM1', 'KC1')
    # Sorted so the processing order (and therefore the random-number
    # consumption) does not depend on the filesystem's listing order.
    files = sorted(
        f for f in glob.glob(os.path.join(dataset_dir, '*.arff'))
        if any(ds in os.path.basename(f).upper() for ds in wanted)
    )

    metric_cols = ['Accuracy', 'Precision', 'Recall', 'F1', 'F2', 'AUC']
    columns = ['Dataset', 'Threshold'] + metric_cols
    results = []

    for file_path in files:
        dataset_name = os.path.basename(file_path).replace('.arff', '')
        print(f"\n{'='*60}")
        print(f"üìä {dataset_name}: Training started...")
        print('='*60)

        try:
            # Load
            df = load_arff(file_path)
            X_train, X_test, y_train, y_test = prepare_data(df)
            # NOTE(review): prepare_data applies SMOTE before this split, so the
            # validation fold can contain synthetic samples - confirm intended.
            X_train, X_val, y_train, y_val = train_test_split(
                X_train, y_train, test_size=0.2, stratify=y_train, random_state=RANDOM_SEED
            )
            input_dim = X_train.shape[1]

            # GWO
            print("üîß Optimizing hyperparameters...")
            failures = []

            def fitness(params):
                """Score one (hidden_dim, grid_size, lr, pos_weight) candidate on the validation fold."""
                try:
                    model = AttentionKAN(input_dim, int(params[0]), int(params[1]))
                    model = train_model(model, X_train, y_train, X_val, y_val, params[2], 20, params[3])
                    threshold = find_optimal_threshold(model, X_val, y_val, 0.85)
                    model.eval()
                    with torch.no_grad():
                        device = next(model.parameters()).device
                        probs = model(torch.FloatTensor(X_val).to(device)).cpu().numpy().flatten()
                    y_pred = (probs >= threshold).astype(int)
                    return (0.5 * recall_score(y_val, y_pred, zero_division=0)
                            + 0.3 * f1_score(y_val, y_pred, zero_division=0)
                            + 0.2 * accuracy_score(y_val, y_pred))
                except Exception as exc:
                    # A failed candidate scores 0, but remember why so that a
                    # systematic failure is not mistaken for a bad search.
                    failures.append(str(exc))
                    return 0.0

            gwo = SimpleGWO([(32, 96), (3, 7), (0.005, 0.02), (2.0, 5.0)], fitness, 6, 8)
            best_params, _ = gwo.optimize()
            if failures:
                print(f"{len(failures)} fitness evaluations failed; first error: {failures[0]}")
            hidden_dim, grid_size = int(best_params[0]), int(best_params[1])
            lr, pos_weight = best_params[2], best_params[3]

            # Train ensemble
            print("ü§ñ Training ensemble...")
            models = train_ensemble(X_train, y_train, X_val, y_val, input_dim, hidden_dim, grid_size, lr, pos_weight)

            # Find threshold: best F1 among cut-offs whose validation recall
            # is at least 0.85; stays at 0.5 when none qualifies.
            print("üéØ Optimizing threshold...")
            _, val_avg = ensemble_predict(models, X_val)
            best_threshold = 0.5
            best_f1 = 0
            for t in np.arange(0.1, 0.7, 0.05):
                y_pred = (val_avg >= t).astype(int)
                if recall_score(y_val, y_pred, zero_division=0) >= 0.85:
                    f1 = f1_score(y_val, y_pred, zero_division=0)
                    if f1 > best_f1:
                        best_threshold, best_f1 = t, f1

            # Test
            print("üìà Evaluating...")
            y_pred, y_prob = ensemble_predict(models, X_test, best_threshold)
            metrics = {
                'Accuracy': accuracy_score(y_test, y_pred),
                'Precision': precision_score(y_test, y_pred, zero_division=0),
                'Recall': recall_score(y_test, y_pred, zero_division=0),
                'F1': f1_score(y_test, y_pred, zero_division=0),
                'F2': fbeta_score(y_test, y_pred, beta=2, zero_division=0),
                # AUC is undefined with a single class in y_test; report 0 then.
                'AUC': roc_auc_score(y_test, y_prob) if len(np.unique(y_test)) > 1 else 0
            }

            # Visualize
            print("üìä Creating heatmap...")
            plot_importance(models[0], X_test, dataset_name, 15)

            results.append({'Dataset': dataset_name, 'Threshold': best_threshold, **metrics})
            print(f"‚úÖ {dataset_name} complete!\n")

        except Exception as e:
            print(f"‚ùå Error: {e}\n")

    # Summary
    if not results:
        print("No datasets were processed; returning an empty summary.")
        empty_avg = {'Dataset': 'AVERAGE', **{col: float('nan') for col in metric_cols}}
        return pd.DataFrame([empty_avg], columns=columns)

    df = pd.DataFrame(results, columns=columns)
    avg = {'Dataset': 'AVERAGE'}
    for col in metric_cols:
        avg[col] = df[col].mean()
    df = pd.concat([df, pd.DataFrame([avg])], ignore_index=True)
    return df

print("‚úÖ Ready to run!")

In [None]:
# RUN!
# Banner and target reminder.
print("\n" + "="*60)
print("üöÄ IMPROVED ATTENTION-KAN - 4 STRATEGIES")
print("="*60)
print("\nüéØ Target: Recall‚â•85%, Accuracy‚â•70%, Precision‚â•40%, F1‚â•50%\n")

# Run the whole pipeline; `results` has one row per dataset plus an AVERAGE row.
results = run_experiment('/content/drive/MyDrive/nasa-defect-gwo-kan/dataset')

print("\n" + "="*60)
print("üìä FINAL RESULTS")
print("="*60)
print(results.to_string(index=False))

# Persist the table next to the notebook.
results.to_excel('improved_clean_results.xlsx', index=False)
print("\nüíæ Saved: improved_clean_results.xlsx")

print("\n" + "="*60)
print("üéØ AVERAGE METRICS")
print("="*60)
# Pull out the AVERAGE row and mark each metric against its target value.
avg = results[results['Dataset'] == 'AVERAGE'].iloc[0]
print(f"\n  Accuracy:  {avg['Accuracy']:.4f} {'‚úÖ' if avg['Accuracy'] >= 0.70 else '‚ùå'}")
print(f"  Precision: {avg['Precision']:.4f} {'‚úÖ' if avg['Precision'] >= 0.40 else '‚ùå'}")
print(f"  Recall:    {avg['Recall']:.4f} {'‚úÖ' if avg['Recall'] >= 0.85 else '‚ùå'} ‚≠ê")
print(f"  F1-Score:  {avg['F1']:.4f} {'‚úÖ' if avg['F1'] >= 0.50 else '‚ùå'}")
# F2 and AUC are reported without a pass/fail target.
print(f"  F2-Score:  {avg['F2']:.4f}")
print(f"  AUC:       {avg['AUC']:.4f}")
print("\n" + "="*60)
print("‚úÖ COMPLETE!")
print("="*60)

In [None]:
# Comparison Plot
# 2x3 grid of horizontal bar charts, one panel per metric, one bar per dataset.
fig, axes = plt.subplots(2, 3, figsize=(15, 8))
fig.suptitle('Improved Attention-KAN Results', fontsize=16, fontweight='bold')
metrics = ['Accuracy', 'Precision', 'Recall', 'F1', 'F2', 'AUC']
colors = ['#3498db', '#e74c3c', '#2ecc71', '#f39c12', '#9b59b6', '#1abc9c']
# The AVERAGE row is a summary, not a dataset, so it is left out of the bars.
plot_data = results[results['Dataset'] != 'AVERAGE']
for idx, (metric, color) in enumerate(zip(metrics, colors)):
    # Map the flat metric index onto the (row, column) position in the grid.
    ax = axes[idx // 3, idx % 3]
    ax.barh(plot_data['Dataset'], plot_data[metric], color=color, alpha=0.7)
    ax.set_xlabel(metric, fontweight='bold')
    ax.set_xlim(0, 1)
    ax.grid(axis='x', alpha=0.3)
    # Dashed vertical lines mark the target value of each metric that has one;
    # the Recall panel is additionally highlighted as the primary metric.
    if metric == 'Recall':
        ax.axvline(x=0.85, color='red', linestyle='--', linewidth=2)
        ax.set_facecolor('#ffe6e6')
        ax.set_title('‚≠ê PRIMARY ‚≠ê', fontsize=10, color='red')
    elif metric == 'Accuracy':
        ax.axvline(x=0.70, color='blue', linestyle='--', linewidth=2, alpha=0.5)
    elif metric == 'Precision':
        ax.axvline(x=0.40, color='orange', linestyle='--', linewidth=2, alpha=0.5)
    elif metric == 'F1':
        ax.axvline(x=0.50, color='purple', linestyle='--', linewidth=2, alpha=0.5)
plt.tight_layout()
plt.savefig('improved_clean_comparison.png', dpi=300, bbox_inches='tight')
plt.show()
print("üíæ Saved: improved_clean_comparison.png")