# In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from pytorch_tabular import TabularModel
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
from pytorch_tabular.models import FTTransformerConfig
from sklearn.linear_model import BayesianRidge, Ridge
from sklearn.model_selection import KFold
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from torch.utils.data import DataLoader, TensorDataset
from xgboost import XGBRegressor

# Seed torch's RNG and run on the GPU whenever one is available
torch.manual_seed(42)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Read the raw training and test tables
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# Column groups: blend fractions, per-component properties, targets, model inputs
blend_cols = [f'Component{comp}_fraction' for comp in range(1, 6)]
prop_cols = [f'Component{comp}_Property{prop}' for comp in range(1, 6) for prop in range(1, 11)]
target_cols = [f'BlendProperty{idx}' for idx in range(1, 11)]
feature_cols = [name for name in train_df.columns if name not in target_cols]

# Every row's blend fractions must add up to 1 (within 1e-5) before they are used
train_fraction_totals = train_df[blend_cols].sum(axis=1)
assert np.allclose(train_fraction_totals, 1.0, atol=1e-5), "Train blend compositions must sum to 1"
test_fraction_totals = test_df[blend_cols].sum(axis=1)
assert np.allclose(test_fraction_totals, 1.0, atol=1e-5), "Test blend compositions must sum to 1"

# Force each individual fraction into the [0, 1] range
train_df[blend_cols] = train_df[blend_cols].clip(lower=0, upper=1)
test_df[blend_cols] = test_df[blend_cols].clip(lower=0, upper=1)

# Feature Engineering Functions
def compute_weighted_properties(df, blend_cols, prop_cols):
    """Blend-fraction-weighted sum of each property across the five components.

    For every property 1..10 the value of ``Component{k}_Property{p}`` is
    multiplied by the k-th entry of ``blend_cols`` and the five products are
    added up.

    Parameters:
    - df: DataFrame holding the fraction and ``Component{k}_Property{p}`` columns
    - blend_cols: Fraction column names, one per component (first five are used)
    - prop_cols: Accepted for signature symmetry; not read by this function

    Returns:
    - DataFrame with columns ``Weighted_Property1`` .. ``Weighted_Property10``
    """
    columns = {}
    for prop_idx in range(1, 11):
        total = 0
        for comp_idx in range(5):
            component_value = df[f'Component{comp_idx + 1}_Property{prop_idx}']
            total = total + component_value * df[blend_cols[comp_idx]]
        columns[f'Weighted_Property{prop_idx}'] = total
    return pd.DataFrame(columns)

def compute_statistical_aggregates(df, prop_cols):
    """Row-wise mean and standard deviation over all property columns.

    Uses the vectorised DataFrame reductions instead of a per-row ``apply``,
    which invokes a Python-level function for every row.

    Parameters:
    - df: DataFrame containing the columns listed in ``prop_cols``
    - prop_cols: Names of the columns to aggregate across

    Returns:
    - DataFrame with columns ``Prop_mean`` and ``Prop_std``, indexed like ``df``
    """
    props = df[prop_cols]
    return pd.DataFrame({
        'Prop_mean': props.mean(axis=1),
        # ddof=0 is the population standard deviation, i.e. np.std's default
        'Prop_std': props.std(axis=1, ddof=0),
    })

def compute_property_min_max(df, prop_cols):
    """Per-row minimum and maximum of each property across the five components.

    Parameters:
    - df: DataFrame holding the ``Component{k}_Property{p}`` columns
    - prop_cols: Accepted for signature symmetry; not read by this function

    Returns:
    - DataFrame with ``Min_Property{p}`` / ``Max_Property{p}`` column pairs
      for p = 1..10, in that order
    """
    extremes = {}
    for prop_idx in range(1, 11):
        component_names = [f'Component{comp}_Property{prop_idx}' for comp in range(1, 6)]
        per_component = df[component_names]
        extremes.update({
            f'Min_Property{prop_idx}': per_component.min(axis=1),
            f'Max_Property{prop_idx}': per_component.max(axis=1),
        })
    return pd.DataFrame(extremes)

def compute_property_blend_interactions(df, blend_cols, prop_cols):
    """Product of each component's blend fraction with each of its own properties.

    Parameters:
    - df: DataFrame holding the fraction and ``Component{k}_Property{p}`` columns
    - blend_cols: Fraction column names, one per component (first five are used)
    - prop_cols: Accepted for signature symmetry; not read by this function

    Returns:
    - DataFrame with 50 columns named
      ``Interaction_{fraction column}_Component{k}_Property{p}``,
      ordered component-major (k = 1..5), then property (p = 1..10)
    """
    return pd.DataFrame({
        f'Interaction_{blend_cols[comp]}_Component{comp + 1}_Property{prop}':
            df[blend_cols[comp]] * df[f'Component{comp + 1}_Property{prop}']
        for comp in range(5)
        for prop in range(1, 11)
    })

def compute_weighted_squared_properties(df, blend_cols, prop_cols):
    """Blend-fraction-weighted sum of each squared property across components.

    Parameters:
    - df: DataFrame holding the fraction and ``Component{k}_Property{p}`` columns
    - blend_cols: Fraction column names, one per component (first five are used)
    - prop_cols: Accepted for signature symmetry; not read by this function

    Returns:
    - DataFrame with columns ``Weighted_Squared_Property1`` ..
      ``Weighted_Squared_Property10``
    """
    columns = {}
    for prop_idx in range(1, 11):
        total = 0
        for comp_idx in range(5):
            squared_value = df[f'Component{comp_idx + 1}_Property{prop_idx}'] ** 2
            total = total + squared_value * df[blend_cols[comp_idx]]
        columns[f'Weighted_Squared_Property{prop_idx}'] = total
    return pd.DataFrame(columns)

def compute_property_deviations(df, blend_cols, prop_cols, weighted_props):
    """Fraction-scaled deviation of each component property from its weighted value.

    Each output column is
    ``fraction_k * (Component{k}_Property{p} - Weighted_Property{p})``.

    Parameters:
    - df: DataFrame holding the fraction and ``Component{k}_Property{p}`` columns
    - blend_cols: Fraction column names, one per component (first five are used)
    - prop_cols: Accepted for signature symmetry; not read by this function
    - weighted_props: DataFrame with ``Weighted_Property{p}`` columns

    Returns:
    - DataFrame with 50 columns named
      ``Deviation_{fraction column}_Component{k}_Property{p}``
    """
    return pd.DataFrame({
        f'Deviation_{blend_cols[comp]}_Component{comp + 1}_Property{prop}':
            df[blend_cols[comp]]
            * (df[f'Component{comp + 1}_Property{prop}'] - weighted_props[f'Weighted_Property{prop}'])
        for comp in range(5)
        for prop in range(1, 11)
    })

def compute_property_variance(df, blend_cols, prop_cols, weighted_props):
    """Fraction-weighted squared deviation of each property around its weighted value.

    For every property p the result is the sum over the five components of
    ``fraction_k * (Component{k}_Property{p} - Weighted_Property{p}) ** 2``.

    Parameters:
    - df: DataFrame holding the fraction and ``Component{k}_Property{p}`` columns
    - blend_cols: Fraction column names, one per component (first five are used)
    - prop_cols: Accepted for signature symmetry; not read by this function
    - weighted_props: DataFrame with ``Weighted_Property{p}`` columns

    Returns:
    - DataFrame with columns ``Variance_Property1`` .. ``Variance_Property10``
    """
    columns = {}
    for prop_idx in range(1, 11):
        centre = weighted_props[f'Weighted_Property{prop_idx}']
        total = 0
        for comp_idx in range(5):
            offset = df[f'Component{comp_idx + 1}_Property{prop_idx}'] - centre
            total = total + df[blend_cols[comp_idx]] * offset ** 2
        columns[f'Variance_Property{prop_idx}'] = total
    return pd.DataFrame(columns)

def _add_engineered_features(df, blend_cols, prop_cols):
    """Return ``df`` with every engineered feature block appended column-wise.

    The blocks are appended in a fixed order: weighted properties, statistical
    aggregates, min/max, fraction-property interactions, weighted squared
    properties, deviations, variance.
    """
    # Computed once and shared: the deviation and variance features are both
    # centred on these fraction-weighted property values.
    weighted_props = compute_weighted_properties(df, blend_cols, prop_cols)
    return pd.concat(
        [
            df,
            weighted_props,
            compute_statistical_aggregates(df, prop_cols),
            compute_property_min_max(df, prop_cols),
            compute_property_blend_interactions(df, blend_cols, prop_cols),
            compute_weighted_squared_properties(df, blend_cols, prop_cols),
            compute_property_deviations(df, blend_cols, prop_cols, weighted_props),
            compute_property_variance(df, blend_cols, prop_cols, weighted_props),
        ],
        axis=1,
    )


def _pairwise_blend_features(df, blend_cols):
    """Return product and ratio columns for each pair (i < j) of the first five blend fractions."""
    pairwise = {}
    for i in range(5):
        for j in range(i + 1, 5):
            first, second = blend_cols[i], blend_cols[j]
            pairwise[f'Interaction_{first}_{second}'] = df[first] * df[second]
            # The 1e-6 offset keeps the ratio finite when the denominator fraction is 0
            pairwise[f'Ratio_{first}/{second}'] = df[first] / (df[second] + 1e-6)
    return pd.DataFrame(pairwise)


# Property-based engineered features (same pipeline for train and test)
train_df = _add_engineered_features(train_df, blend_cols, prop_cols)
test_df = _add_engineered_features(test_df, blend_cols, prop_cols)

# Degree-2 polynomial expansion of the blend fractions; fitted on train only
poly = PolynomialFeatures(degree=2, include_bias=False)
blend_poly_train = poly.fit_transform(train_df[blend_cols])
blend_poly_test = poly.transform(test_df[blend_cols])
poly_names = [f'Poly_{name}' for name in poly.get_feature_names_out(blend_cols)]

# Append the polynomial and pairwise columns in a single concat per frame
# rather than inserting them one at a time, which fragments the DataFrame.
# NOTE(review): the degree-2 Poly_ terms already contain the pairwise products
# that the Interaction_ columns repeat; both are kept so the feature set is unchanged.
train_df = pd.concat(
    [
        train_df,
        pd.DataFrame(blend_poly_train, columns=poly_names, index=train_df.index),
        _pairwise_blend_features(train_df, blend_cols),
    ],
    axis=1,
)
test_df = pd.concat(
    [
        test_df,
        pd.DataFrame(blend_poly_test, columns=poly_names, index=test_df.index),
        _pairwise_blend_features(test_df, blend_cols),
    ],
    axis=1,
)


# Every non-target column (original and engineered) is a candidate model input
feature_cols = [col for col in train_df.columns if col not in target_cols]

def safe_mape(y_true, y_pred, epsilon=1e-6):
    """Mean absolute percentage error with ``epsilon`` added to the denominator.

    Parameters:
    - y_true: Ground-truth values
    - y_pred: Predicted values
    - epsilon: Added to ``|y_true|`` so that zero targets do not divide by zero

    Returns:
    - Mean of ``|(y_true - y_pred) / (|y_true| + epsilon)|``
    """
    residual = y_true - y_pred
    denominator = np.abs(y_true) + epsilon
    relative_error = np.abs(residual / denominator)
    return np.mean(relative_error)

def select_features_per_target(df, target, feature_cols, threshold=0.1):
    """Pick the features whose absolute correlation with ``target`` exceeds ``threshold``.

    Parameters:
    - df: DataFrame containing ``feature_cols`` and ``target``
    - target: Name of the target column
    - feature_cols: Candidate feature column names
    - threshold: Minimum absolute correlation (exclusive) for a feature to be kept

    Returns:
    - List of feature names above the threshold, in ``feature_cols`` order.
      When fewer than 10 qualify, the (up to) 10 features with the largest
      absolute correlation are returned instead, strongest first.
    """
    abs_corr = df[feature_cols].corrwith(df[target]).abs()
    above_threshold = [name for name, value in abs_corr.items() if value > threshold]
    if len(above_threshold) >= 10:
        return above_threshold
    # Too few survivors: fall back to the strongest correlations
    return abs_corr.nlargest(10).index.tolist()

class ANNModel(nn.Module):
    """Feed-forward regressor: input_dim -> 128 -> 64 -> 1 with ReLU activations."""

    def __init__(self, input_dim):
        """Build the three linear layers.

        Parameters:
        - input_dim: Number of input features per sample
        """
        # Must be the double-underscore constructor: with the misspelt `_init_`
        # Python never calls it, so no layers are registered on the module.
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of features to one output value per sample."""
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        # No activation on the output layer: unbounded regression output
        x = self.fc3(x)
        return x

def train_ann_model(X_train, y_train, X_val, y_val, input_dim):
    """Train an ``ANNModel`` with MSE loss, Adam and patience-based early stopping.

    Training runs for at most 100 epochs with batches of 32 and stops once the
    summed validation loss has not improved for 10 consecutive epochs. The
    weights from the epoch with the lowest validation loss are restored before
    returning, so the returned model is the best one seen rather than the one
    from the last (possibly degraded) epoch.

    Parameters:
    - X_train: Training features (array-like convertible to a float32 tensor)
    - y_train: Training targets, shaped to match the model output
    - X_val: Validation features
    - y_val: Validation targets
    - input_dim: Number of input features

    Returns:
    - The trained model, placed on the module-level ``device``
    """
    model = ANNModel(input_dim).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32).to(device),
                                  torch.tensor(y_train, dtype=torch.float32).to(device))
    val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32).to(device),
                                torch.tensor(y_val, dtype=torch.float32).to(device))
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32)

    best_val_loss = float('inf')
    best_state = None
    patience = 10
    trigger_times = 0

    for epoch in range(100):
        model.train()
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item()

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # Clone the tensors: state_dict() returns references that later
            # optimizer steps would otherwise overwrite in place.
            best_state = {name: tensor.detach().clone()
                          for name, tensor in model.state_dict().items()}
            trigger_times = 0
        else:
            trigger_times += 1
            if trigger_times >= patience:
                break

    # best_state stays None only if the validation loss never improved on inf
    # (e.g. it was NaN every epoch); the last-epoch weights are kept then.
    if best_state is not None:
        model.load_state_dict(best_state)

    return model

def _extract_target_predictions(pred_df, target):
    """Return the prediction values for ``target`` from a ``TabularModel.predict`` frame.

    The prediction column is looked up by name (``"<target>_prediction"``)
    because the first column of the returned frame is not guaranteed to be the
    prediction when the frame also carries the input features.
    """
    prediction_col = f"{target}_prediction"
    if prediction_col in pred_df.columns:
        return pred_df[prediction_col].values
    # Named column absent: keep the original positional behaviour
    return pred_df.iloc[:, 0].values


def train_fttransformer_regressor(train_data, val_data, test_df, selected_cols, target, n_folds=5):
    """
    Trains one FT-Transformer regressor for a single target using pytorch_tabular.

    This function fits a single model on the given train/validation split; it
    does not run cross-validation itself. ``n_folds`` is only used to pre-scale
    the test predictions so that the caller can sum the per-fold results into
    a fold average.

    Parameters:
    - train_data: DataFrame with training data
    - val_data: DataFrame with validation data
    - test_df: DataFrame with test data
    - selected_cols: List of feature columns (all treated as continuous)
    - target: Target column name
    - n_folds: Number of folds the caller accumulates test predictions over

    Returns:
    - oof_pred: Predictions for ``val_data``
    - test_pred: Predictions for ``test_df`` divided by ``n_folds``
    """
    data_config = DataConfig(
        target=[target], continuous_cols=selected_cols, categorical_cols=[],
        continuous_feature_transform="quantile_normal", normalize_continuous_features=True
    )
    trainer_config = TrainerConfig(
        auto_lr_find=True, batch_size=32, max_epochs=50, early_stopping_patience=10,
        checkpoints=None, load_best=True
    )
    optimizer_config = OptimizerConfig()
    model_config = FTTransformerConfig(
        task="regression",
        metrics=["mean_squared_error"],
        metrics_params=[{}],
        learning_rate=0.001,
        num_heads=8
    )
    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config
    )
    tabular_model.fit(train=train_data, validation=val_data)
    oof_pred = _extract_target_predictions(tabular_model.predict(val_data), target)
    test_pred = _extract_target_predictions(tabular_model.predict(test_df), target) / n_folds
    return oof_pred, test_pred

# 5-fold shuffled split with a fixed seed
kf = KFold(n_splits=5, shuffle=True, random_state=42)
val_mapes = []

# One zero-initialised (rows x targets) buffer per base model:
# out-of-fold predictions on train, accumulated predictions on test
oof_shape = (len(train_df), len(target_cols))
test_shape = (len(test_df), len(target_cols))
transformer_oof, lgb_oof, xgb_oof, cat_oof, ann_oof = (np.zeros(oof_shape) for _ in range(5))
transformer_test, lgb_test, xgb_test, cat_test, ann_test = (np.zeros(test_shape) for _ in range(5))

# Cross-validated training of all base models, one target at a time
for fold, (train_idx, val_idx) in enumerate(kf.split(train_df)):
    print(f"Fold {fold + 1}")
    train_data = train_df.iloc[train_idx].copy()
    val_data = train_df.iloc[val_idx].copy()
    # Work on a per-fold copy of the test set. Imputing the global test_df in
    # place would let the first fold's medians fill every NaN permanently, so
    # later folds would never apply their own medians to the test rows.
    test_data = test_df.copy()

    # Impute missing features with this fold's training medians (computed once)
    fold_medians = train_data[feature_cols].median()
    for frame in (train_data, val_data, test_data):
        frame[feature_cols] = frame[feature_cols].fillna(fold_medians)

    # Correlation-based feature selection, using the training part of the fold only
    feature_sets = {target: select_features_per_target(train_data, target, feature_cols) for target in target_cols}

    for i, target in enumerate(target_cols):
        selected_cols = feature_sets[target]
        X_train = train_data[selected_cols].values
        X_val = val_data[selected_cols].values
        X_test = test_data[selected_cols].values
        y_train = train_data[target].values
        y_val = val_data[target].values

        # Normalize features (scaler fitted on the training part only)
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_val_scaled = scaler.transform(X_val)
        X_test_scaled = scaler.transform(X_test)

        # Train FTTransformerRegressor for this target; it receives the
        # unscaled frames and returns test predictions already divided by n_folds
        oof_pred, test_pred = train_fttransformer_regressor(
            train_data, val_data, test_data, selected_cols, target, n_folds=kf.n_splits
        )
        transformer_oof[val_idx, i] = oof_pred
        transformer_test[:, i] += test_pred

        # Train and predict with LightGBM
        lgb_model = LGBMRegressor(n_estimators=700, learning_rate=0.03)
        lgb_model.fit(X_train_scaled, y_train)
        lgb_oof[val_idx, i] = lgb_model.predict(X_val_scaled)
        lgb_test[:, i] += lgb_model.predict(X_test_scaled) / kf.n_splits

        # Train and predict with XGBoost
        xgb_model = XGBRegressor(n_estimators=700, learning_rate=0.03)
        xgb_model.fit(X_train_scaled, y_train)
        xgb_oof[val_idx, i] = xgb_model.predict(X_val_scaled)
        xgb_test[:, i] += xgb_model.predict(X_test_scaled) / kf.n_splits

        # Train and predict with CatBoost
        cat_model = CatBoostRegressor(verbose=0, iterations=700, learning_rate=0.04)
        cat_model.fit(X_train_scaled, y_train)
        cat_oof[val_idx, i] = cat_model.predict(X_val_scaled)
        cat_test[:, i] += cat_model.predict(X_test_scaled) / kf.n_splits

        # Train and predict with ANN (targets reshaped to a column to match the model output)
        ann_model = train_ann_model(X_train_scaled, y_train.reshape(-1, 1),
                                    X_val_scaled, y_val.reshape(-1, 1),
                                    input_dim=X_train_scaled.shape[1])
        ann_model.eval()
        with torch.no_grad():
            ann_oof[val_idx, i] = ann_model(torch.tensor(X_val_scaled, dtype=torch.float32).to(device)).cpu().numpy().flatten()
            ann_test[:, i] += ann_model(torch.tensor(X_test_scaled, dtype=torch.float32).to(device)).cpu().numpy().flatten() / kf.n_splits

    # Fold MAPE, averaged over targets.
    # NOTE: this score is computed from the transformer's out-of-fold predictions only.
    fold_mape = np.mean([safe_mape(val_data[target].values, transformer_oof[val_idx, i])
                         for i, target in enumerate(target_cols)])
    val_mapes.append(fold_mape)
    print(f"  Fold {fold + 1} MAPE: {fold_mape:.5f}")


# Meta-Ensemble using Bayesian Ridge
# Stack every base model's predictions side by side: out-of-fold predictions
# form the meta-training matrix, accumulated test predictions the meta-test matrix.
meta_X = np.concatenate([transformer_oof, lgb_oof, xgb_oof, cat_oof, ann_oof], axis=1)
meta_test = np.concatenate([transformer_test, lgb_test, xgb_test, cat_test, ann_test], axis=1)
# BayesianRidge must be imported from sklearn.linear_model for this line to run
meta_model = MultiOutputRegressor(BayesianRidge())
meta_model.fit(meta_X, train_df[target_cols].values)
# NOTE(review): the fitted meta_model is not used for the submission; only the
# ANN test predictions are written out. Confirm this is intended, or switch to
# meta_model.predict(meta_test).
final_preds = ann_test

# Save submission
submission = pd.DataFrame(final_preds, columns=target_cols)
# Insert the IDs positionally so a non-default test_df index cannot introduce
# NaNs through index alignment.
submission.insert(0, 'ID', test_df['ID'].values)
submission.to_csv('submission_hybrid.csv', index=False)
print("Saved submission_hybrid.csv")