In [None]:
# Mount Google Drive so files stored under "My Drive" are reachable from the
# Colab filesystem at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

# Set base path for the project. run_feature_importance_validation() reads
# train_set.csv / validation_set.csv / test_set.csv from this folder and
# creates its timestamped results directory inside it.
base_path = '/content/gdrive/My Drive/Projects/CustomerSatisfactionLogistics/'

# Import required libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import ParameterGrid
import json
from datetime import datetime
import os


def configure_gpu():
    """Turn on GPU memory growth and mixed precision when a GPU is visible.

    Every GPU reported by TensorFlow gets memory growth enabled, after which
    the global Keras dtype policy is switched to ``mixed_float16``. Nothing is
    changed when no GPU is found.

    Returns:
        bool: True if at least one GPU was found and configured, else False.
    """
    gpu_devices = tf.config.list_physical_devices('GPU')
    if not gpu_devices:
        return False

    for device in gpu_devices:
        tf.config.experimental.set_memory_growth(device, True)

    tf.keras.mixed_precision.set_global_policy('mixed_float16')
    return True

def prepare_single_feature_data(train_df, val_df, test_df, feature_name):
    """Build scaled single-column inputs and shifted targets for all splits.

    The scaler is fitted on the training split only and then applied to the
    validation and test splits, so no statistics leak from held-out data.

    Args:
        train_df: Training DataFrame.
        val_df: Validation DataFrame.
        test_df: Test DataFrame.
        feature_name: Name of the single column to use as model input.

    Returns:
        Tuple ``(X_train, X_val, X_test, y_train, y_val, y_test)`` where the
        X entries are the standardized feature column (2-D, one column) and
        the y entries are the ``Customer_Rating`` column minus one.
    """
    feature_columns = [feature_name]
    held_out_frames = (val_df, test_df)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(train_df[feature_columns])
    X_val_scaled, X_test_scaled = (
        scaler.transform(frame[feature_columns]) for frame in held_out_frames
    )

    # Shift ratings down by one so labels start at 0 (0-4 if ratings are 1-5,
    # which is what the 5-unit softmax in create_model expects).
    y_train, y_val, y_test = (
        frame['Customer_Rating'] - 1 for frame in (train_df, val_df, test_df)
    )

    return X_train_scaled, X_val_scaled, X_test_scaled, y_train, y_val, y_test

def create_model(input_dim, params, num_classes=5):
    """Create and compile a small fully connected classifier.

    Architecture: two Dense -> BatchNormalization -> Dropout stages (the
    second Dense has half the units of the first) followed by a softmax
    output over ``num_classes`` classes.

    Args:
        input_dim: Number of input features.
        params: Hyperparameter dict with the keys ``'units'``,
            ``'activation'``, ``'dropout'`` and ``'learning_rate'``.
        num_classes: Number of output classes. Defaults to 5, matching the
            previous hard-coded output width.

    Returns:
        A compiled Keras ``Sequential`` model using the Adam optimizer,
        sparse categorical cross-entropy loss and an accuracy metric.
    """
    model = models.Sequential([
        # An explicit Input layer replaces the deprecated `input_dim=` kwarg
        # on the first Dense layer (which triggers a Keras UserWarning).
        layers.Input(shape=(input_dim,)),
        layers.Dense(params['units'], activation=params['activation']),
        layers.BatchNormalization(),
        layers.Dropout(params['dropout']),
        layers.Dense(params['units'] // 2, activation=params['activation']),
        layers.BatchNormalization(),
        layers.Dropout(params['dropout']),
        layers.Dense(num_classes),
        # Keep the softmax (and therefore the model output fed to the loss)
        # in float32: under the 'mixed_float16' policy a float16 softmax can
        # be numerically unstable. With a float32 policy this is a no-op.
        layers.Activation('softmax', dtype='float32'),
    ])

    optimizer = tf.keras.optimizers.Adam(learning_rate=params['learning_rate'])
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

def grid_search_single_feature(feature_name, X_train, X_val, X_test, y_train, y_val, y_test,
                               n_combinations=100):
    """Randomly sample hyperparameter combinations and train one model each.

    The full grid is enumerated, shuffled with NumPy's global random state and
    truncated to ``n_combinations`` entries. Each sampled combination is
    trained for up to 30 epochs with early stopping on validation loss, then
    evaluated on the train, validation and test splits.

    Args:
        feature_name: Name of the feature being evaluated (used for logging
            and stored in every result row).
        X_train, X_val, X_test: 2-D input arrays for the three splits.
        y_train, y_val, y_test: Integer class labels for the three splits.
        n_combinations: Maximum number of combinations to try. ``None`` runs
            the whole grid. Defaults to 100 (the previous hard-coded value).

    Returns:
        Tuple ``(results, best_params)``: ``results`` is a list with one dict
        per trained model (parameters, accuracies, losses, epochs trained);
        ``best_params`` is the parameter dict with the highest validation
        accuracy, or ``None`` if no combination was run.
    """
    # Parameter grid
    param_grid = {
        'units': [16, 32, 64, 128],
        'activation': ['relu', 'elu', 'tanh'],
        'dropout': [0.1, 0.2, 0.3, 0.4],
        'learning_rate': [0.1, 0.01, 0.001],
        'batch_size': [32, 64, 128]
    }

    # Random subset of the grid
    all_combinations = list(ParameterGrid(param_grid))
    np.random.shuffle(all_combinations)
    param_combinations = all_combinations[:n_combinations]
    total = len(param_combinations)

    results = []
    # Start below any reachable accuracy so the first model always becomes
    # the initial best, even if its validation accuracy is exactly 0.
    best_accuracy = float('-inf')
    best_params = None

    print(f"\nTraining models for feature: {feature_name}")
    print(f"Number of parameter combinations: {total}")

    for i, params in enumerate(param_combinations, 1):
        print(f"\nCombination {i}/{total}")
        print(f"Parameters: {params}")

        # Drop Keras state left over from the previous model so memory does
        # not grow across the many models built in this loop. The global
        # dtype policy is saved and re-applied because clearing the session
        # may reset global Keras settings (e.g. 'mixed_float16').
        dtype_policy = tf.keras.mixed_precision.global_policy()
        tf.keras.backend.clear_session()
        tf.keras.mixed_precision.set_global_policy(dtype_policy)

        model = create_model(X_train.shape[1], params)

        # Early stopping
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True
        )

        # Train model
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=30,
            batch_size=params['batch_size'],
            callbacks=[early_stopping],
            verbose=0
        )

        # Evaluate model
        train_loss, train_acc = model.evaluate(X_train, y_train, verbose=0)
        val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
        test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)

        result = {
            'feature': feature_name,
            'params': params,
            'train_accuracy': float(train_acc),
            'val_accuracy': float(val_acc),
            'test_accuracy': float(test_acc),
            'train_loss': float(train_loss),
            'val_loss': float(val_loss),
            'test_loss': float(test_loss),
            'epochs_trained': len(history.history['loss'])
        }

        results.append(result)

        # Model selection uses validation accuracy only; test accuracy is
        # recorded for reporting but never used to pick the best parameters.
        if val_acc > best_accuracy:
            best_accuracy = val_acc
            best_params = params

        print(f"Train Accuracy: {train_acc:.4f}")
        print(f"Validation Accuracy: {val_acc:.4f}")
        print(f"Test Accuracy: {test_acc:.4f}")

    return results, best_params

def run_feature_importance_validation(base_path):
    """Run the single-feature validation experiment end to end.

    For each feature in a fixed list, a single-feature dataset is prepared, a
    randomized hyperparameter search is run, and the per-model results are
    written to ``<feature>_results.csv``. After all features are processed,
    the summary statistics are written to ``feature_summaries.json`` and a
    side-by-side table to ``feature_comparison.csv``. All outputs go into a
    new timestamped directory under ``base_path``.

    Args:
        base_path: Folder containing ``train_set.csv``, ``validation_set.csv``
            and ``test_set.csv``; also the parent of the results directory.

    Returns:
        Tuple ``(all_results, feature_summaries, comparison_df)``.
    """
    configure_gpu()

    # Timestamped output folder inside the project directory
    run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    results_dir = os.path.join(base_path, f"feature_validation_results_{run_stamp}")
    os.makedirs(results_dir, exist_ok=True)

    # Load the three pre-split datasets
    split_files = ('train_set.csv', 'validation_set.csv', 'test_set.csv')
    train_df, val_df, test_df = (
        pd.read_csv(os.path.join(base_path, filename)) for filename in split_files
    )

    features = ['Reached_on_Time', 'Cost_of_the_Product', 'Customer_Care_Calls']

    all_results = {}
    feature_summaries = {}

    for feature in features:
        print(f"\nValidating feature: {feature}")

        # Scaled inputs and shifted targets for this one feature
        splits = prepare_single_feature_data(train_df, val_df, test_df, feature)

        # Randomized hyperparameter search on the single feature
        results, best_params = grid_search_single_feature(feature, *splits)
        all_results[feature] = results

        # Summary statistics over the validation accuracies of all models
        val_scores = [entry['val_accuracy'] for entry in results]
        feature_summaries[feature] = {
            'mean_val_accuracy': np.mean(val_scores),
            'max_val_accuracy': np.max(val_scores),
            'min_val_accuracy': np.min(val_scores),
            'std_val_accuracy': np.std(val_scores),
            'best_params': best_params
        }

        # Per-feature results table
        per_feature_csv = os.path.join(results_dir, f"{feature}_results.csv")
        pd.DataFrame(results).to_csv(per_feature_csv, index=False)

    # Summary of every feature in one JSON file
    summary_path = os.path.join(results_dir, "feature_summaries.json")
    with open(summary_path, 'w') as f:
        json.dump(feature_summaries, f, indent=4)

    # One row per feature for side-by-side comparison
    comparison_rows = [
        {
            'Feature': feature,
            'Mean Validation Accuracy': summary['mean_val_accuracy'],
            'Max Validation Accuracy': summary['max_val_accuracy'],
            'Min Validation Accuracy': summary['min_val_accuracy'],
            'Std Validation Accuracy': summary['std_val_accuracy']
        }
        for feature, summary in feature_summaries.items()
    ]
    comparison_df = pd.DataFrame(comparison_rows)
    comparison_csv = os.path.join(results_dir, "feature_comparison.csv")
    comparison_df.to_csv(comparison_csv, index=False)

    print("\nExperiment completed!")
    print(f"Results saved in directory: {results_dir}")
    print("\nFeature Comparison Summary:")
    print(comparison_df)

    return all_results, feature_summaries, comparison_df

if __name__ == "__main__":
    # Run the validation with the Google Drive path. The three return values
    # (per-model results, per-feature summaries, comparison table) are bound
    # to module-level names so they stay available after the run.
    all_results, feature_summaries, comparison_df = run_feature_importance_validation(base_path)

Mounted at /content/gdrive

Validating feature: Reached_on_Time

Training models for feature: Reached_on_Time
Number of parameter combinations: 100

Combination 1/100
Parameters: {'activation': 'tanh', 'batch_size': 32, 'dropout': 0.3, 'learning_rate': 0.01, 'units': 128}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracy: 0.2138
Validation Accuracy: 0.2109
Test Accuracy: 0.1852

Combination 2/100
Parameters: {'activation': 'tanh', 'batch_size': 128, 'dropout': 0.1, 'learning_rate': 0.01, 'units': 32}
Train Accuracy: 0.2074
Validation Accuracy: 0.1996
Test Accuracy: 0.1966

Combination 3/100
Parameters: {'activation': 'elu', 'batch_size': 128, 'dropout': 0.2, 'learning_rate': 0.001, 'units': 16}
Train Accuracy: 0.2074
Validation Accuracy: 0.1996
Test Accuracy: 0.1966

Combination 4/100
Parameters: {'activation': 'elu', 'batch_size': 64, 'dropout': 0.2, 'learning_rate': 0.01, 'units': 64}
Train Accuracy: 0.2005
Validation Accuracy: 0.2095
Test Accuracy: 0.2062

Combination 5/100
Parameters: {'activation': 'tanh', 'batch_size': 64, 'dropout': 0.4, 'learning_rate': 0.001, 'units': 64}
Train Accuracy: 0.2138
Validation Accuracy: 0.2109
Test Accuracy: 0.1852

Combination 6/100
Parameters: {'activation': 'elu', 'batch_size': 64, 'dropout': 0.4, 'learning_rate': 0.01, 'units': 64}
Train Accuracy

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracy: 0.2168
Validation Accuracy: 0.2152
Test Accuracy: 0.1937

Combination 2/100
Parameters: {'activation': 'elu', 'batch_size': 64, 'dropout': 0.1, 'learning_rate': 0.1, 'units': 64}
Train Accuracy: 0.2054
Validation Accuracy: 0.2116
Test Accuracy: 0.2153

Combination 3/100
Parameters: {'activation': 'elu', 'batch_size': 128, 'dropout': 0.3, 'learning_rate': 0.01, 'units': 16}
Train Accuracy: 0.2028
Validation Accuracy: 0.2102
Test Accuracy: 0.1903

Combination 4/100
Parameters: {'activation': 'elu', 'batch_size': 128, 'dropout': 0.4, 'learning_rate': 0.01, 'units': 64}
Train Accuracy: 0.2072
Validation Accuracy: 0.2138
Test Accuracy: 0.1903

Combination 5/100
Parameters: {'activation': 'elu', 'batch_size': 128, 'dropout': 0.3, 'learning_rate': 0.001, 'units': 64}
Train Accuracy: 0.1926
Validation Accuracy: 0.1974
Test Accuracy: 0.1920

Combination 6/100
Parameters: {'activation': 'relu', 'batch_size': 64, 'dropout': 0.3, 'learning_rate': 0.001, 'units': 16}
Train Accuracy:

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Accuracy: 0.1989
Validation Accuracy: 0.2173
Test Accuracy: 0.2074

Combination 2/100
Parameters: {'activation': 'tanh', 'batch_size': 32, 'dropout': 0.3, 'learning_rate': 0.1, 'units': 64}
Train Accuracy: 0.2053
Validation Accuracy: 0.2145
Test Accuracy: 0.1977

Combination 3/100
Parameters: {'activation': 'tanh', 'batch_size': 128, 'dropout': 0.3, 'learning_rate': 0.001, 'units': 64}
Train Accuracy: 0.2099
Validation Accuracy: 0.2202
Test Accuracy: 0.1949

Combination 4/100
Parameters: {'activation': 'elu', 'batch_size': 128, 'dropout': 0.2, 'learning_rate': 0.01, 'units': 128}
Train Accuracy: 0.2070
Validation Accuracy: 0.2024
Test Accuracy: 0.2091

Combination 5/100
Parameters: {'activation': 'relu', 'batch_size': 32, 'dropout': 0.3, 'learning_rate': 0.01, 'units': 64}
Train Accuracy: 0.2074
Validation Accuracy: 0.2216
Test Accuracy: 0.1926

Combination 6/100
Parameters: {'activation': 'tanh', 'batch_size': 128, 'dropout': 0.4, 'learning_rate': 0.1, 'units': 64}
Train Accurac