In [None]:
!pip install optuna
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split,LeaveOneOut
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
import optuna
from optuna.pruners import MedianPruner

# Load the dataset from the CSV file; every column except the last is used as
# a feature and the last column is the target that the estimators predict.
# NOTE(review): StandardScaler is imported above but the features are not
# scaled anywhere in the visible code — confirm that is intended for
# distance-based estimation.
df = pd.read_csv('/content/nasa93dataset.csv')
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values



# Hold out 20% of the rows as a test set (fixed seed so the split repeats).
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Split the remaining rows 70/30 into training and validation parts.
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.3, random_state=42)


# Small constant added inside the distance functions below and to the diagonal
# of the covariance matrix before it is inverted.
delta = 1e-4

# Distance functions: despite the "similarity_" prefix, smaller return values mean more similar cases
def similarity_euclidean(x1, x2, w):
    """Weighted Euclidean distance between two cases.

    Each squared per-feature difference is scaled by the matching entry of
    ``w``; the module-level ``delta`` is added under the square root.
    """
    gap = x1 - x2
    weighted_squares = w * gap ** 2
    return np.sqrt(np.sum(weighted_squares) + delta)

def similarity_manhattan(x1, x2, w):
    """Weighted Manhattan (city-block) distance between two cases.

    Sums the absolute per-feature differences scaled by ``w`` and adds the
    module-level ``delta`` to the total.
    """
    weighted_gaps = w * np.abs(x1 - x2)
    total = np.sum(weighted_gaps)
    return total + delta

def similarity_maximum_distance(x1, x2, w):
    """Weighted maximum distance between two cases.

    Returns the largest weighted absolute per-feature difference. Unlike the
    Euclidean and Manhattan variants, no ``delta`` offset is added here.
    """
    weighted_gaps = w * np.abs(x1 - x2)
    return np.max(weighted_gaps)

def similarity_mahalanobis(x1, x2, w, cov_inv):
    """Mahalanobis distance between two cases for a given inverse covariance.

    ``w`` is accepted only so the signature matches the other distance
    functions; it is not used in the computation. The module-level ``delta``
    is added under the square root.
    """
    offset = x1 - x2
    quadratic_form = offset.T @ cov_inv @ offset
    return np.sqrt(quadratic_form + delta)

def similarity_akritean(x1, x2, w, omega1, omega2):
    """Akritean distance: a linear blend of Euclidean and Manhattan distance.

    ``omega1`` scales the weighted Euclidean term and ``omega2`` scales the
    weighted Manhattan term.
    """
    euclidean_part = omega1 * similarity_euclidean(x1, x2, w)
    manhattan_part = omega2 * similarity_manhattan(x1, x2, w)
    return euclidean_part + manhattan_part

def inverse_weighted_mean(neighbor_efforts, similarities):
    """Average neighbour efforts, weighting each by the inverse of its distance.

    ``similarities`` holds the distance values of the neighbours (smaller means
    closer). A 1e-9 offset avoids division by zero for exact matches, and the
    inverse distances are normalised so the weights sum to one.
    """
    inverse_distances = 1 / (similarities + 1e-9)
    normalised_weights = inverse_distances / np.sum(inverse_distances)
    return np.sum(normalised_weights * neighbor_efforts)


# Optimization function
def optimise_parameters(X_train, y_train, X_val, y_val, n_trials=100, seed=None):
    """Tune the analogy-based estimator with Optuna by minimising validation MAE.

    Each trial suggests one weight per feature (``fw_i``), one weight per
    distance function (``sw_i``), the neighbour count ``k``, the
    Euclidean/Manhattan mix ``omega1`` of the Akritean distance, and the rule
    used to combine neighbour efforts (``solution``).

    Parameters
    ----------
    X_train, y_train : numpy arrays
        Cases (rows are observations, columns are features) and their efforts;
        neighbours are drawn from these.
    X_val, y_val : numpy arrays
        Cases and efforts used to score every trial.
    n_trials : int, optional
        Number of Optuna trials to run (default 100).
    seed : int or None, optional
        Seed for the TPE sampler. ``None`` (the default) keeps Optuna's default
        unseeded sampler, so results vary between runs.

    Returns
    -------
    dict
        ``best_trial.params``: the raw suggested values. The weights are NOT
        normalised and ``k`` is not clipped, so callers must repeat those steps.
    """
    # Derive the feature count from the argument rather than the notebook-level
    # ``X`` so the function does not depend on hidden global state.
    n_features = X_train.shape[1]

    # Regularise the covariance with delta on the diagonal so it is invertible.
    cov_matrix = np.cov(X_train, rowvar=False) + np.eye(n_features) * delta
    cov_inv = np.linalg.inv(cov_matrix)

    def objective(trial):
        # Suggest feature and similarity weights
        feature_weights = np.array([trial.suggest_float(f'fw_{i}', 0, 1) for i in range(n_features)])
        sim_weights = np.array([trial.suggest_float(f'sw_{i}', 0, 1) for i in range(5)])

        # Ensure the sum of weights is 1
        feature_weights /= feature_weights.sum()
        sim_weights /= sim_weights.sum()

        # Number of nearest neighbors, never more than the available cases
        k = min(trial.suggest_int('k', 3, 20), len(X_train))

        # Akritean similarity weights
        omega1 = trial.suggest_float('omega1', 0, 1)
        omega2 = 1 - omega1

        # Solution method
        solution_func = trial.suggest_categorical('solution', ['mean', 'median', 'inverse_mean'])

        # Distance functions, in the same order as sim_weights
        similarity_functions = [
            similarity_euclidean,
            similarity_manhattan,
            similarity_maximum_distance,
            lambda x1, x2, w: similarity_mahalanobis(x1, x2, w, cov_inv),
            lambda x1, x2, w: similarity_akritean(x1, x2, w, omega1, omega2)
        ]

        # Apply feature weights.
        # NOTE(review): the rows are scaled by feature_weights here AND the same
        # weights are passed into the distance functions, so they act twice;
        # cov_inv was also estimated on the unscaled X_train. Confirm this is
        # intended before changing it, since the evaluation cells mirror it.
        weighted_X_train = X_train * feature_weights
        weighted_X_val = X_val * feature_weights

        predictions = []
        for x in weighted_X_val:
            effort_predictions = []

            for sim_fn in similarity_functions:
                similarities = np.array([sim_fn(x, train_row, feature_weights) for train_row in weighted_X_train])
                # Smallest distances first: the k closest training cases
                neighbor_indices = np.argsort(similarities)[:k]
                neighbor_efforts = y_train[neighbor_indices]

                if solution_func == 'mean':
                    pred = np.mean(neighbor_efforts)
                elif solution_func == 'median':
                    pred = np.median(neighbor_efforts)
                else:
                    pred = inverse_weighted_mean(neighbor_efforts, similarities[neighbor_indices])

                effort_predictions.append(pred)

            # Blend the per-distance predictions with the normalised weights
            final_effort = np.sum(sim_weights * np.array(effort_predictions))
            predictions.append(final_effort)

        mae = mean_absolute_error(y_val, predictions)
        print(f"Trial {trial.number}: MAE = {mae}")
        return mae

    # Trials run sequentially (no n_jobs is passed). The MedianPruner is kept
    # for compatibility but is effectively inert: the objective never reports
    # intermediate values, so no trial can be pruned early.
    sampler = optuna.samplers.TPESampler(seed=seed) if seed is not None else None
    study = optuna.create_study(
        direction='minimize',
        sampler=sampler,
        pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=2)
    )
    study.optimize(objective, n_trials=n_trials)

    # Display best result
    best_trial = study.best_trial
    print("\nBest Trial:")
    print(f"  MAE: {best_trial.value}")
    print("  Parameters:")
    for key, value in best_trial.params.items():
        print(f"    {key}: {value}")

    return best_trial.params

In [None]:
import numpy as np

def calculate_mmre(actuals, predictions):
    """Mean Magnitude of Relative Error (MMRE).

    Zero actual values are replaced by 1e-10 in the denominator so the
    division never hits an exact zero.
    """
    truth = np.array(actuals)
    estimates = np.array(predictions)
    safe_denominator = np.where(truth == 0, 1e-10, truth)
    magnitudes = np.abs((truth - estimates) / safe_denominator)
    return np.mean(magnitudes)

def calculate_bmmre(actuals, all_predictions):
    """Mean of the best (smallest) relative error per sample across predictors.

    ``all_predictions`` is iterated along its first axis; each entry is one set
    of predictions for ``actuals``. For every sample the smallest relative
    error over all sets is kept, and the mean of those minima is returned.
    Zero actuals are replaced by 1e-10 in the denominator.
    """
    truth = np.array(actuals)
    candidates = np.array(all_predictions)
    safe_denominator = np.where(truth == 0, 1e-10, truth)

    errors_per_candidate = []
    for candidate in candidates:
        errors_per_candidate.append(np.abs((truth - candidate) / safe_denominator))

    best_errors = np.min(errors_per_candidate, axis=0)
    return np.mean(best_errors)


def calculate_mdmre(y_actual, y_pred):
    """Median Magnitude of Relative Error (MdMRE).

    Parameters
    ----------
    y_actual : sequence of numbers
        True values.
    y_pred : sequence of numbers
        Predicted values, same length as ``y_actual``.

    Returns
    -------
    float
        Median of ``|y_pred - y_actual| / |y_actual|``.

    Raises
    ------
    ValueError
        If the two inputs differ in length.
    """
    if len(y_actual) != len(y_pred):
        raise ValueError("y_actual and y_pred must have the same length.")

    actual = np.asarray(y_actual, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Guard zero actuals the same way calculate_mmre does, so a zero target
    # yields a finite error instead of NaN/inf that would poison the median.
    safe_actual = np.where(actual == 0, 1e-10, actual)
    mre = np.abs((predicted - actual) / safe_actual)

    return np.median(mre)


def pred_25(y_true, y_pred):
    """PRED(25): fraction of predictions within 25% of the true value.

    Parameters
    ----------
    y_true : sequence of numbers
        True values.
    y_pred : sequence of numbers
        Predicted values, aligned with ``y_true``.

    Returns
    -------
    float
        Share of samples whose relative error magnitude is at most 0.25.
    """
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    # Replace zero targets with 1e-10 (as calculate_mmre does) and take the
    # magnitude of the whole ratio: dividing by a signed y_true made the error
    # negative for negative targets, so they always counted as a hit.
    safe_true = np.where(y_true == 0, 1e-10, y_true)
    percentage_error = np.abs((y_true - y_pred) / safe_true)
    within_25_percent = np.sum(percentage_error <= 0.25)

    return within_25_percent / len(y_true)

In [None]:
# Leave-one-out evaluation: every row is held out once, the hyper-parameters
# are re-tuned on the remaining rows, and the held-out row is then predicted.
# This runs one full Optuna study per row, so the cell is slow.
loo = LeaveOneOut()
errors = []
final_predictions = []
predictions = []
actuals = []
it = 1

for train_val_index, test_index in loo.split(X):
    print(f"Iteration -{it}")
    it += 1
    # Leave-One-Out: 1 row as test, the rest for training + validation
    X_train_val, X_test = X[train_val_index], X[test_index]
    y_train_val, y_test = y[train_val_index], y[test_index]

    # Split train_val into 70% Training, 30% Validation
    X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.3, random_state=42)
    best_params = optimise_parameters(X_train, y_train, X_val, y_val)

    # Rebuild the tuned model from the raw best parameters
    feature_weights = np.array([best_params[f'fw_{i}'] for i in range(X.shape[1])])
    k = best_params['k']
    sim_weights = np.array([best_params[f'sw_{i}'] for i in range(5)])
    omega1 = best_params['omega1']
    omega2 = 1 - omega1
    solution_func = best_params['solution']

    # Normalize weights (best_params holds the un-normalised suggestions)
    feature_weights /= feature_weights.sum()
    sim_weights /= sim_weights.sum()

    # Use the same regularisation (delta) as optimise_parameters so the
    # Mahalanobis metric evaluated here is the one that was actually tuned;
    # this cell previously used 1e-3.
    cov_matrix = np.cov(X_train, rowvar=False) + np.eye(X_train.shape[1]) * delta
    cov_inv = np.linalg.inv(cov_matrix)

    # Distance functions, in the same order as sim_weights
    similarity_functions = [
        similarity_euclidean,
        similarity_manhattan,
        similarity_maximum_distance,
        lambda x1, x2, w: similarity_mahalanobis(x1, x2, w, cov_inv),
        lambda x1, x2, w: similarity_akritean(x1, x2, w, omega1, omega2)
    ]

    # Neighbours are drawn from all non-test rows (training + validation)
    weighted_X_train_val = X_train_val * feature_weights
    weighted_X_test = X_test[0] * feature_weights  # Single test row

    effort_predictions_test = []
    for sim_fn in similarity_functions:
        similarities = np.array([sim_fn(weighted_X_test, train_row, feature_weights) for train_row in weighted_X_train_val])
        # Smallest distances first: the k closest cases
        neighbor_indices = np.argsort(similarities)[:k]
        neighbor_efforts = y_train_val[neighbor_indices]

        if solution_func == 'mean':
            pred = np.mean(neighbor_efforts)
        elif solution_func == 'median':
            pred = np.median(neighbor_efforts)
        else:
            pred = inverse_weighted_mean(neighbor_efforts, similarities[neighbor_indices])

        effort_predictions_test.append(pred)

    # Blend the per-distance predictions with the normalised weights
    final_effort_test = np.sum(sim_weights * np.array(effort_predictions_test))

    # Absolute error for the single held-out sample
    mae_test = abs(y_test[0] - final_effort_test)
    predictions.append(final_effort_test)
    actuals.append(y_test[0])
    errors.append(mae_test)


print(f"Mean Absolute Error (MAE) using LOOCV: {np.mean(errors)}")
print(errors)
print(np.mean(errors))
print(actuals)
print(predictions)

In [None]:
# Re-display the collected results, one per line: the individual errors,
# their mean, the true efforts and the predicted efforts.
print(errors, np.mean(errors), actuals, predictions, sep="\n")

In [None]:
from sklearn.model_selection import KFold

# 4-fold evaluation: each fold is held out once, the hyper-parameters are
# re-tuned on the remaining rows, and EVERY row of the held-out fold is
# predicted and scored.
kfold = KFold(n_splits=4, shuffle=True, random_state=42)
errors = []
final_predictions = []
predictions = []
actuals = []

for train_val_index, test_index in kfold.split(X):
    # K-fold: one fold (several rows) as test, the rest for training + validation
    X_train_val, X_test = X[train_val_index], X[test_index]
    y_train_val, y_test = y[train_val_index], y[test_index]

    # Split train_val into 70% Training, 30% Validation
    X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.3, random_state=42)
    best_params = optimise_parameters(X_train, y_train, X_val, y_val)

    # Rebuild the tuned model from the raw best parameters
    feature_weights = np.array([best_params[f'fw_{i}'] for i in range(X.shape[1])])
    k = best_params['k']
    sim_weights = np.array([best_params[f'sw_{i}'] for i in range(5)])
    omega1 = best_params['omega1']
    omega2 = 1 - omega1
    solution_func = best_params['solution']

    # Normalize weights (best_params holds the un-normalised suggestions)
    feature_weights /= feature_weights.sum()
    sim_weights /= sim_weights.sum()

    # Same covariance regularisation as used during tuning
    cov_matrix = np.cov(X_train, rowvar=False) + np.eye(X_train.shape[1]) * delta
    cov_inv = np.linalg.inv(cov_matrix)

    # Distance functions, in the same order as sim_weights
    similarity_functions = [
        similarity_euclidean,
        similarity_manhattan,
        similarity_maximum_distance,
        lambda x1, x2, w: similarity_mahalanobis(x1, x2, w, cov_inv),
        lambda x1, x2, w: similarity_akritean(x1, x2, w, omega1, omega2)
    ]

    # Neighbours are drawn from all non-test rows (training + validation)
    weighted_X_train_val = X_train_val * feature_weights
    weighted_X_test = X_test * feature_weights  # all rows of the held-out fold

    # Predict each test row separately. Previously only X_test[0] was
    # predicted and that single value was compared against every target in
    # the fold, which made the reported MAE meaningless.
    fold_predictions = []
    for test_row in weighted_X_test:
        effort_predictions_test = []
        for sim_fn in similarity_functions:
            similarities = np.array([sim_fn(test_row, train_row, feature_weights) for train_row in weighted_X_train_val])
            # Smallest distances first: the k closest cases
            neighbor_indices = np.argsort(similarities)[:k]
            neighbor_efforts = y_train_val[neighbor_indices]

            if solution_func == 'mean':
                pred = np.mean(neighbor_efforts)
            elif solution_func == 'median':
                pred = np.median(neighbor_efforts)
            else:
                pred = inverse_weighted_mean(neighbor_efforts, similarities[neighbor_indices])

            effort_predictions_test.append(pred)

        # Blend the per-distance predictions with the normalised weights
        final_effort_test = np.sum(sim_weights * np.array(effort_predictions_test))
        fold_predictions.append(final_effort_test)

    fold_predictions = np.array(fold_predictions)

    # MAE of this fold: each prediction against its own target
    mae_test = np.mean(np.abs(y_test - fold_predictions))
    predictions.extend(fold_predictions)
    actuals.extend(y_test)
    errors.append(mae_test)


# Mean of the per-fold MAEs (folds may differ in size by one row)
print(f"Mean Absolute Error (MAE) using {kfold.get_n_splits()}-fold CV: {np.mean(errors)}")

In [None]:
# Show the raw contents of `errors` as last filled by a cross-validation cell.
print(errors)