In [13]:
import pandas as pd
import numpy as np

In [14]:
def normalize(df, feature_map_df):
    """Min-max scale the mapped columns of ``df`` so that 1.0 is the best value.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw observations. Only columns named in ``feature_map_df`` are read.
    feature_map_df : pandas.DataFrame
        One row per criterion with a ``'feature'`` entry (column name in
        ``df``) and a ``'status'`` entry: ``'ad'`` when higher raw values are
        better, ``'dis'`` when lower raw values are better.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``df``, with one column per mapped feature that exists in
        ``df``. The best observed value maps to 1.0 and the worst to 0.0; a
        column whose best and worst values coincide is filled with 0.0.

    Raises
    ------
    ValueError
        If a feature that exists in ``df`` has a status other than
        ``'ad'`` or ``'dis'``.
    """
    scaled = pd.DataFrame(index=df.index)

    for _, spec in feature_map_df.iterrows():
        name, direction = spec['feature'], spec['status']

        # Mapped features that are absent from the data are reported and
        # left out of the result instead of failing the whole run.
        if name not in df.columns:
            print(f"Column '{name}': skipping.")
            continue

        values = df[name].astype(float)

        if direction not in ('ad', 'dis'):
            raise ValueError(f"Unknown status '{direction}' for feature '{name}'.")

        higher_is_better = direction == 'ad'
        worst = values.min() if higher_is_better else values.max()
        best = values.max() if higher_is_better else values.min()

        # For 'dis' both numerator and denominator are negative (or zero),
        # so the ratio still lands in [0, 1] with the minimum mapped to 1.0.
        span = best - worst
        scaled[name] = 0.0 if span == 0 else (values - worst) / span

    return scaled


In [15]:
def _double_center(D):
    """Return ``D`` minus its row means and column means, plus its grand mean."""
    per_row = D.mean(axis=1, keepdims=True)
    per_col = D.mean(axis=0, keepdims=True)
    overall = D.mean()
    return D - per_row - per_col + overall

def distance_covariance(x, y):
    """Sample distance covariance of two samples.

    Both inputs are converted to float and flattened. For each one the matrix
    of pairwise absolute differences is built and double-centered; the result
    is the square root of the mean elementwise product of the two centered
    matrices. The elementwise product means the function is intended for
    samples of equal length.
    """
    u = np.asarray(x, dtype=float).ravel()
    v = np.asarray(y, dtype=float).ravel()

    # Entry [i, j] is |u_i - u_j| (respectively |v_i - v_j|).
    centered_u = _double_center(np.abs(u[:, None] - u[None, :]))
    centered_v = _double_center(np.abs(v[:, None] - v[None, :]))

    size = u.shape[0]
    squared = (centered_u * centered_v).sum() / (size * size)

    # Clamp negative values to zero so the square root stays real.
    if squared < 0:
        squared = 0.0
    return np.sqrt(squared)


def distance_correlation(x, y):
    """Distance correlation of ``x`` and ``y``.

    Computed as ``dCov(x, y) / sqrt(dCov(x, x) * dCov(y, y))`` using
    ``distance_covariance``. When that denominator is zero the function
    returns 0.0 instead of dividing.
    """
    cross = distance_covariance(x, y)
    scale = np.sqrt(distance_covariance(x, x) * distance_covariance(y, y))
    return cross / scale if scale != 0 else 0.0


In [16]:
def critic_weights(norm_df):
    """Compute CRITIC-style objective weights from normalized criteria.

    Parameters
    ----------
    norm_df : pandas.DataFrame
        One column per criterion.

    Returns
    -------
    (pandas.Series, pandas.DataFrame)
        The weights (named ``"CRITIC_weight"``, summing to 1) and the Pearson
        correlation matrix they were derived from.

    Raises
    ------
    ValueError
        If the total information content is zero.
    """
    sigma = norm_df.std(ddof=1)                 # sample std. dev. sigma_j
    corr_mat = norm_df.corr(method="pearson")   # pairwise Pearson r_jk

    # Conflict of criterion j with the others: sqrt(sum_k (1 - r_jk)^2).
    # NOTE(review): d_critic_weights in this notebook sums (1 - dCor) directly
    # rather than taking a root of squares -- confirm this form is intended.
    conflict = np.sqrt(((1 - corr_mat) ** 2).sum(axis=1))
    info_content = sigma * conflict

    total_info = info_content.sum()
    if total_info == 0:
        raise ValueError("Total information content is zero; "
                         "check for constant or perfectly correlated columns.")

    weights = (info_content / total_info).rename("CRITIC_weight")
    return weights, corr_mat

In [17]:
def d_critic_weights(norm_df):
    """Compute D-CRITIC weights (CRITIC with distance correlation).

    The information content of criterion j is
    ``I_j = s_j * sum_k (1 - dCor(j, k))`` with ``s_j`` the sample standard
    deviation, and the weight is ``I_j / sum_j I_j``.

    Missing values are handled pairwise: each distance correlation uses only
    the rows where both criteria are present, in line with the NaN-skipping
    ``std`` used for ``s_j``. Without this, one NaN made every correlation of
    that column NaN, the row sum skipped those entries, and the column
    silently received weight 0.

    Parameters
    ----------
    norm_df : pandas.DataFrame
        One column per criterion.

    Returns
    -------
    (pandas.Series, pandas.DataFrame)
        The weights (named ``"D-CRITIC_weight"``, summing to 1) and the
        symmetric distance-correlation matrix with ones on the diagonal.
        A pair of criteria with no row in common gets NaN in the matrix and
        contributes no conflict to the row sum.

    Raises
    ------
    ValueError
        If the total information content is zero.
    """
    criteria = norm_df.columns
    n = len(criteria)
    s = norm_df.std(ddof=1)

    # Work positionally on a float array: cheaper than repeated .loc writes
    # and independent of how the columns are labelled.
    values = norm_df.to_numpy(dtype=float)
    missing = np.isnan(values)
    dcor = np.eye(n)

    for i in range(n):
        for j in range(i + 1, n):
            complete = ~(missing[:, i] | missing[:, j])
            if complete.any():
                pair_dcor = distance_correlation(values[complete, i],
                                                 values[complete, j])
            else:
                # No shared observations: the correlation is undefined.
                pair_dcor = np.nan
            dcor[i, j] = dcor[j, i] = pair_dcor

    dcor_mat = pd.DataFrame(dcor, index=criteria, columns=criteria)

    # A lone criterion has nothing to conflict with, so its information
    # content is zero by construction; the only valid weight vector is [1.0].
    # A constant lone criterion still falls through to the error below.
    if n == 1 and s.iloc[0] > 0:
        weights = pd.Series([1.0], index=criteria, name="D-CRITIC_weight")
        return weights, dcor_mat

    info_content = s * (1 - dcor_mat).sum(axis=1)

    total_info = info_content.sum()
    if total_info == 0:
        raise ValueError("Total information content is zero; check if all columns are constant.")

    weights = info_content / total_info
    weights.name = "D-CRITIC_weight"

    return weights, dcor_mat


In [18]:
# Per-category feature maps; each is later passed to normalize() as
# feature_map_df, which reads its 'feature' and 'status' columns.
(
    feature_transport,
    feature_waste,
    feature_water,
    feature_accom,
    feature_energy,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        "feature/transportation.csv",
        "feature/waste_management.csv",
        "feature/water.csv",
        "feature/accommodation.csv",
        "feature/energy.csv",
    )
)

In [19]:
# Raw observations; the cells below normalize them once per category.
# NOTE(review): this rebinds `input`, shadowing Python's built-in input() for
# the rest of the kernel session. Renaming it requires updating every cell
# that reads `input`, so it is only flagged here.
input = pd.read_csv("new_data.csv")

In [20]:
# Normalize the raw data once per category, each with its own feature map.
(
    norm_transport,
    norm_waste,
    norm_water,
    norm_accom,
    norm_energy,
) = (
    normalize(input, feature_map)
    for feature_map in (
        feature_transport,
        feature_waste,
        feature_water,
        feature_accom,
        feature_energy,
    )
)

In [21]:
# D-CRITIC weights and distance-correlation matrix for every category.
(
    (weight_transport, mat_transport),
    (weight_waste, mat_waste),
    (weight_water, mat_water),
    (weight_accom, mat_accom),
    (weight_energy, mat_energy),
) = (
    d_critic_weights(category_norm)
    for category_norm in (
        norm_transport,
        norm_waste,
        norm_water,
        norm_accom,
        norm_energy,
    )
)

In [22]:
# Preview the D-CRITIC weights of the energy category (one entry per criterion).
weight_energy.head()

unsustainable_HVAC        0.338197
electricityPC_co2_2029    0.201562
Renewable                 0.204946
inverse_capacity          0.255295
Name: D-CRITIC_weight, dtype: float64

In [23]:
# Persist each category's D-CRITIC weights as its own CSV file.
# NOTE(review): assumes the critic_weight/ directory already exists -- confirm.
for _weights, _csv_path in (
    (weight_transport, "critic_weight/d_weight_transport.csv"),
    (weight_waste, "critic_weight/d_weight_waste.csv"),
    (weight_water, "critic_weight/d_weight_water.csv"),
    (weight_accom, "critic_weight/d_weight_accom.csv"),
    (weight_energy, "critic_weight/d_weight_energy.csv"),
):
    _weights.to_csv(_csv_path)

# Sobol on raw input data