In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:

# Raw observations: one list of eight measurements per row label.
data = {
    "AGRI": [167, 1, 163, 23, 41, 8, 6, 6],
    "SAAG": [162, 2, 141, 12, 40, 12, 4, 15],
    "PRIN": [119, 6, 69, 56, 39, 5, 13, 41],
    "CSUP": [87, 11, 63, 111, 27, 3, 18, 39],
    "CMOY": [103, 5, 68, 77, 32, 4, 11, 30],
    "EMPL": [111, 4, 72, 66, 34, 6, 10, 28],
    "OUVR": [130, 3, 76, 52, 43, 7, 7, 16],
    "INAC": [138, 7, 117, 74, 53, 8, 12, 20],
}
# Variable names, in the same order as the values inside each list above.
columns = ["PAO", "PAA", "VIO", "VIA", "POT", "LEC", "RAI", "PLP"]
# Each dict key becomes a row label and each list position becomes a variable column.
original_df = pd.DataFrame.from_dict(data, orient="index", columns=columns)

In [3]:

# Target statistics, written compactly as (mean, std, min, max) per variable
# and then expanded into one {"mean", "std", "min", "max"} dict per variable.
_stat_names = ("mean", "std", "min", "max")
_stat_values = {
    "PAO": (127.3, 27.9, 87.49, 167.34),
    "PAA": (4.94, 3.03, 1.37, 10.5),
    "VIO": (96.09, 38.66, 63.47, 162.79),
    "VIA": (58.87, 31.23, 12.33, 110.79),
    "POT": (38.58, 7.87, 27.27, 53.17),
    "LEC": (6.53, 2.81, 2.95, 11.96),
    "RAI": (10.3, 4.45, 4.19, 17.97),
    "PLP": (24.17, 12.33, 5.58, 40.62),
}
target_stats = {
    name: dict(zip(_stat_names, values)) for name, values in _stat_values.items()
}

In [4]:

# Function to transform the data
def transform_variable(data, target_mean, target_std):
    """Linearly rescale ``data`` to a requested mean and standard deviation.

    The values are standardized to z-scores using their own mean and standard
    deviation, then mapped to ``z * target_std + target_mean``.

    Parameters
    ----------
    data : array-like exposing ``.mean()`` and ``.std()``
        Values to rescale. The notebook passes a pandas Series, for which
        ``.std()`` is the sample standard deviation (ddof=1).
    target_mean : float
        Mean the result should have.
    target_std : float
        Standard deviation the result should have.

    Returns
    -------
    Same kind of object as ``data`` (index preserved for pandas input), holding
    the rescaled values. When ``data`` has no spread (constant values, or a
    single observation whose sample std is NaN) every non-missing entry is
    ``target_mean`` rather than NaN/inf from a division by zero.
    """
    original_mean = data.mean()
    original_std = data.std()

    # Degenerate spread: z-scores are undefined (0/0 or x/NaN). Place every
    # observation at the target mean; multiplying the centered data by 0.0
    # keeps the input's shape/index and still propagates missing values.
    # The scalar check leaves non-scalar std (e.g. DataFrame input) untouched.
    if np.ndim(original_std) == 0 and (original_std == 0 or pd.isna(original_std)):
        return (data - original_mean) * 0.0 + target_mean

    z_scores = (data - original_mean) / original_std  # Standardize
    transformed = z_scores * target_std + target_mean  # Rescale
    return transformed

In [5]:

# Rescale every variable to its target mean/std and assemble the resulting
# columns into a single frame (column order follows `columns`).
transformed_data = pd.DataFrame(
    {
        variable: transform_variable(
            original_df[variable],
            target_mean=target_stats[variable]["mean"],
            target_std=target_stats[variable]["std"],
        )
        for variable in columns
    }
)

In [6]:

# Cap each rescaled variable to its target [min, max] range.
for variable in columns:
    bounds = target_stats[variable]
    transformed_data[variable] = transformed_data[variable].clip(
        lower=bounds["min"], upper=bounds["max"]
    )

transformed_data


Unnamed: 0,PAO,PAA,VIO,VIA,POT,LEC,RAI,PLP
AGRI,167.167666,1.37,162.740161,23.093603,40.954133,7.897569,6.178826,5.670046
SAAG,162.168585,2.202318,140.814126,12.33,39.954498,11.875952,4.19,14.731248
PRIN,119.176494,6.011267,69.056197,56.002902,38.954863,4.913782,13.172333,40.62
CSUP,87.49,10.5,63.47,110.79,27.27,2.95,17.97,38.894453
CMOY,103.179437,5.05903,68.059559,76.945183,31.957418,3.919186,11.174188,29.833251
EMPL,111.177966,4.106793,72.04611,65.975417,33.956688,5.908378,10.175116,27.819651
OUVR,130.174471,3.154555,76.032662,52.013896,42.953403,6.902973,7.177899,15.738048
INAC,138.173,6.963504,116.894816,73.953429,52.949753,7.897569,12.173261,19.765249


In [7]:
# Export the transformed data to a CSV file in the data folder
output_path = Path("data") / "transformed_data.csv"
# to_csv does not create missing directories, so make sure the folder exists
# first; otherwise a fresh checkout without data/ fails on this cell.
output_path.parent.mkdir(parents=True, exist_ok=True)
transformed_data.to_csv(output_path, index=True)
