In [1]:
import numpy as np
from pyswarm import pso  # Install via pip: pip install pyswarm

In [2]:
# Define the simulation function
def simulation_model(scaling_vector, input_data):
    """
    Produce one simulated value per row of the scaled input data.

    Each column of ``input_data`` is multiplied by the matching entry of
    ``scaling_vector`` (NumPy broadcasting), and the scaled columns are then
    summed along axis 1.

    Args:
        scaling_vector (array-like): Scaling factors for the input dataset.
        input_data (ndarray): Original input data of size (m x n).

    Returns:
        ndarray: Simulated output of size (m).
    """
    # Placeholder model: the row-wise sum stands in for the real simulation
    # (e.g. a machine learning model or a more complex computation).
    return np.sum(input_data * scaling_vector, axis=1)


In [3]:
# Define the objective function
def objective_function(scaling_vector, input_data, ground_truth):
    """
    Score a candidate scaling vector by its mean squared error.

    The candidate is run through ``simulation_model`` and the result is
    compared element-wise against ``ground_truth``.

    Args:
        scaling_vector (array-like): Scaling factors for the input dataset.
        input_data (ndarray): Original input data of size (m x n).
        ground_truth (ndarray): Ground truth output of size (m).

    Returns:
        float: Mean of the squared differences between the simulated output
        and the ground truth (lower is better).
    """
    residuals = simulation_model(scaling_vector, input_data) - ground_truth
    return np.mean(residuals ** 2)  # Mean Squared Error

In [None]:
# --- Example data -----------------------------------------------------------
# Synthetic stand-ins; replace with the actual input dataset and ground truth.
m, n = 100, 10  # rows (observations) x columns (scaled inputs)
np.random.seed(42)  # fix NumPy's global RNG so the example data is repeatable
input_data = np.random.rand(m, n)
ground_truth = np.random.rand(m)

# --- Search space -----------------------------------------------------------
# One scaling factor per input column, each constrained to [0, 2].
lower_bounds = np.zeros(n)
upper_bounds = np.full(n, 2.0)

# --- Particle swarm optimisation ---------------------------------------------
pso_settings = {
    "swarmsize": 50,  # number of particles in the swarm
    "maxiter": 100,   # maximum number of iterations
    "debug": True,    # verbose progress output
}
optimal_scaling, min_deviation = pso(
    func=objective_function,
    lb=lower_bounds,
    ub=upper_bounds,
    args=(input_data, ground_truth),  # forwarded to objective_function after the candidate vector
    **pso_settings,
)

# --- Report -------------------------------------------------------------------
print("Optimal Scaling Vector:", optimal_scaling)
print("Minimum Deviation Achieved:", min_deviation)

# Re-run the model with the best scaling vector found to inspect its output.
final_simulated_output = simulation_model(optimal_scaling, input_data)
print("Final Simulated Output:", final_simulated_output)

In [4]:
import pandas as pd

In [13]:
# Read one simulation-output CSV (file sim_output_0 of the
# experiments_batch_croatia_2 run) and preview its first rows.
sim_output_path = '../output/experiments_batch_croatia_2/sim_outputs/sim_output_0.csv'
df_outputs = pd.read_csv(sim_output_path)
df_outputs.head()

Unnamed: 0,primary_id,region,time_period,area_agrc_crops_bevs_and_spices,area_agrc_crops_cereals,area_agrc_crops_fibers,area_agrc_crops_fruits,area_agrc_crops_herbs_and_other_perennial_crops,area_agrc_crops_nuts,area_agrc_crops_other_annual,...,yield_agrc_fruits_tonne,yield_agrc_herbs_and_other_perennial_crops_tonne,yield_agrc_nuts_tonne,yield_agrc_other_annual_tonne,yield_agrc_other_woody_perennial_tonne,yield_agrc_pulses_tonne,yield_agrc_rice_tonne,yield_agrc_sugar_cane_tonne,yield_agrc_tubers_tonne,yield_agrc_vegetables_and_vines_tonne
0,0,croatia,0,117203.308009,123884.486582,111423.108447,118079.897918,115569.670304,110137.206424,119794.839039,...,747143.548598,2842901.0,65007.316698,184069.643495,17597.693312,340019.125053,0.0,0.0,4291673.0,594326.334606
1,0,croatia,1,116579.513822,123225.132986,110830.078365,117451.438234,114954.570869,109551.020331,119157.25188,...,745262.510889,2874731.0,56652.834779,202919.255928,21494.892307,382418.067957,672.350393,369.849563,5057802.0,581171.707433
2,0,croatia,2,115958.077584,122568.271755,110239.289944,116825.354135,114341.796517,108967.050029,118522.074808,...,637939.741131,2883506.0,48564.564446,190350.514957,23757.576583,376860.190644,672.350393,369.849563,4622694.0,610748.161091
3,0,croatia,3,115338.997643,121913.901141,109650.741612,116201.643956,113731.345619,108385.293964,117889.306133,...,791208.327297,2868078.0,35144.266178,219573.250001,25910.580727,379617.919071,672.350393,369.849563,4369006.0,595807.457812
4,0,croatia,4,114722.272565,121262.019629,109064.432007,115580.306252,113123.21676,107805.75079,117258.944391,...,757909.641484,2849438.0,37444.328609,212177.812543,24311.768477,432699.55544,672.350393,369.849563,4504210.0,598418.688914


In [None]:
# Show the identifier columns of the simulation output.
# The key column is 'primary_id'; the former spelling 'primari_id' raised KeyError.
df_outputs[['primary_id', 'region', 'time_period']]

In [14]:
# Summarise the simulation output: number of rows, number of columns,
# dtype breakdown and memory usage.
df_outputs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Columns: 1453 entries, primary_id to yield_agrc_vegetables_and_vines_tonne
dtypes: float64(1440), int64(12), object(1)
memory usage: 817.4+ KB


In [15]:
# Inspect the first row as a Series (one value per output column).
df_outputs.iloc[0]

primary_id                                            0
region                                          croatia
time_period                                           0
area_agrc_crops_bevs_and_spices           117203.308009
area_agrc_crops_cereals                   123884.486582
                                              ...      
yield_agrc_pulses_tonne                   340019.125053
yield_agrc_rice_tonne                               0.0
yield_agrc_sugar_cane_tonne                         0.0
yield_agrc_tubers_tonne                  4291672.572065
yield_agrc_vegetables_and_vines_tonne     594326.334606
Name: 0, Length: 1453, dtype: object

In [34]:
# Collect every subsector-level CO2e emission column, then take the first row
# of those columns as a Series indexed by column name.
subsector_emission_cols = [
    col for col in df_outputs.columns
    if col.startswith('emission_co2e_subsector')
]
emissions = df_outputs[subsector_emission_cols].iloc[0]
emissions

emission_co2e_subsector_total_agrc    47.465509
emission_co2e_subsector_total_ccsq     0.000000
emission_co2e_subsector_total_entc    13.150221
emission_co2e_subsector_total_fgtv    16.529136
emission_co2e_subsector_total_frst     1.861490
emission_co2e_subsector_total_inen     0.363399
emission_co2e_subsector_total_ippu     1.794037
emission_co2e_subsector_total_lndu     0.469583
emission_co2e_subsector_total_lsmm     0.096193
emission_co2e_subsector_total_lvst     0.914863
emission_co2e_subsector_total_scoe     5.739309
emission_co2e_subsector_total_soil     4.234582
emission_co2e_subsector_total_trns    34.788860
emission_co2e_subsector_total_trww     0.196876
emission_co2e_subsector_total_waso     0.594532
Name: 0, dtype: float64

In [35]:
# Build the per-subsector emissions table (columns: index, sim_value, Subsector).
#
# The table is rebuilt from df_outputs rather than from the current value of
# `emissions`, so this cell can be re-run safely: the earlier version rebound
# `emissions` from a Series to a DataFrame and then failed on a second run
# because reset_index() could not insert an 'index' column that already existed.
# NOTE: the row choice (first row) mirrors the cell that first defines
# `emissions`; keep the two in sync if a different row is selected.
subsector_emission_cols = [
    col for col in df_outputs.columns
    if col.startswith('emission_co2e_subsector')
]
emissions = (
    df_outputs[subsector_emission_cols]
    .iloc[0]
    .rename('sim_value')   # name the values explicitly instead of renaming column 0 afterwards
    .rename_axis('index')  # original column names end up in an 'index' column
    .reset_index()
)
# Strip the common prefix so only the subsector code (e.g. 'agrc') remains.
emissions['Subsector'] = emissions['index'].str.replace('emission_co2e_subsector_total_', '', regex=False)
emissions

Unnamed: 0,index,sim_value,Subsector
0,emission_co2e_subsector_total_agrc,47.465509,agrc
1,emission_co2e_subsector_total_ccsq,0.0,ccsq
2,emission_co2e_subsector_total_entc,13.150221,entc
3,emission_co2e_subsector_total_fgtv,16.529136,fgtv
4,emission_co2e_subsector_total_frst,1.86149,frst
5,emission_co2e_subsector_total_inen,0.363399,inen
6,emission_co2e_subsector_total_ippu,1.794037,ippu
7,emission_co2e_subsector_total_lndu,0.469583,lndu
8,emission_co2e_subsector_total_lsmm,0.096193,lsmm
9,emission_co2e_subsector_total_lvst,0.914863,lvst


In [36]:
# Load the sectoral difference report; later cells use its 'Subsector' and
# 'Edgar_value' columns as the reference to compare simulated emissions against.
sectoral_diff_report_df = pd.read_csv('sectoral_diff_report.csv')

In [39]:
# Attach the simulated value of each subsector to the report rows.
# Left join: every report row is kept; subsectors without a simulated value
# get NaN in 'sim_value'.
sim_values_by_subsector = emissions[['Subsector', 'sim_value']]
merged_df = sectoral_diff_report_df.merge(sim_values_by_subsector, on='Subsector', how='left')
merged_df

Unnamed: 0,Subsector,simulation,Edgar_value,diff,Year,diff_percentage,sim_value
0,soil,1.142107,0.092259,11.379289,2015,1137.928941,4.234582
1,lndu,0.685409,0.06084,10.265855,2015,1026.585546,0.469583
2,ippu,11.167588,3.883217,1.87586,2015,187.585952,1.794037
3,scoe,7.547604,3.247697,1.323987,2015,132.398658,5.739309
4,entc,6.886827,3.312946,1.078762,2015,107.876206,13.150221
5,inen,4.069753,2.15734,0.886468,2015,88.646783,0.363399
6,agrc,1.778561,1.134139,0.568204,2015,56.820357,47.465509
7,trww,0.665286,0.503173,0.322182,2015,32.218156,0.196876
8,lvst,1.938988,1.682916,0.15216,2015,15.215987,0.914863
9,waso,1.300747,1.25869,0.033414,2015,3.341401,0.594532


In [41]:
# Keep only the columns needed for the reference-vs-simulation comparison.
comparison_columns = ['Subsector', 'Edgar_value', 'sim_value']
merged_df = merged_df.loc[:, comparison_columns]
merged_df

Unnamed: 0,Subsector,Edgar_value,sim_value
0,soil,0.092259,4.234582
1,lndu,0.06084,0.469583
2,ippu,3.883217,1.794037
3,scoe,3.247697,5.739309
4,entc,3.312946,13.150221
5,inen,2.15734,0.363399
6,agrc,1.134139,47.465509
7,trww,0.503173,0.196876
8,lvst,1.682916,0.914863
9,waso,1.25869,0.594532


In [42]:
# Mean squared error between simulated and reference values across subsectors.
mse = np.mean(merged_df['sim_value'].sub(merged_df['Edgar_value']).pow(2))

In [43]:
# Display the mean squared error of 'sim_value' against 'Edgar_value'.
mse

241.3481382638385