# Descriptive statistics on simulated results
Below is a compact, Jupyter-friendly tutorial for creating descriptive statistics from apsimNGpy simulation outputs—plus an “alternate print” path using tabulate (remember to install it first).

# Prerequisites

# core needed libraries
pip install apsimNGpy pandas

# optional pretty printing (used later)
pip install tabulate


First fetch the simulated output as a Pandas data frame


In [25]:
import pandas as pd
from apsimNGpy.core.apsim import ApsimModel

# Instantiate the bundled Maize template (templates are named without the .apsimx suffix)
model = ApsimModel('Maize')

# Execute the simulation so that the report tables are populated
model.run()

# Retrieve the "Report" table as a pandas DataFrame
df = model.get_simulated_output('Report')

# Convert the 'Date' column to datetimes when the report has one;
# values that cannot be parsed become NaT because of errors='coerce'
date_col = 'Date'
if date_col in df.columns:
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

df.head()


Unnamed: 0,CheckpointID,SimulationID,Zone,Clock.Today,Maize.Phenology.CurrentStageName,Maize.AboveGround.Wt,Maize.AboveGround.N,Yield,Maize.Grain.Wt,Maize.Grain.Size,Maize.Grain.NumberFunction,Maize.Grain.Total.Wt,Maize.Grain.N,Maize.Total.Wt,source_table
0,1,1,Field,1991-05-28 00:00:00,HarvestRipe,1603.309641,15.57825,8469.615813,846.961581,0.278267,3043.698222,846.961581,11.178291,1728.427114,Report
1,1,1,Field,1992-04-09 00:00:00,HarvestRipe,849.734144,9.684291,4674.514452,467.451445,0.273804,1707.246422,467.451445,6.226327,922.393712,Report
2,1,1,Field,1993-03-16 00:00:00,HarvestRipe,182.766781,1.861545,555.02135,55.502135,0.304067,182.532674,55.502135,0.752357,204.10877,Report
3,1,1,Field,1994-03-15 00:00:00,HarvestRipe,795.133784,8.34436,3504.274669,350.427467,0.226733,1545.553056,350.427467,4.886844,869.242545,Report
4,1,1,Field,1995-04-04 00:00:00,HarvestRipe,1525.129268,16.481579,7820.119109,782.011911,0.273512,2859.155304,782.011911,10.463854,1665.483701,Report


Whole-dataset descriptive statistics

In [26]:
# Keep only the numeric columns; text columns cannot be summarised this way
numeric_cols = df.select_dtypes(include='number')

# One row per variable: count, mean, std, min, median and max
core_stats = numeric_cols.agg(['count', 'mean', 'std', 'min', 'median', 'max']).T

# Lower and upper quartiles, one row per variable
quartiles = numeric_cols.quantile([0.25, 0.75]).T
quartiles.columns = ['q25', 'q75']

# Combine both tables and add the coefficient of variation (std as a percentage of the mean)
combined = core_stats.join(quartiles)
combined['cv_percent'] = combined['std'] / combined['mean'] * 100

# Put the columns in reading order
column_order = ['count', 'mean', 'std', 'cv_percent', 'min', 'q25', 'median', 'q75', 'max']
summary = combined[column_order]

summary.round(3)


Unnamed: 0,count,mean,std,cv_percent,min,q25,median,q75,max
CheckpointID,10.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0
SimulationID,10.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0
Maize.AboveGround.Wt,10.0,1225.1,561.129,45.803,182.767,840.745,1318.364,1587.793,1968.461
Maize.AboveGround.N,10.0,12.381,5.4,43.611,1.862,8.679,13.336,16.461,18.309
Yield,10.0,5636.53,2895.961,51.378,555.021,3578.665,6033.993,8239.976,8823.516
Maize.Grain.Wt,10.0,563.653,289.596,51.378,55.502,357.866,603.399,823.998,882.352
Maize.Grain.Size,10.0,0.285,0.024,8.339,0.227,0.275,0.295,0.299,0.305
Maize.Grain.NumberFunction,10.0,1986.771,992.188,49.94,182.533,1339.924,2123.083,2846.481,3043.698
Maize.Grain.Total.Wt,10.0,563.653,289.596,51.378,55.502,357.866,603.399,823.998,882.352
Maize.Grain.N,10.0,7.459,3.754,50.331,0.752,4.926,7.989,10.927,11.254


# Grouped descriptive statistics (e.g., by simulation or treatment)
For this example, we need the `ExperimentManager` class, which lets us create varying treatment levels.



In [27]:
from apsimNGpy.core.experimentmanager import ExperimentManager

# Build the experiment manager on top of the bundled Maize template
experiment = ExperimentManager(model='Maize')

# Initialise the experiment with permutation=True
# (the results further down contain every Population x Amount combination)
experiment.init_experiment(permutation=True)

In [28]:
# Register the experimental factors, one specification string per factor
factor_specifications = (
    # plant population levels
    '[Sow using a variable rule].Script.Population = 4, 10',
    # nitrogen fertiliser amounts applied at sowing
    '[Fertilise at sowing].Script.Amount= 0, 100, 250',
)
for factor_spec in factor_specifications:
    experiment.add_factor(specification=factor_spec)

In [29]:
# Run the experiment with the factors registered above.
# The call is the cell's last expression, so whatever it returns is echoed as the
# cell output (here: the ExperimentManager repr).
experiment.run()


<apsimNGpy.core.experimentmanager.ExperimentManager at 0x1be070f7230>

In [30]:
# Fetch the Report table produced by the experiment (all factor combinations)
df = experiment.get_simulated_output('Report')

# Sort into a new frame instead of using inplace=True, so the object returned by
# the manager is never mutated and re-running this cell always gives the same result.
# NOTE(review): the grouped output lists Population '10' before '4', which suggests the
# factor columns are stored as strings and therefore sort lexicographically — confirm
# before relying on numeric ordering.
df = df.sort_values(by=['Amount', 'Population'], ascending=True)

# Grouping columns that exist in this Report table
possible_groups = ['Population', 'Amount']

# Drop identifier and label columns that are not useful in the grouped statistics
fd = df.drop(
    columns=[
        'CheckpointID',
        'SimulationID',
        'Maize.Phenology.CurrentStageName',
        'Maize.Grain.NumberFunction',
    ]
)

# Descriptive statistics per population density
fd.groupby('Population').describe()


Unnamed: 0_level_0,Maize.AboveGround.Wt,Maize.AboveGround.Wt,Maize.AboveGround.Wt,Maize.AboveGround.Wt,Maize.AboveGround.Wt,Maize.AboveGround.Wt,Maize.AboveGround.Wt,Maize.AboveGround.Wt,Maize.AboveGround.N,Maize.AboveGround.N,...,Maize.Grain.N,Maize.Grain.N,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
Population,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
10,30.0,1023.149316,679.233139,51.839211,376.512882,957.618178,1711.841339,2264.040816,30.0,9.226772,...,9.920687,13.799286,30.0,1117.193169,735.000161,54.190895,418.61514,1054.522347,1841.706002,2443.652362
4,30.0,833.514829,498.292113,95.929389,378.798654,730.381548,1304.292352,1581.477986,30.0,7.821089,...,8.471362,8.614249,30.0,911.572687,539.611861,98.643186,405.36254,822.260251,1423.753687,1708.187637


In [31]:
# Descriptive statistics per fertiliser amount
fd.groupby(by="Amount").describe()

Unnamed: 0_level_0,Maize.AboveGround.Wt,Maize.AboveGround.Wt,Maize.AboveGround.Wt,Maize.AboveGround.Wt,Maize.AboveGround.Wt,Maize.AboveGround.Wt,Maize.AboveGround.Wt,Maize.AboveGround.Wt,Maize.AboveGround.N,Maize.AboveGround.N,...,Maize.Grain.N,Maize.Grain.N,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
Amount,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0,20.0,422.82301,330.286348,51.839211,168.40538,279.495675,625.191674,1104.547283,20.0,2.778272,...,2.763979,5.13554,20.0,459.99761,362.978715,54.190895,179.284589,298.047183,700.102432,1204.283831
100,20.0,1146.095642,505.533414,194.722345,757.606624,1085.634949,1557.735465,2121.567847,20.0,10.406211,...,8.540822,13.597501,20.0,1255.045211,539.389196,218.291141,840.215573,1236.516379,1683.34477,2288.694976
250,20.0,1216.077566,583.806947,173.308647,835.813804,1207.132779,1634.600521,2264.040816,20.0,12.387308,...,9.251442,13.799286,20.0,1328.105963,625.767507,193.82402,913.423963,1352.608751,1769.777315,2443.652362


That is impressive, but it produces a lot of output, some of which is distracting, so let us target specific columns.

In [32]:
# group by population and summarize the values by Yield
# Summarise Yield only, one row per population level
df.groupby(by="Population")["Yield"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Population,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
10,30.0,4320.96374,3559.891362,0.0,1546.607701,2949.856609,8118.486552,10839.453802
4,30.0,3833.589273,2231.817671,510.191575,2005.485833,3643.115097,6524.34054,6628.079544


In [33]:
# group by amount and summarize the values by Yield
# Summarise Yield only, one row per fertiliser amount
df.groupby(by="Amount")["Yield"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Amount,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,20.0,1768.197406,1258.126749,0.0,884.045308,1279.238795,2589.150347,4462.043386
100,20.0,5013.683796,2689.892217,870.224406,3150.963749,4087.518866,6611.628757,10442.634519
250,20.0,5449.948317,3129.788593,13.88136,3274.97011,5574.223813,7066.20054,10839.453802


A big surprise here is that the minimum yield at the intermediate nitrogen (N) rate (100) was actually higher than at the highest N rate (250).

# Time-based summaries (monthly/annual), optionally per group
These examples require a date column in the dataset.

In [34]:
# Build a datetime column from the APSIM clock so the frame can be resampled over time
df['Date'] = pd.to_datetime(df['Clock.Today'])

# Columns to group by before resampling; use an empty list to summarise all rows together
time_groups = ['Population']

# 'YE' is the year-end frequency, so every row is an annual mean (use 'ME' for monthly means)
indexed = df.set_index('Date')

# groupby() raises when given an empty key list, so only group when keys were supplied
if time_groups:
    resampler = indexed.groupby(time_groups).resample('YE')
else:
    resampler = indexed.resample('YE')

annual = resampler.mean(numeric_only=True).reset_index()

# Earlier name of this table; kept as an alias so code that still refers to it keeps working
monthly = annual

annual.head()


Unnamed: 0,Population,Date,CheckpointID,SimulationID,Maize.AboveGround.Wt,Maize.AboveGround.N,Yield,Maize.Grain.Wt,Maize.Grain.Size,Maize.Grain.NumberFunction,Maize.Grain.Total.Wt,Maize.Grain.N,Maize.Total.Wt
0,10,1991-12-31,1.0,2.666667,1542.284663,13.909819,7388.289675,738.828968,0.200222,3666.886974,738.828968,9.436889,1659.159867
1,10,1992-12-31,1.0,2.666667,585.478374,5.338768,2582.663832,258.266383,0.13559,1273.116007,258.266383,3.380911,639.322128
2,10,1993-12-31,1.0,2.666667,253.653281,1.89205,621.262215,62.126222,0.143312,360.079282,62.126222,0.74715,281.385589
3,10,1994-12-31,1.0,2.666667,626.201366,6.208011,2584.377106,258.437711,0.259815,970.082754,258.437711,3.426955,674.502828
4,10,1995-12-31,1.0,2.666667,1452.572318,14.129786,7021.758047,702.175805,0.19871,3399.770113,702.175805,9.197195,1583.62126


from tabulate import tabulate

# Pretty printing with IPython.display and HTML


In [35]:
from IPython.display import display, HTML
from tabulate import tabulate

# Work on a copy restricted to the grouping keys and the two variables of interest
data = df[['Population', 'Amount', 'Maize.Grain.Size', 'Yield']].copy()

# Descriptive statistics for every Population x Amount combination.
# The columns are a two-level MultiIndex: (variable, statistic).
summary = data.groupby(['Population', 'Amount']).describe()

# Flat copy for display; `summary` itself keeps its MultiIndex columns
g = summary.copy()
# flatten MultiIndex columns: ('Yield','mean') -> 'Yield_mean'
g.columns = ['_'.join(map(str, c)).strip('_') for c in g.columns]
# Bring the group keys back as ordinary columns and round for readability
g = g.reset_index().round(3)

# Wrap the table in a horizontally scrollable container so wide tables stay readable
display(HTML(f'<div style="max-width:100%; overflow-x:auto">{g.to_html(index=False)}</div>'))


Population,Amount,Maize.Grain.Size_count,Maize.Grain.Size_mean,Maize.Grain.Size_std,Maize.Grain.Size_min,Maize.Grain.Size_25%,Maize.Grain.Size_50%,Maize.Grain.Size_75%,Maize.Grain.Size_max,Yield_count,Yield_mean,Yield_std,Yield_min,Yield_25%,Yield_50%,Yield_75%,Yield_max
10,0,10.0,0.13,0.066,0.0,0.107,0.122,0.152,0.242,10.0,1625.022,1290.409,0.0,815.033,1178.387,2433.739,4352.845
10,100,10.0,0.244,0.067,0.127,0.217,0.259,0.3,0.308,10.0,5192.631,3332.632,880.617,2840.567,3649.625,8166.521,10442.635
10,250,10.0,0.252,0.091,0.0,0.255,0.274,0.298,0.308,10.0,6145.238,3930.067,13.881,2745.085,6698.639,9325.953,10839.454
4,0,10.0,0.151,0.051,0.097,0.107,0.143,0.189,0.246,10.0,1911.373,1277.079,510.192,967.549,1498.601,2620.041,4462.043
4,100,10.0,0.282,0.027,0.22,0.269,0.297,0.299,0.301,10.0,4834.737,2024.202,870.224,3562.199,5274.252,6572.486,6628.08
4,250,10.0,0.285,0.025,0.22,0.283,0.298,0.299,0.301,10.0,4754.659,2039.569,768.706,3478.219,5048.915,6572.486,6628.08


#  Export summaries to files

In [36]:
# At this point `summary` is the Population x Amount describe() table from the previous cell;
# it is written to summary.csv in the current working directory
summary.to_csv(path_or_buf="summary.csv")


## Tips & troubleshooting

* If `df` is empty, ensure your APSIM file has a **Report** node and (if needed) call `model.run()` before `get_simulated_output('Report')`.
* If you see many `NaN` in summary tables, make sure you selected **numeric** columns (as above).
* For **multiple** Report tables, pass the exact one you want (e.g., `'Report'`, `'DailyReport'`) to `get_simulated_output(...)`.
* Prefer **explicit grouping** (e.g., `Population`, `Amount`) to avoid mixing different scenarios in the same statistics.


In [37]:
from apsimNGpy import version
from apsimNGpy.core.config import apsim_version
from apsimNGpy.settings import logger

# Record the APSIM and apsimNGpy versions used to produce this notebook
apsim_release = apsim_version()
package_release = version.version
print(f"Notebook generated by;\n APSIM version: `{apsim_release}`\n apsimNGpy version {package_release}")

Notebook generated by;
 APSIM version: `APSIM 2025.8.7844.0`
 apsimNGpy version 0.39.9.15
