# Descriptive statistics on simulated results
Below is a compact, Jupyter-friendly tutorial for creating descriptive statistics from apsimNGpy simulation outputs—plus an “alternate print” path using tabulate (remember to install it first).

# Prerequisites

# core libraries needed
pip install apsimNGpy pandas

# optional pretty printing (used later)
pip install tabulate


First, fetch the simulated output as a pandas DataFrame.


In [1]:
import pandas as pd
from apsimNGpy.core.apsim import ApsimModel

# Load the built-in Maize template (template names are given without the .apsimx suffix)
model = ApsimModel('Maize')

# Run the simulation so that results exist before they are read back
model.run()

# Fetch the "Report" table as a pandas DataFrame
df = model.get_simulated_output('Report')

# If the table carries a 'Date' column, convert it to datetimes;
# values that cannot be parsed become NaT instead of raising
if 'Date' in df:
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Preview the first rows
df.head()


Unnamed: 0,CheckpointID,SimulationID,Zone,Clock.Today,Maize.Phenology.CurrentStageName,Maize.AboveGround.Wt,Maize.AboveGround.N,Yield,Maize.Grain.Wt,Maize.Grain.Size,Maize.Grain.NumberFunction,Maize.Grain.Total.Wt,Maize.Grain.N,Maize.Total.Wt,source_table
0,1,1,Field,1991-05-28 00:00:00,HarvestRipe,1603.309641,15.57825,8469.615813,846.961581,0.278267,3043.698222,846.961581,11.178291,1728.427114,Report
1,1,1,Field,1992-04-09 00:00:00,HarvestRipe,849.734144,9.684291,4674.514452,467.451445,0.273804,1707.246422,467.451445,6.226327,922.393712,Report
2,1,1,Field,1993-03-16 00:00:00,HarvestRipe,182.766781,1.861545,555.02135,55.502135,0.304067,182.532674,55.502135,0.752357,204.10877,Report
3,1,1,Field,1994-03-15 00:00:00,HarvestRipe,795.133784,8.34436,3504.274669,350.427467,0.226733,1545.553056,350.427467,4.886844,869.242545,Report
4,1,1,Field,1995-04-04 00:00:00,HarvestRipe,1525.129268,16.481579,7820.119109,782.011911,0.273512,2859.155304,782.011911,10.463854,1665.483701,Report


Whole-dataset descriptive statistics

In [2]:
# Keep only the numeric columns; text columns cannot be summarised this way
num = df.select_dtypes(include='number')

# One row per variable, one column per statistic
basic = num.agg(['count', 'mean', 'std', 'min', 'median', 'max']).transpose()

# Lower and upper quartiles, labelled q25 / q75
q = num.quantile([0.25, 0.75]).transpose().rename(columns={0.25: 'q25', 0.75: 'q75'})

# Combine both tables and add the coefficient of variation (std as a percentage of the mean)
column_order = ['count', 'mean', 'std', 'cv_percent', 'min', 'q25', 'median', 'q75', 'max']
summary = basic.join(q)
summary['cv_percent'] = (summary['std'] / summary['mean']) * 100
summary = summary[column_order]

# Display rounded values; `summary` itself keeps full precision
summary.round(3)


Unnamed: 0,count,mean,std,cv_percent,min,q25,median,q75,max
CheckpointID,10.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0
SimulationID,10.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0
Maize.AboveGround.Wt,10.0,1225.1,561.129,45.803,182.767,840.745,1318.364,1587.793,1968.461
Maize.AboveGround.N,10.0,12.381,5.4,43.611,1.862,8.679,13.336,16.461,18.309
Yield,10.0,5636.53,2895.961,51.378,555.021,3578.665,6033.993,8239.976,8823.516
Maize.Grain.Wt,10.0,563.653,289.596,51.378,55.502,357.866,603.399,823.998,882.352
Maize.Grain.Size,10.0,0.285,0.024,8.339,0.227,0.275,0.295,0.299,0.305
Maize.Grain.NumberFunction,10.0,1986.771,992.188,49.94,182.533,1339.924,2123.083,2846.481,3043.698
Maize.Grain.Total.Wt,10.0,563.653,289.596,51.378,55.502,357.866,603.399,823.998,882.352
Maize.Grain.N,10.0,7.459,3.754,50.331,0.752,4.926,7.989,10.927,11.254


# Grouped descriptive statistics (e.g., by simulation or treatment)
For this example, we need the `ExperimentManager` class, which lets us create an experiment with varying treatment levels.



In [12]:
from apsimNGpy.core.experimentmanager import ExperimentManager
# Create an experiment manager for the 'Maize' model
experiment = ExperimentManager(model='Maize')
# Initialise the experiment before any factor is added.
# NOTE(review): permutation=True presumably crosses every level of each factor
# (the grouped counts further down are consistent with a 4 x 5 design) -- confirm.
experiment.init_experiment(permutation=True)

In [14]:
# Each factor is a single string: the parameter to vary, '=', then its comma-separated levels

# Plant population levels
population_factor = '[Sow using a variable rule].Script.Population = 4, 6, 8, 10'
# Nitrogen fertiliser amount levels
nitrogen_factor = '[Fertilise at sowing].Script.Amount= 0, 100,150, 200, 250'

experiment.add_factor(specification=population_factor)
experiment.add_factor(specification=nitrogen_factor)

In [16]:
# Run the experiment with the factors added above.
# The call evaluates to an ExperimentManager object, which is why its repr
# is echoed as the cell output.
experiment.run()


<apsimNGpy.core.experimentmanager.ExperimentManager at 0x2935f6ddba0>

In [28]:
# Fetch the experiment's Report table and order it by the two factor columns.
# The sort is chained instead of using inplace=True, so the frame handed back by
# get_simulated_output() is never mutated and the cell can be re-run safely.
df = (
    experiment.get_simulated_output('Report')
    .sort_values(by=['Amount', 'Population'], ascending=True)
)

# Grouping columns that exist in this Report table (one per experiment factor)
possible_groups = ['Population', 'Amount']

# Descriptive statistics for each plant-population level.
# NOTE(review): the groups are displayed as 10, 4, 6, 8, which suggests 'Population'
# is stored as text; convert with pd.to_numeric first if numeric ordering is wanted -- confirm.
df.groupby('Population').describe()


Unnamed: 0_level_0,CheckpointID,CheckpointID,CheckpointID,CheckpointID,CheckpointID,CheckpointID,CheckpointID,CheckpointID,SimulationID,SimulationID,...,Maize.Grain.N,Maize.Grain.N,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
Population,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
10,50.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,50.0,6.8,...,12.45385,13.852686,50.0,1289.447656,736.654239,54.190895,868.511797,1158.386221,1988.650295,2457.083319
4,50.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,50.0,9.2,...,8.475352,8.614249,50.0,1001.068741,521.145254,98.643186,761.233392,956.896017,1426.426662,1708.187637
6,50.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,50.0,13.8,...,10.464149,11.253935,50.0,1150.718224,640.23468,71.534517,853.918551,1083.421837,1690.019368,2131.573319
8,50.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,50.0,12.2,...,11.484284,12.699701,50.0,1229.296565,691.742681,53.72151,887.970166,1116.849161,1853.933112,2317.939766


In [29]:
# Descriptive statistics for each fertiliser Amount level
by_amount = df.groupby("Amount")
by_amount.describe()

Unnamed: 0_level_0,CheckpointID,CheckpointID,CheckpointID,CheckpointID,CheckpointID,CheckpointID,CheckpointID,CheckpointID,SimulationID,SimulationID,...,Maize.Grain.N,Maize.Grain.N,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt,Maize.Total.Wt
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
Amount,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0,40.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,40.0,9.25,...,2.656708,5.184583,40.0,456.329579,354.116147,53.72151,171.719439,294.275738,744.446378,1204.283831
100,40.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,40.0,9.5,...,9.415009,13.597501,40.0,1285.598734,541.85311,218.291141,884.461655,1206.529259,1719.044626,2288.694976
150,40.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,40.0,10.5,...,11.197194,13.852686,40.0,1376.46344,606.64339,194.028708,946.894895,1381.721535,1863.525228,2457.083319
200,40.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,40.0,10.25,...,11.197202,13.799295,40.0,1363.717576,614.948907,193.82402,937.570871,1362.471767,1857.630645,2443.654397
250,40.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,40.0,13.0,...,11.166839,13.799286,40.0,1356.054652,616.53716,193.82402,936.024866,1352.608751,1848.713666,2443.652362


That is informative, but it produces a lot of output, some of which is distracting, so let's target specific columns.

In [30]:
# Restrict the summary to Yield, one row per plant-population level
yield_by_population = df.groupby("Population")['Yield']
yield_by_population.describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Population,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
10,50.0,5093.729388,3733.951073,0.0,2252.624637,3627.256662,9198.856975,10881.111792
4,50.0,4212.521692,2169.197622,510.191575,3085.704298,3984.209202,6549.73368,6628.079544
6,50.0,4793.973779,2934.596152,241.859716,2895.653506,4105.005437,7821.202659,8823.530799
8,50.0,4963.756593,3337.240514,0.094526,2541.655824,3665.097489,8496.972606,9949.424774


In [31]:
# Restrict the summary to Yield, one row per fertiliser Amount level
yield_by_amount = df.groupby("Amount")['Yield']
yield_by_amount.describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Amount,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,40.0,1738.677801,1236.562104,0.0,862.028082,1250.717596,2576.077671,4490.541718
100,40.0,5179.23482,2679.061884,870.224406,3408.963611,4087.518866,7626.215922,10442.634519
150,40.0,5692.757501,3039.22813,481.337403,3315.165489,5992.029047,8477.449973,10881.111792
200,40.0,5628.385758,3078.335394,12.979605,3306.51089,5809.261385,8476.460558,10839.46088
250,40.0,5590.920934,3080.229281,13.88136,3290.721995,5574.223813,8402.826601,10839.453802


A big surprise here is that the minimum yield at the lower nitrogen (N) rate of 100 was actually higher than at the higher N rates (150–250).

# Time-based summaries (monthly/annual), optionally per group
For these examples, a date column needs to be available in the dataset.

In [36]:
# Add a datetime 'Date' column built from 'Clock.Today'.
# assign() returns a new frame, so re-running this cell never mutates an earlier object.
df = df.assign(Date=pd.to_datetime(df['Clock.Today']))

# Columns to group by before resampling. At least one column is required:
# an empty list is NOT ignored -- pandas raises "No group keys passed!".
time_groups = ['Population']

# 'YE' resamples to calendar year-end, so this is the ANNUAL mean of every numeric
# column within each group (a monthly summary would use 'ME' instead).
annual_means = (
    df.set_index('Date')
      .groupby(time_groups)
      .resample('YE')
      .mean(numeric_only=True)
      .reset_index()
)

annual_means.head()


Unnamed: 0,Population,Date,CheckpointID,SimulationID,Maize.AboveGround.Wt,Maize.AboveGround.N,Yield,Maize.Grain.Wt,Maize.Grain.Size,Maize.Grain.NumberFunction,Maize.Grain.Total.Wt,Maize.Grain.N,Maize.Total.Wt
0,10,1991-12-31,1.0,6.8,1658.403538,15.372865,8179.93924,817.993924,0.220676,3690.828267,817.993924,10.670177,1781.102515
1,10,1992-12-31,1.0,6.8,775.023209,7.383173,3756.187249,375.618725,0.188569,1587.190677,375.618725,4.88813,841.899215
2,10,1993-12-31,1.0,6.8,242.859735,1.880137,471.620731,47.162073,0.148485,246.854345,47.162073,0.598606,269.890216
3,10,1994-12-31,1.0,6.8,726.619488,7.269006,2975.779427,297.577943,0.263948,1109.657831,297.577943,3.970384,784.14749
4,10,1995-12-31,1.0,6.8,1605.635285,16.433165,7891.747156,789.174716,0.220087,3498.771776,789.174716,10.510011,1748.366286


from tabulate import tabulate

# Pretty printing with IPython.display and HTML


In [51]:
from tabulate import tabulate


from IPython.display import display, HTML

# Keep only the two grouping factors and the two variables of interest
data = df[['Population', 'Amount', 'Maize.Grain.Size', 'Yield']].copy()

# Grouped descriptive statistics; the columns are a MultiIndex such as ('Yield', 'mean').
# Computed once and reused below; `summary` is also what the export cell writes to disk.
summary = data.groupby(['Population', 'Amount']).describe()

# Flatten the MultiIndex columns on a copy so `summary` is left untouched:
# ('Yield', 'mean') -> 'Yield_mean'
g = summary.copy()
g.columns = ['_'.join(map(str, c)).strip('_') for c in g.columns]
g = g.reset_index().round(3)  # bring the group keys back as ordinary columns

# Wrap the HTML table in a horizontally scrollable container so the wide table stays readable
display(HTML(f'<div style="max-width:100%; overflow-x:auto">{g.to_html(index=False)}</div>'))


Population,Amount,Maize.Grain.Size_count,Maize.Grain.Size_mean,Maize.Grain.Size_std,Maize.Grain.Size_min,Maize.Grain.Size_25%,Maize.Grain.Size_50%,Maize.Grain.Size_75%,Maize.Grain.Size_max,Yield_count,Yield_mean,Yield_std,Yield_min,Yield_25%,Yield_50%,Yield_75%,Yield_max
10,0,10.0,0.13,0.066,0.0,0.107,0.122,0.152,0.242,10.0,1625.022,1290.409,0.0,815.033,1178.387,2433.739,4352.845
10,100,10.0,0.244,0.067,0.127,0.217,0.259,0.3,0.308,10.0,5192.631,3332.632,880.617,2840.567,3649.625,8166.521,10442.635
10,150,10.0,0.283,0.023,0.251,0.267,0.285,0.3,0.312,10.0,6303.681,3834.476,481.337,2864.957,7127.506,9323.68,10881.112
10,200,10.0,0.252,0.091,0.0,0.256,0.273,0.298,0.308,10.0,6202.075,3922.828,12.98,2778.221,6959.797,9325.806,10839.461
10,250,10.0,0.252,0.091,0.0,0.255,0.274,0.298,0.308,10.0,6145.238,3930.067,13.881,2745.085,6698.639,9325.953,10839.454
4,0,10.0,0.151,0.051,0.097,0.107,0.143,0.189,0.246,10.0,1911.373,1277.079,510.192,967.549,1498.601,2620.041,4462.043
4,100,10.0,0.282,0.027,0.22,0.269,0.297,0.299,0.301,10.0,4834.737,2024.202,870.224,3562.199,5274.252,6572.486,6628.08
4,150,10.0,0.285,0.026,0.22,0.283,0.297,0.299,0.301,10.0,4794.409,2049.331,769.603,3492.047,5206.849,6572.486,6628.08
4,200,10.0,0.285,0.026,0.22,0.283,0.297,0.299,0.301,10.0,4767.432,2042.48,768.706,3478.219,5105.107,6572.486,6628.08
4,250,10.0,0.285,0.025,0.22,0.283,0.298,0.299,0.301,10.0,4754.659,2039.569,768.706,3478.219,5048.915,6572.486,6628.08


#  Export summaries to files

In [41]:
# Write the current `summary` DataFrame to summary.csv (relative path, so it lands
# in the working directory).
# NOTE(review): `summary` is assigned both in the whole-dataset cell and in the
# pretty-printing cell; the file holds whichever assignment ran last -- confirm
# which table is meant to be exported.
summary.to_csv("summary.csv")


## Tips & troubleshooting

* If `df` is empty, ensure your APSIM file has a **Report** node and (if needed) call `model.run()` before `get_simulated_output('Report')`.
* If you see many `NaN` in summary tables, make sure you selected **numeric** columns (as above).
* For **multiple** Report tables, pass the exact one you want (e.g., `'Report'`, `'DailyReport'`) to `get_simulated_output(...)`.
* Prefer **explicit grouping** (e.g., `Population`, `Amount`) to avoid mixing different scenarios in the same statistics.


In [40]:
from apsimNGpy.core.config import apsim_version
from apsimNGpy import version
from apsimNGpy.settings import logger
# Record which APSIM and apsimNGpy versions produced this notebook
provenance = f"Notebook generated by;\n APSIM version: `{apsim_version()}`\n apsimNGpy version {version.version}"
print(provenance)

Notebook generated by;
 APSIM version: `APSIM 2025.8.7844.0`
 apsimNGpy version 0.39.9.15
