In [8]:
# -*- coding: utf-8 -*-
"""
Build Table 3.4.1 from 'characterisation_outdoor.csv'.

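- Uses only rows where "AOI", "absolute_error_pyrhelio_max" and "Platform tilt" are all
  present (non-blank, numeric) and where "direct_normal_fraction" is greater than 0.95.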
- Groups by Platform tilt and outputs a CSV with:
    Columns: aoi 5° (tilt 70°), aoi 40° (tilt 35°), aoi 60° (tilt 15°), aoi 70° (tilt 5°)
    Rows (in order):
      - Average angle of incidence [degree]
      - Adjustable Tilt
      - Number of measurement series
      - Average pyranometer temperature [degree Celsius]
      - Average
      - Standard deviation
      - Min
      - Percentile 5%
      - Percentile 95%
      - Max
  (The last six rows refer to statistics of the absolute error [W/m2].)
"""

import os
import re
import numpy as np
import pandas as pd

# --------------------------- Paths ---------------------------
# The default base folder is the original author's local OneDrive directory.
# Set the SOLAR_IRRADIANCE_BASE_DIR environment variable to point the notebook
# at another location without editing this cell.
_DEFAULT_BASE_DIR = r"C:/Users/wsfm/OneDrive - Loughborough University/_Personal_Backup/ground_based_solar_irradiance/"
BASE_DIR = os.environ.get("SOLAR_IRRADIANCE_BASE_DIR", _DEFAULT_BASE_DIR)
INP_DIR  = os.path.join(BASE_DIR, "assets")               # folder holding the input CSV
OUT_DIR  = os.path.join(BASE_DIR, "v_results_3_output")   # folder receiving the generated table
INP_FILE = os.path.join(INP_DIR, "characterisation_outdoor.csv")
OUT_FILE = "Table_3_4_1.csv"                              # file name only; joined with OUT_DIR when saving

# Create the output folder up front so the final to_csv call cannot fail on a missing directory.
os.makedirs(OUT_DIR, exist_ok=True)

# --------------------------- Helpers ---------------------------
def _norm(name: str) -> str:
    """Normalize header for matching: lowercase and collapse spaces."""
    return re.sub(r"\s+", " ", str(name).strip().lower())

def _resolve_col(df_cols, wanted: str, *aliases):
    """
    Find the actual column whose normalized header equals `wanted` or one of `aliases`.

    Candidates are tried in the order given (`wanted` first). Returns the column
    name exactly as it appears in `df_cols`, or None when no candidate matches.
    """
    # normalized header -> original header (a later duplicate overwrites an earlier one)
    lookup = {}
    for actual in df_cols:
        lookup[_norm(actual)] = actual

    candidate_keys = map(_norm, (wanted, *aliases))
    matches = (lookup[key] for key in candidate_keys if key in lookup)
    return next(matches, None)

def _to_number(x):
    """
    Coerce string/number to float; handle percent signs, commas, spaces,
    and spreadsheet placeholders (#N/A, #REF!, #DIV/0!).
    """
    if x is None:
        return np.nan
    s = str(x).strip()
    if s == "" or s.upper() in {"#N/A", "#REF!", "#DIV/0!"}:
        return np.nan
    s = (s.replace("\u00A0", "")  # no-break space
           .replace("\u2009", "")  # thin space
           .replace(" ", "")
           .replace(",", ".")
           .replace("%", ""))
    try:
        return round(float(s),2)
    except ValueError:
        return np.nan

# --------------------------- Load & basic checks ---------------------------
# Read every cell as text; numeric parsing is delegated to _to_number so that
# decimal commas and spreadsheet placeholders are handled in one place.
df_raw = pd.read_csv(INP_FILE, dtype=str)

# Rows are kept only when their direct-normal fraction exceeds this threshold.
DNF_MIN = 0.95

# Required columns, matched on normalized headers
col_aoi   = _resolve_col(df_raw.columns, "AOI", "AOI estimated", "Average of AOI estimated")
# Column the absolute-error statistics are computed from
col_abs   = _resolve_col(df_raw.columns, "absolute_error_pyrhelio_max")
col_tilt  = _resolve_col(df_raw.columns, "Platform tilt")
col_direct_normal_fraction = _resolve_col(df_raw.columns, "direct_normal_fraction")
# Optional: temperature column for "Average pyranometer temperature [degree Celsius]"
col_temp  = _resolve_col(df_raw.columns, "Average of CMP21_Temp_C_Avg", "CMP21_Temp_C_Avg",
                         "Average of CMP21 Temp C Avg", "cmp21 temp", "pyranometer temperature")

# Fail early, naming exactly the headers that could not be resolved.
_required = {
    "AOI": col_aoi,
    "absolute_error_pyrhelio_max": col_abs,
    "Platform tilt": col_tilt,
    "direct_normal_fraction": col_direct_normal_fraction,
}
_missing = [wanted for wanted, found in _required.items() if found is None]
if _missing:
    raise KeyError(
        f"Required columns not found: {_missing}. "
        f"Available columns: {list(df_raw.columns)}"
    )

# Convert to numeric
AOI   = df_raw[col_aoi].map(_to_number)
ABS   = df_raw[col_abs].map(_to_number)
TILT  = df_raw[col_tilt].map(_to_number)
DNF   = df_raw[col_direct_normal_fraction].map(_to_number)
# Without a temperature column, fall back to an all-NaN series aligned on df_raw's index.
TEMP  = (df_raw[col_temp].map(_to_number) if col_temp is not None
         else pd.Series(np.nan, index=df_raw.index))

# Keep rows where AOI, absolute error and tilt are all numeric
mask = AOI.notna() & ABS.notna() & TILT.notna()
data = pd.DataFrame({
    "AOI": AOI[mask],
    "ABS": ABS[mask],
    "TILT": TILT[mask],
    "TEMP": TEMP[mask],
    "DNF": DNF[mask],
})

# NOTE: _to_number rounds to 2 decimals by default, so this comparison is applied
# to the rounded fraction (a raw 0.953 becomes 0.95 and is dropped). Rows with a
# missing fraction compare False and are dropped as well.
data = data[data["DNF"] > DNF_MIN]

# --------------------------- Column order & labels ---------------------------
# Platform tilt -> output column header. The insertion order of this mapping
# is the left-to-right column order of the table.
COL_LABELS = {
    70: "aoi 5° (tilt 70°)",
    35: "aoi 40° (tilt 35°)",
    15: "aoi 60° (tilt 15°)",
    5:  "aoi 70° (tilt 5°)",
}
TILT_ORDER = list(COL_LABELS)
COLS_OUT = list(COL_LABELS.values())

# --------------------------- Row order & labels ---------------------------
ROW_ORDER = [
    # Descriptive rows
    "Average angle of incidence [degree]",
    "Adjustable Tilt",
    "Number of measurement series",
    "Average pyranometer temperature [degree Celsius]",
] + [
    # Statistics of the absolute error
    "Average",
    "Standard deviation",
    "Min",
    "Percentile 5%",
    "Percentile 95%",
    "Max",
]

# --------------------------- Aggregation per tilt ---------------------------
def _stats_abs(series: pd.Series):
    """Return dict of stats for absolute error."""
    s = series.dropna().to_numpy(dtype=float)
    if s.size == 0:
        return {
            "Average": np.nan,
            "Standard deviation": np.nan,
            "Min": np.nan,
            "Percentile 5%": np.nan,
            "Percentile 95%": np.nan,
            "Max": np.nan,
        }
    return {
        "Average": round(float(np.mean(s)),2),
        "Standard deviation": round(float(np.std(s, ddof=1)),2) if s.size > 1 else 0.0,
        "Min": round(float(np.min(s)),2),
        "Percentile 5%": round(float(np.percentile(s, 5)),2),
        "Percentile 95%": round(float(np.percentile(s, 95)),2),
        "Max": round(float(np.max(s)),2),
    }

# Build output structure: one {row label -> value} dict per output column,
# pre-filled with NaN so every row exists even if nothing is written to it.
result = {label: dict.fromkeys(ROW_ORDER, np.nan) for label in COLS_OUT}

for tilt_deg in TILT_ORDER:
    column = result[COL_LABELS[tilt_deg]]
    subset = data.loc[data["TILT"] == tilt_deg]
    n_rows = len(subset)

    # (1) Average angle of incidence [degree]
    if n_rows:
        column["Average angle of incidence [degree]"] = round(float(subset["AOI"].mean()), 2)
    else:
        column["Average angle of incidence [degree]"] = np.nan

    # (2) Adjustable Tilt (the representative tilt value itself)
    column["Adjustable Tilt"] = float(tilt_deg)

    # (3) Number of measurement series (row count of the group)
    column["Number of measurement series"] = int(n_rows)

    # (4) Average pyranometer temperature [degree Celsius]; NaN when no reading is available
    has_temperature = "TEMP" in subset and subset["TEMP"].notna().any()
    column["Average pyranometer temperature [degree Celsius]"] = (
        round(float(subset["TEMP"].mean(skipna=True)), 2) if has_temperature else np.nan
    )

    # (5–10) Absolute error statistics (_stats_abs yields NaNs for an empty group)
    abs_stats = _stats_abs(subset["ABS"])
    column.update({
        label: abs_stats[label]
        for label in ("Average", "Standard deviation", "Min",
                      "Percentile 5%", "Percentile 95%", "Max")
    })

# --------------------------- Assemble and save ---------------------------
# Each entry of `result` becomes one table column; reindex pins row and column order.
out_df = pd.DataFrame(result).reindex(index=ROW_ORDER, columns=COLS_OUT)

# Write the table into the output folder, floats formatted with six decimals
out_path = os.path.join(OUT_DIR, OUT_FILE)
out_df.to_csv(out_path, float_format="%.6f")
print(f"Saved: {out_path}")

Saved: C:/Users/wsfm/OneDrive - Loughborough University/_Personal_Backup/ground_based_solar_irradiance/v_results_3_output\Table_3_4_1.csv


In [9]:
# Show the assembled summary table (rows in ROW_ORDER, columns in COLS_OUT) as this cell's output.
out_df

Unnamed: 0,aoi 5° (tilt 70°),aoi 40° (tilt 35°),aoi 60° (tilt 15°),aoi 70° (tilt 5°)
Average angle of incidence [degree],4.99,40.08,60.81,
Adjustable Tilt,70.0,35.0,15.0,5.0
Number of measurement series,8.0,5.0,2.0,0.0
Average pyranometer temperature [degree Celsius],12.59,13.25,12.25,
Average,8.71,12.78,37.77,
Standard deviation,6.14,3.3,1.41,
Min,-5.34,10.5,36.77,
Percentile 5%,-1.05,10.65,36.87,
Percentile 95%,13.32,17.25,38.66,
Max,13.76,18.62,38.76,
