In [1]:
# Cell 1 — Configuration (edit these paths before running)
# --- Input / output locations (edit for your machine before running) ---
# CSV file with the logged measurements.
DATA_PATH = r"D:\SolarTiltProject\data\data_150ohm.csv"
# Folder that receives every generated figure and the daily summary CSV.
OUT_DIR = r"D:\SolarTiltProject\data\150ohm"

# --- Analysis parameters ---
# Load resistance in ohms; used to derive current when no current column exists.
R = 150.0
# A sample counts as "daytime" when its light reading exceeds this lux value.
# Lower it if many days end up without daytime samples.
DAY_LUX_THRESH = 10_000
# Days with fewer daytime samples than this get an "insufficient data" note.
MIN_DAYTIME_POINTS = 5


In [2]:
# Cell 2 — Imports and global styling (English labels only)
import os, math
from datetime import datetime, time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr

# Make sure the output folder exists before any figure is written to it.
os.makedirs(OUT_DIR, exist_ok=True)

# Global Matplotlib defaults shared by every figure in this notebook:
# dashed, semi-transparent grid, bold titles, slightly larger font.
for _rc_key, _rc_value in (
    ('figure.autolayout', False),
    ('axes.grid', True),
    ('grid.linestyle', '--'),
    ('grid.alpha', 0.35),
    ('axes.titleweight', 'bold'),
    ('font.size', 11),
):
    plt.rcParams[_rc_key] = _rc_value
# If you want interactive zooming, in a notebook run: %matplotlib notebook


In [3]:
# Cell 3 — Read CSV and show headers
# Load the raw measurement log exactly as stored on disk.
df_raw = pd.read_csv(DATA_PATH)

# Show the column names first so the detection cells below can be sanity-checked.
header_names = df_raw.columns.tolist()
print("CSV headers:")
print(header_names)

# Rich preview of the first five rows.
print("\nFirst 5 rows preview:")
display(df_raw.head(5))


CSV headers:
['Timestamp', 'Panel_Temp1_C', 'Panel_Temp2_C', 'Env_Temp_BMP_C', 'Pressure_hPa', 'Light_lux', 'Voltage_V', 'Current_mA', 'Power_mW']

First 5 rows preview:


Unnamed: 0,Timestamp,Panel_Temp1_C,Panel_Temp2_C,Env_Temp_BMP_C,Pressure_hPa,Light_lux,Voltage_V,Current_mA,Power_mW
0,2025/8/14 13:37,28.56,28.94,28.78,1010.3,1440.0,1.724,10.05,16.25
1,2025/8/14 13:37,28.56,28.94,28.8,1010.3,1434.2,1.727,10.05,16.25
2,2025/8/14 13:37,28.56,28.81,28.82,1010.4,1428.5,1.749,10.15,18.75
3,2025/8/14 13:37,28.69,28.81,28.85,1010.4,1428.5,1.747,10.2,18.75
4,2025/8/14 13:38,28.69,28.81,28.86,1010.4,1428.5,1.755,10.25,18.75


In [4]:
# Cell 4 — Detect time column, parse, sort, and deduplicate timestamps (mean for duplicates)
# Known names for the time column; matching is case-insensitive.
time_candidates = ['time','timestamp','datetime','DateTime','Time','date','采样时间','time_stamp','Timestamp']
_time_names = {t.lower() for t in time_candidates}


def _looks_like_timestamp(text):
    """Heuristic: True when `text` contains a date/time separator (':', '-' or '/') and a digit."""
    has_separator = any(sep in text for sep in (':', '-', '/'))
    return has_separator and any(ch.isdigit() for ch in text)


# Pass 1: first column (in file order) whose name is a known time-column name.
time_col = next((c for c in df_raw.columns if str(c).lower() in _time_names), None)

# Pass 2: fall back to the first text column whose first non-null value looks like a timestamp.
# (The previous check used a bare '/' operand, which is always truthy, so the
# separator test never rejected anything.)
if time_col is None:
    for c in df_raw.columns:
        if df_raw[c].dtype != object:
            continue
        sample = df_raw[c].dropna().astype(str)
        if not sample.empty and _looks_like_timestamp(sample.iloc[0]):
            time_col = c
            break

if time_col is None:
    raise RuntimeError("Cannot detect time column automatically. Please rename your time column or set time_col manually.")

# Parse timestamps; unparseable values become NaT and their rows are dropped.
# NOTE: df_raw is re-bound to the parsed, time-indexed frame, so re-running this
# cell alone fails (the time column is gone) -- re-run the CSV-loading cell first.
df_raw[time_col] = pd.to_datetime(df_raw[time_col], errors='coerce')
df_raw = df_raw.dropna(subset=[time_col]).sort_values(by=time_col).reset_index(drop=True)
df_raw = df_raw.set_index(time_col)

# Several samples can share one timestamp (the log has minute resolution);
# collapse them into a single row holding the mean of each column.
df = df_raw.groupby(level=0).mean()
print("Time column detected:", time_col)
print("Time range:", df.index.min(), "to", df.index.max())


Time column detected: Timestamp
Time range: 2025-08-14 13:37:00 to 2025-08-19 11:56:00


In [5]:
# Cell 5 — Detect relevant columns (voltage, light, temps, pressure, humidity, current_mA, power_mW)
cols = list(df.columns)
# Lower-cased name -> original name, so candidate matching ignores case.
cols_lower = {c.lower(): c for c in cols}


def _first_present(candidates):
    """Return the original-case column for the first candidate present in cols_lower, else None."""
    return next((cols_lower[name.lower()] for name in candidates if name.lower() in cols_lower), None)


def _has_all(column, *parts):
    """True when every fragment in `parts` occurs in the lower-cased column name."""
    lowered = column.lower()
    return all(part in lowered for part in parts)


# Voltage: exact candidate names first, then any column containing 'volt'; mandatory.
voltage_col = _first_present(['voltage','voltage_v','voltage (v)','v','voltage_v','voltage_v'])
if voltage_col is None:
    voltage_col = next((c for c in cols if 'volt' in c.lower()), None)
if voltage_col is None:
    raise RuntimeError("Voltage column not found. Columns present: " + str(cols))

# Light: exact candidate names only (no substring fallback).
light_col = _first_present(['lux','light','light_lux','illuminance','light (lux)'])

# Panel temperatures: every column naming both 'panel' and 'temp' (e.g. Panel_Temp1_C).
panel_temp_cols = [c for c in cols if _has_all(c, 'panel', 'temp')]

# Ambient temperature: 'env'/'ambient' + 'temp'; otherwise the first remaining temp column.
env_temp_cols = [c for c in cols if _has_all(c, 'env', 'temp') or _has_all(c, 'ambient', 'temp')]
if not env_temp_cols:
    env_temp_cols = [c for c in cols if 'temp' in c.lower() and c not in panel_temp_cols][:1]

# Pressure and humidity are optional.
pressure_col = _first_present(['pressure','pressure_hpa','press_hpa'])
humidity_col = _first_present(['humidity','rel_humidity','rh'])

# Measured current / power are optional; they are derived later when absent.
current_mA_col = _first_present(['current_ma','current_mA','current','i','current (ma)'])
power_mW_col = _first_present(['power_mw','power_mW','power','p','power (mw)'])

print("Detected columns:")
for _label, _found in (
    ("Voltage:", voltage_col),
    ("Light:", light_col),
    ("Panel temp candidates:", panel_temp_cols),
    ("Env temp candidates:", env_temp_cols),
    ("Pressure:", pressure_col),
    ("Humidity:", humidity_col),
    ("Current_mA:", current_mA_col),
    ("Power_mW:", power_mW_col),
):
    print(_label, _found)


Detected columns:
Voltage: Voltage_V
Light: Light_lux
Panel temp candidates: ['Panel_Temp1_C', 'Panel_Temp2_C']
Env temp candidates: ['Env_Temp_BMP_C']
Pressure: Pressure_hPa
Humidity: None
Current_mA: Current_mA
Power_mW: Power_mW


In [6]:
# Cell 6 — Create standardized columns: Voltage_V, Current_A, Power_W, Panel_Temp_C, Env_Temp_C, Light_Lux, Pressure_hPa, Humidity_pct
# Work on a copy so the frame produced by the previous cell is not modified in place.
df = df.copy()
df['Voltage_V'] = df[voltage_col].astype(float)

# Current in amperes: measured mA column when present, otherwise Ohm's law with the load R.
df['Current_A'] = (
    df[current_mA_col].astype(float) / 1000.0 if current_mA_col
    else df['Voltage_V'] / R
)

# Power in watts: measured mW column when present, otherwise V * I.
df['Power_W'] = (
    df[power_mW_col].astype(float) / 1000.0 if power_mW_col
    else df['Voltage_V'] * df['Current_A']
)

# Panel temperature is the row-wise mean over all detected panel sensors.
panel_available = [c for c in panel_temp_cols if c in df.columns]
if panel_available:
    df['Panel_Temp_C'] = df[panel_available].mean(axis=1)

# Ambient temperature uses the first detected candidate only.
env_available = [c for c in env_temp_cols if c in df.columns]
if env_available:
    df['Env_Temp_C'] = df[env_available[0]].astype(float)

# Optional environment channels, copied under their standardized names.
for _source, _target in (
    (light_col, 'Light_Lux'),
    (pressure_col, 'Pressure_hPa'),
    (humidity_col, 'Humidity_pct'),
):
    if _source:
        df[_target] = df[_source].astype(float)

print("Standardized columns prepared. Example rows:")
display(df[['Voltage_V','Current_A','Power_W']].head())
_env_names = ['Panel_Temp_C','Env_Temp_C','Light_Lux','Pressure_hPa','Humidity_pct']
env_list = [c for c in _env_names if c in df.columns]
print("Available environment columns:", env_list)


Standardized columns prepared. Example rows:


Unnamed: 0_level_0,Voltage_V,Current_A,Power_W
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-08-14 13:37:00,1.73675,0.010112,0.0175
2025-08-14 13:38:00,1.799667,0.0105,0.018958
2025-08-14 13:39:00,1.865667,0.010867,0.020208
2025-08-14 13:40:00,1.799167,0.010492,0.018542
2025-08-14 13:41:00,1.6685,0.009708,0.016042


Available environment columns: ['Panel_Temp_C', 'Env_Temp_C', 'Light_Lux', 'Pressure_hPa']


In [7]:
# Cell 7 — Determine daytime (primary: Light_Lux > DAY_LUX_THRESH; fallback: 06:00-18:00)
# Clock-based fallback: hours 6 through 18 inclusive, i.e. 06:00 up to 18:59.
_hour = df.index.hour
_clock_mask = (_hour >= 6) & (_hour <= 18)

# Prefer the light sensor; keep the clock window only when the sensor is missing
# or no sample on any day exceeds the lux threshold.
daytime_mask = _clock_mask
if 'Light_Lux' in df.columns:
    _lux_mask = df['Light_Lux'] > DAY_LUX_THRESH
    per_day_counts = df[_lux_mask].resample('D').size()
    if not (per_day_counts == 0).all():
        daytime_mask = _lux_mask

df_day = df[daytime_mask].copy()
print("Total samples:", len(df), "Daytime samples:", len(df_day))
if len(df_day) == 0:
    raise RuntimeError("No daytime samples detected. Lower DAY_LUX_THRESH or check timestamps.")


Total samples: 2957 Daytime samples: 1771


In [8]:
# Cell 8 — Utility functions (normalize and safe Pearson r)
def normalize_series(s):
    """Min-max scale a series to the range [0, 1].

    NaN entries are dropped first, so the result can be shorter than `s`;
    it keeps the index labels of the surviving values.

    Returns an empty float Series when nothing is left after dropping NaNs,
    and an all-zero Series when the values are (numerically) constant.
    """
    values = s.dropna().astype(float)
    if values.empty:
        return pd.Series(dtype=float)

    lowest = values.min()
    span = values.max() - lowest
    # A (near-)zero span would divide by zero; report a flat line instead.
    if span < 1e-9:
        return pd.Series(np.zeros(len(values)), index=values.index)
    return (values - lowest) / span

def safe_pearson(x, y):
    """Pearson correlation coefficient that tolerates NaNs and degenerate input.

    Parameters
    ----------
    x, y : array-like of numbers (list, tuple, ndarray, Series values)
        Paired observations of equal length.

    Returns
    -------
    float
        Pearson r over the pairs where both values are non-NaN, or NaN when
        fewer than 3 such pairs remain, when either side is constant
        (r is undefined), or when scipy rejects the input.
    """
    # Coerce up front so plain lists/tuples work; previously boolean-mask
    # indexing on a list raised inside the try block and was reported as NaN.
    x_arr = np.asarray(x, dtype=float)
    y_arr = np.asarray(y, dtype=float)

    valid = ~(np.isnan(x_arr) | np.isnan(y_arr))
    if valid.sum() < 3:
        return np.nan

    x_ok, y_ok = x_arr[valid], y_arr[valid]
    # r is undefined for a constant series; return NaN directly instead of
    # letting scipy emit a warning and hand back NaN.
    if np.all(x_ok == x_ok[0]) or np.all(y_ok == y_ok[0]):
        return np.nan

    try:
        r, _ = pearsonr(x_ok, y_ok)
    except ValueError:
        return np.nan
    return float(r)


In [9]:
# Cell 9 — Per-day plotting loop (one main figure + auxiliaries per day). No linear fit lines, only Pearson r annotations.
def _hours_of_day(index):
    """Return the fractional hour of day for every timestamp in a DatetimeIndex."""
    return index.hour + index.minute / 60.0 + index.second / 3600.0


def _save_note_figure(path, message):
    """Write a small text-only placeholder image so every day yields the same set of files."""
    fig_note, ax_note = plt.subplots(figsize=(6, 4))
    ax_note.axis('off')
    ax_note.text(0.5, 0.5, message, ha='center', va='center')
    fig_note.savefig(path, dpi=300)
    plt.close(fig_note)
    return path


def _save_scatter_figure(path, x, y, xlabel, ylabel, title, marker_size=10, annotate_r=False):
    """Save a scatter plot of two Series; optionally annotate it with their Pearson r."""
    fig_sc, ax_sc = plt.subplots(figsize=(6, 5))
    ax_sc.scatter(x, y, s=marker_size)
    ax_sc.set_xlabel(xlabel)
    ax_sc.set_ylabel(ylabel)
    ax_sc.set_title(title)
    if annotate_r:
        r_value = safe_pearson(x.values, y.values)
        ax_sc.text(0.02, 0.95, f"Pearson r = {r_value:.3f}" if not np.isnan(r_value) else "Pearson r = N/A",
                   transform=ax_sc.transAxes, verticalalignment='top', bbox=dict(facecolor='white', alpha=0.8))
    fig_sc.savefig(path, dpi=300)
    plt.close(fig_sc)
    return path


generated_files = []
daily_summary = []

# Midnight of the calendar day each daytime sample belongs to. Grouping on it
# gives half-open day windows; a label slice [day_start:day_end] would include
# a sample at exactly next-day midnight in both days.
day_keys = df_day.index.normalize()
days = sorted(set(day_keys.date))

for day_start, grp in df_day.groupby(day_keys):
    day = day_start.date()
    n_points = grp.shape[0]
    hours = _hours_of_day(grp.index)
    power = grp['Power_W']

    # Highest observed power of the day (labelled "MPP" in the outputs).
    # Guarded so a day whose power readings are all NaN does not break idxmax.
    if power.notna().any():
        mpp_time = power.idxmax()
        mpp_row = grp.loc[mpp_time]
        mpp_v = float(mpp_row['Voltage_V'])
        mpp_i = float(mpp_row['Current_A'])
        mpp_p = float(mpp_row['Power_W'])
    else:
        mpp_time = None
        mpp_v = mpp_i = mpp_p = np.nan

    # Daily energy (Wh) by trapezoidal integration of power over elapsed seconds.
    # Elapsed time is taken from timestamp differences, so it does not depend on
    # the index's internal resolution. Gaps between retained daytime samples are
    # integrated across as straight lines.
    energy_Wh = 0.0
    if n_points > 1:
        elapsed_s = np.asarray((grp.index - grp.index[0]).total_seconds())
        pvals = power.values
        energy_Wh = np.sum((pvals[:-1] + pvals[1:]) / 2 * np.diff(elapsed_s)) / 3600.0

    daily_summary.append({
        'date': str(day),
        'mpp_time': mpp_time.isoformat() if mpp_time is not None else '',
        'mpp_voltage_V': float(mpp_v) if not np.isnan(mpp_v) else '',
        'mpp_current_A': float(mpp_i) if not np.isnan(mpp_i) else '',
        'mpp_power_W': float(mpp_p) if not np.isnan(mpp_p) else '',
        'daily_energy_Wh': float(energy_Wh),
        'daytime_points': int(n_points)
    })

    # Main figure: Power (left axis) + Voltage (right axis); lower panel: normalized env series.
    fig = plt.figure(figsize=(11,8))
    gs = fig.add_gridspec(3,1, height_ratios=[2,1,0.01], hspace=0.28)
    ax_main = fig.add_subplot(gs[0,0])

    if n_points >= 1:
        ax_main.plot(hours, power, marker='o', ms=4, linewidth=1, label='Power (W)')
    ax_main.set_ylabel('Power (W)')
    # The load value comes from the configured R rather than a hard-coded number.
    ax_main.set_title(f'{day} — Daytime Output and Environment ({R:g} Ω load)')
    ax_v = ax_main.twinx()
    if n_points >= 1:
        ax_v.plot(hours, grp['Voltage_V'], linestyle='--', marker='x', ms=4, linewidth=1, label='Voltage (V)')
    ax_v.set_ylabel('Voltage (V)')

    # Annotate the peak-power sample when available.
    if mpp_time is not None and not np.isnan(mpp_p):
        mpp_hour = mpp_time.hour + mpp_time.minute/60.0 + mpp_time.second/3600.0
        ax_main.scatter([mpp_hour], [mpp_p], s=80, zorder=6)
        ax_main.annotate(f'MPP\n{mpp_p:.3f} W\n{mpp_hour:.2f} h', xy=(mpp_hour, mpp_p),
                         xytext=(mpp_hour+0.35, mpp_p*0.95 if mpp_p>0.1 else mpp_p+0.01),
                         arrowprops=dict(arrowstyle='->', lw=1.2))

    # One combined legend for both y-axes.
    h1, l1 = ax_main.get_legend_handles_labels()
    h2, l2 = ax_v.get_legend_handles_labels()
    ax_main.legend(h1+h2, l1+l2, loc='upper left', framealpha=0.8)

    # Lower subplot: each available environment variable, min-max normalized.
    ax_env = fig.add_subplot(gs[1,0], sharex=ax_main)
    env_vars = [
        (col, label)
        for col, label in (
            ('Panel_Temp_C', 'Panel Temp (°C)'),
            ('Env_Temp_C', 'Ambient Temp (°C)'),
            ('Light_Lux', 'Light (Lux)'),
            ('Pressure_hPa', 'Pressure (hPa)'),
            ('Humidity_pct', 'Humidity (%)'),   # key must be the column name, not the label
        )
        if col in grp.columns
    ]

    for col, label in env_vars:
        s_norm = normalize_series(grp[col])
        if s_norm.empty:
            continue
        # normalize_series drops NaNs, so take x from the surviving timestamps
        # to keep x and y the same length.
        ax_env.plot(_hours_of_day(s_norm.index), s_norm.values, marker='.', ms=4, linewidth=1, label=label)

    ax_env.set_ylabel('Env vars (normalized)')
    ax_env.set_xlabel('Hour (h)')
    if ax_env.get_legend_handles_labels()[0]:
        ax_env.legend(loc='upper left', ncol=2, framealpha=0.8)
    ax_env.grid(True, linestyle='--', alpha=0.35)

    ax_main.set_xlim(max(5.5, hours.min()-0.4) if len(hours)>0 else 5.5, min(19, hours.max()+0.4) if len(hours)>0 else 19)

    # Footer with the covered time range, point count and load.
    ax_footer = fig.add_subplot(gs[2,0])
    ax_footer.axis('off')
    footer_text = f"Data time range: {grp.index.min() if not grp.empty else 'N/A'} → {grp.index.max() if not grp.empty else 'N/A'} | Points: {grp.shape[0]} | R={R}Ω"
    ax_footer.text(0, 0.5, footer_text, fontsize=9)

    # If insufficient daytime points, add a visible note on the figure.
    if n_points < MIN_DAYTIME_POINTS:
        ax_main.text(0.5, 0.5, 'Insufficient daytime data for reliable curves', transform=ax_main.transAxes,
                     fontsize=12, color='red', ha='center', va='center', bbox=dict(facecolor='white', alpha=0.8))

    fname_main = os.path.join(OUT_DIR, f'day_{day}_main_env.png')
    fig.savefig(fname_main, dpi=300)
    plt.close(fig)
    generated_files.append(fname_main)

    # Aux 1: Panel Temp vs Power (scatter + Pearson r text only).
    if 'Panel_Temp_C' in grp.columns and n_points >= 1:
        f2 = _save_scatter_figure(
            os.path.join(OUT_DIR, f'day_{day}_temp_vs_power.png'),
            grp['Panel_Temp_C'], power,
            'Panel Temperature (°C)', 'Power (W)', f'{day} — Panel Temp vs Power',
            marker_size=12, annotate_r=True)
    else:
        f2 = _save_note_figure(os.path.join(OUT_DIR, f'day_{day}_temp_vs_power_note.png'),
                               "Panel temp data not available for this day")
    generated_files.append(f2)

    # Aux 2: Light vs Power (scatter + Pearson r text only).
    if 'Light_Lux' in grp.columns and n_points >= 1:
        f3 = _save_scatter_figure(
            os.path.join(OUT_DIR, f'day_{day}_light_vs_power.png'),
            grp['Light_Lux'], power,
            'Light (Lux)', 'Power (W)', f'{day} — Light vs Power',
            marker_size=10, annotate_r=True)
    else:
        f3 = _save_note_figure(os.path.join(OUT_DIR, f'day_{day}_light_vs_power_note.png'),
                               "Light data not available for this day")
    generated_files.append(f3)

    # Aux 3: V-I and P-V scatters.
    if n_points >= 1:
        f4 = _save_scatter_figure(
            os.path.join(OUT_DIR, f'day_{day}_VI.png'),
            grp['Voltage_V'], grp['Current_A'],
            'Voltage (V)', 'Current (A)', f'{day} — V-I (daytime)')
        generated_files.append(f4)

        f5 = _save_scatter_figure(
            os.path.join(OUT_DIR, f'day_{day}_PV.png'),
            grp['Voltage_V'], power,
            'Voltage (V)', 'Power (W)', f'{day} — P-V (daytime)')
        generated_files.append(f5)

    # Aux 4: correlation heatmap (Power + env vars + V/I) over rows with no missing values.
    corr_cols = ['Power_W']
    for c in ['Panel_Temp_C','Env_Temp_C','Light_Lux','Pressure_hPa','Humidity_pct','Voltage_V','Current_A']:
        if c in grp.columns:
            corr_cols.append(c)
    corr_df = grp[corr_cols].dropna()
    if corr_df.shape[0] >= 4:
        corr = corr_df.corr()
        figh, axh = plt.subplots(figsize=(6,5))
        im = axh.imshow(corr.values, vmin=-1, vmax=1, cmap='bwr')
        axh.set_xticks(range(len(corr.columns))); axh.set_xticklabels(corr.columns, rotation=45, ha='right')
        axh.set_yticks(range(len(corr.columns))); axh.set_yticklabels(corr.columns)
        for i in range(len(corr.columns)):
            for j in range(len(corr.columns)):
                axh.text(j, i, f"{corr.values[i,j]:.2f}", ha='center', va='center', color='black', fontsize=8)
        figh.colorbar(im, ax=axh, fraction=0.046, pad=0.04)
        axh.set_title(f'{day} — Intraday correlation matrix')
        f_h = os.path.join(OUT_DIR, f'day_{day}_corr_heatmap.png')
        figh.savefig(f_h, dpi=300, bbox_inches='tight')
        plt.close(figh)
    else:
        # Small note file if there is not enough data.
        f_h = _save_note_figure(os.path.join(OUT_DIR, f'day_{day}_corr_heatmap_note.png'),
                                "Not enough data for intraday correlation matrix")
    generated_files.append(f_h)

# Save daily summary
daily_df = pd.DataFrame(daily_summary).set_index('date')
daily_csv = os.path.join(OUT_DIR, 'daily_summary.csv')
daily_df.to_csv(daily_csv)
print(f"Generated {len(generated_files)} files into {OUT_DIR}")
print("Daily summary saved to:", daily_csv)


Generated 36 files into D:\SolarTiltProject\data\150ohm
Daily summary saved to: D:\SolarTiltProject\data\150ohm\daily_summary.csv
