In [36]:
import pandas as pd
import numpy as np
import os

In [37]:
def extract_site_name(filename):
    # Split the string on space " " and underscore "_", then filter out empty strings
    parts = [part for part in filename.replace("_", " ").split(" ") if part]

    # Find the index of the parts where 'NESM' and 'availability' occur
    try:
        start_index = parts.index('NESM') + 1  
        end_index = parts.index('availability')
    except ValueError:
        print("Can't find 'NESM' or 'avalability' in the file name.")

    # Extract the parts between 'NESM' and 'availability'
    site_name_parts = parts[start_index:end_index]

    # Join the parts back together with space to form the site name
    site_name = " ".join(site_name_parts) 
    

    return site_name

In [38]:
def concat_csv_files(directory, pattern):
    all_data = []

    for filename in os.listdir(directory):
        if filename.endswith(pattern):
            print(filename)
            site_name = extract_site_name(filename)
            print(f"Processing: {site_name}")
            file_path = os.path.join(directory, filename)
            combined_df = pd.read_csv(file_path, index_col=None, header=0)
            combined_df = combined_df.iloc[0].to_frame().T
            combined_df.insert(0, "Site Name", site_name)
            all_data.append(combined_df)

    combined_df = pd.concat(all_data, ignore_index=True)
    combined_df.columns = [
        col if col == "Site Name" else pd.to_datetime(col).strftime("%Y-%m")
        for col in combined_df.columns
    ]

    combined_df = combined_df.sort_values(by=combined_df.columns[0])

    return combined_df

In [39]:
combined_df = concat_csv_files("../202209-202309_cal", "Calc.csv")
combined_df.to_csv("manual_result.csv", index=False)
combined_df

NESM Chiloquin availability calc_Calc.csv
Processing: Chiloquin
NESM Tiburon availability calc_Calc.csv
Processing: Tiburon
NESM Sedberry availability calc_Calc.csv
Processing: Sedberry
NESM Gauss availability calc_Calc.csv
Processing: Gauss
NESM NC Solar II availability calc_Calc.csv
Processing: NC Solar II
NESM Sarah availability calc_Calc.csv
Processing: Sarah
NESM Nitro availability calc_Calc.csv
Processing: Nitro
NESM Mariposa availability calc_Calc.csv
Processing: Mariposa
NESM Sonne Two availability calc_Calc.csv
Processing: Sonne Two
NESM Merrill availability calc_Calc.csv
Processing: Merrill
NESM South Robeson availability calc_Calc.csv
Processing: South Robeson
NESM NC Solar I availability calc_Calc.csv
Processing: NC Solar I
NESM Jersey availability calc_Calc.csv
Processing: Jersey
NESM Cotten availability calc_Calc.csv
Processing: Cotten
NESM Freemont availability calc_Calc.csv
Processing: Freemont
NESM Turkey Hill availability calc_Calc.csv
Processing: Turkey Hill
NESM Dav

Unnamed: 0,Site Name,2022-10,2022-11,2022-12,2023-01,2023-02,2023-03,2023-04,2023-05,2023-06,2023-07,2023-08,2023-09,2022-09
0,Chiloquin,0.986329,0.985724,0.986081,0.979458,0.971827,0.963637,0.936103,0.916007,0.915993,0.924867,0.930116,0.931932,
13,Cotten,1.0,0.999271,0.999442,0.999317,0.999454,0.999063,0.999244,0.999371,0.999461,0.99953,0.999576,0.999614,
18,County Home,0.979107,0.983675,0.98151,0.982984,0.985824,0.986618,0.986664,0.98687,0.985498,0.985802,0.987212,0.988286,
26,Dairy,1.0,0.999996,0.999899,0.99992,0.999937,0.999949,0.999962,0.99997,0.999344,0.999072,0.999161,0.999236,
16,Davis Lane,0.86219,0.927093,0.949268,0.960319,0.968344,0.975159,0.978917,0.977248,0.980337,0.982835,0.984498,0.981166,
23,Faison,0.954172,0.931908,0.942698,0.953137,0.960721,0.963328,0.963674,0.964819,0.969025,0.965642,0.962017,,0.977422
24,Four Oaks,0.994944,0.996392,0.997056,0.997558,0.991802,0.993229,0.993942,0.994776,0.995378,0.993208,0.993877,,0.995671
14,Freemont,0.66449,0.663426,0.663501,0.663975,0.66414,0.663449,0.663682,0.662908,0.662523,0.662804,0.663,0.681284,
3,Gauss,0.961699,0.980854,0.973909,0.95681,0.958563,0.959953,0.966804,0.97282,0.973066,0.975433,0.977164,0.978505,
12,Jersey,1.0,1.0,1.0,1.0,0.999784,0.997679,0.997878,0.998203,0.998218,0.998436,0.998605,0.998724,


In [40]:
csv_path1 = "./manual_result.csv"
csv_path2 = "../output/summary/summary.csv"

manual_result = pd.read_csv(csv_path1)
automated_result = pd.read_csv(csv_path2)
common_columns = manual_result.columns.intersection(automated_result.columns).drop('Site Name')

common_columns

Index(['2022-10', '2022-11', '2022-12', '2023-01', '2023-02', '2023-03',
       '2023-04', '2023-05', '2023-06', '2023-07', '2023-08', '2023-09',
       '2022-09'],
      dtype='object')

In [41]:
difference_df = pd.DataFrame(index=automated_result.index, columns=common_columns)
for column in common_columns:
    difference_df[column] = ((manual_result[column] * 100).round(2) - automated_result[column]).round(2)
difference_df.reset_index()
difference_df.to_csv("hightlight.csv", index=False)
difference_df

Unnamed: 0,2022-10,2022-11,2022-12,2023-01,2023-02,2023-03,2023-04,2023-05,2023-06,2023-07,2023-08,2023-09,2022-09
0,1.17,0.23,1.8,10.38,3.5,7.69,18.88,28.86,13.14,11.81,15.35,16.5,
1,0.0,9.95,12.92,7.24,-0.05,0.51,-0.08,-0.06,-0.05,-0.01,9.69,-0.04,
2,0.04,-0.43,0.26,0.46,2.86,-0.07,1.31,-0.12,1.5,0.11,-0.28,-0.33,
3,0.0,0.36,1.34,-0.01,-0.01,0.19,0.0,0.0,0.41,3.94,-0.08,-0.08,
4,-2.83,-7.29,-5.07,-3.77,-3.17,-2.48,-1.69,-0.01,-1.9,-1.72,-1.55,3.11,
5,10.07,3.57,-4.68,-4.69,-3.93,-2.29,-1.69,-1.34,-3.1,3.28,17.3,,4.15
6,0.14,-0.36,-0.29,-0.24,2.48,-0.68,-0.53,-0.52,-0.46,1.73,-0.61,,0.47
7,-5.3,-4.85,-7.16,-7.53,-5.06,-5.68,-4.67,-4.74,-4.57,-4.3,-3.75,-21.26,
8,-3.17,-3.56,1.1,-2.63,-3.14,-2.74,-3.25,-2.2,4.83,8.38,11.03,1.13,
9,0.0,0.0,0.0,0.0,0.2,0.36,0.36,-0.18,0.12,-0.16,0.24,-0.13,
