In [1]:
import pandas as pd

In [2]:
from prettytable import PrettyTable

# Create a list to store messages for the log file
log_messages = []

# Log and print messages
def log(message):
    log_messages.append(message)

# Print detailed information of selected rows with pretty format
def get_info(rows):
    table = PrettyTable()
    table.field_names = ["Index"] + rows.columns.tolist()
    for index, row in rows.iterrows():
        table.add_row([index+2] + row.tolist())
    
    return table

In [3]:
file_path = "../data/2023-08-01-2023-08-31_Tumbleweed Monthly.csv"

In [5]:
lines = []

# Read only the first 10 lines into the 'lines' list
with open(file_path, "r") as f:
    for _ in range(10):
        line = f.readline()
        if not line:
            break
        lines.append(line)

# The first line where the word "timpestamps" (in any case) appears is the header line
header_index = next(
    (i for i, line in enumerate(lines) if "timestamp" in line.lower()), None
)

if header_index is None:
    error_message = "Header not found in the file."
    log(error_message)
    raise ValueError("Header not found in the file.")

# The index of the found header is exactly the number of rows to skip when reading the data
df = pd.read_csv(file_path, skiprows=header_index, header=0)
df

Unnamed: 0,Timestamp,POA Sensor,Weather station ambient temperature,Max wind speed - Weather Station 1 (Standard)(POA),Production meter L-N voltage,Production meter active power,"Sungrow 60kw Inverter - 1.1, Line kW","Sungrow 60kw Inverter - 1.2, Line kW","Sungrow 60kw Inverter - 1.3, Line kW","Sungrow 60kw Inverter - 1.4, Line kW",...,"Sungrow 60kw Inverter - 11.6, Line kW","Sungrow 60kw Inverter - 11.7, Line kW","Sungrow 60kw Inverter - 11.8, Line kW","Sungrow 60kw Inverter - 11.9, Line kW","Sungrow 60kw Inverter - 11.10, Line kW","Sungrow 60kw Inverter - 11.11, Line kW","Sungrow 60kw Inverter - 11.12, Line kW","Sungrow 60kw Inverter - 11.13, Line kW","Sungrow 60kw Inverter - 11.14, Line kW","Sungrow 60kw Inverter - 11.15, Line kW"
0,8/1/23 00:00,0.0,63.36313,0,0.000000,0.00000,,,,,...,,,,,,,,,,
1,8/1/23 00:15,0.0,61.01357,0,0.000000,0.00000,,,,,...,,,,,,,,,,
2,8/1/23 00:30,0.0,59.10123,0,0.000000,0.00000,,,,,...,,,,,,,,,,
3,8/1/23 00:45,0.0,60.05817,0,0.000000,0.00000,,,,,...,,,,,,,,,,
4,8/1/23 01:00,0.0,58.15518,0,0.000000,0.00000,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2971,8/31/23 22:45,0.0,55.50051,0,7.329794,-13.59511,,,,,...,,,,,,,,,,
2972,8/31/23 23:00,0.0,55.77038,0,7.348179,-13.70044,,,,,...,,,,,,,,,,
2973,8/31/23 23:15,-999.0,0.00000,0,7.348179,-13.70044,,,,,...,,,,,,,,,,
2974,8/31/23 23:30,-999.0,0.00000,0,7.348179,-13.70044,,,,,...,,,,,,,,,,


In [6]:
def column_temperature(df):
    ambient= [col for col in df.columns if 'ambient' in col.lower() and 'temperature' in col.lower()]
    if ambient:
        df.rename(columns={ambient[0]: 'Temperature'}, inplace=True)
    else:
        temperature = [col for col in df.columns if 'temperature' in col.lower()]
        if temperature:
            df.rename(columns={temperature[0]: 'Temperature'}, inplace=True)
        else:
            df['Ambient Temperature'] = -999
    return df

df = column_temperature(df)
df

Unnamed: 0,Timestamp,POA Sensor,Temperature,Max wind speed - Weather Station 1 (Standard)(POA),Production meter L-N voltage,Production meter active power,"Sungrow 60kw Inverter - 1.1, Line kW","Sungrow 60kw Inverter - 1.2, Line kW","Sungrow 60kw Inverter - 1.3, Line kW","Sungrow 60kw Inverter - 1.4, Line kW",...,"Sungrow 60kw Inverter - 11.6, Line kW","Sungrow 60kw Inverter - 11.7, Line kW","Sungrow 60kw Inverter - 11.8, Line kW","Sungrow 60kw Inverter - 11.9, Line kW","Sungrow 60kw Inverter - 11.10, Line kW","Sungrow 60kw Inverter - 11.11, Line kW","Sungrow 60kw Inverter - 11.12, Line kW","Sungrow 60kw Inverter - 11.13, Line kW","Sungrow 60kw Inverter - 11.14, Line kW","Sungrow 60kw Inverter - 11.15, Line kW"
0,8/1/23 00:00,0.0,63.36313,0,0.000000,0.00000,,,,,...,,,,,,,,,,
1,8/1/23 00:15,0.0,61.01357,0,0.000000,0.00000,,,,,...,,,,,,,,,,
2,8/1/23 00:30,0.0,59.10123,0,0.000000,0.00000,,,,,...,,,,,,,,,,
3,8/1/23 00:45,0.0,60.05817,0,0.000000,0.00000,,,,,...,,,,,,,,,,
4,8/1/23 01:00,0.0,58.15518,0,0.000000,0.00000,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2971,8/31/23 22:45,0.0,55.50051,0,7.329794,-13.59511,,,,,...,,,,,,,,,,
2972,8/31/23 23:00,0.0,55.77038,0,7.348179,-13.70044,,,,,...,,,,,,,,,,
2973,8/31/23 23:15,-999.0,0.00000,0,7.348179,-13.70044,,,,,...,,,,,,,,,,
2974,8/31/23 23:30,-999.0,0.00000,0,7.348179,-13.70044,,,,,...,,,,,,,,,,


In [7]:
def column_wind(df):
    wind = [col for col in df.columns if 'wind' in col.lower() or 'speed' in col.lower()]
    if wind:
        df.rename(columns={wind[0]: 'Wind Speed'}, inplace=True)
    else:
        df['Wind Speed'] = -999
    return df

df = column_wind(df)
df

Unnamed: 0,Timestamp,POA Sensor,Temperature,Wind Speed,Production meter L-N voltage,Production meter active power,"Sungrow 60kw Inverter - 1.1, Line kW","Sungrow 60kw Inverter - 1.2, Line kW","Sungrow 60kw Inverter - 1.3, Line kW","Sungrow 60kw Inverter - 1.4, Line kW",...,"Sungrow 60kw Inverter - 11.6, Line kW","Sungrow 60kw Inverter - 11.7, Line kW","Sungrow 60kw Inverter - 11.8, Line kW","Sungrow 60kw Inverter - 11.9, Line kW","Sungrow 60kw Inverter - 11.10, Line kW","Sungrow 60kw Inverter - 11.11, Line kW","Sungrow 60kw Inverter - 11.12, Line kW","Sungrow 60kw Inverter - 11.13, Line kW","Sungrow 60kw Inverter - 11.14, Line kW","Sungrow 60kw Inverter - 11.15, Line kW"
0,8/1/23 00:00,0.0,63.36313,0,0.000000,0.00000,,,,,...,,,,,,,,,,
1,8/1/23 00:15,0.0,61.01357,0,0.000000,0.00000,,,,,...,,,,,,,,,,
2,8/1/23 00:30,0.0,59.10123,0,0.000000,0.00000,,,,,...,,,,,,,,,,
3,8/1/23 00:45,0.0,60.05817,0,0.000000,0.00000,,,,,...,,,,,,,,,,
4,8/1/23 01:00,0.0,58.15518,0,0.000000,0.00000,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2971,8/31/23 22:45,0.0,55.50051,0,7.329794,-13.59511,,,,,...,,,,,,,,,,
2972,8/31/23 23:00,0.0,55.77038,0,7.348179,-13.70044,,,,,...,,,,,,,,,,
2973,8/31/23 23:15,-999.0,0.00000,0,7.348179,-13.70044,,,,,...,,,,,,,,,,
2974,8/31/23 23:30,-999.0,0.00000,0,7.348179,-13.70044,,,,,...,,,,,,,,,,


In [8]:
def column_voltage(df):
    voltage = [col for col in df.columns if 'voltage' in col.lower()]
    if len(voltage) > 1:
        less_missing = min(voltage, key= lambda col: df[col].isna().sum)
        df.rename(columns={less_missing: 'Meter Voltage'}, inplace=True)
    elif voltage:
        df.rename(columns={voltage[0]: 'Meter Voltage'}, inplace=True)
    else:
        df['Meter Voltage'] = -999
    
    return df

df = column_voltage(df)
df

Unnamed: 0,Timestamp,POA Sensor,Temperature,Wind Speed,Meter Voltage,Production meter active power,"Sungrow 60kw Inverter - 1.1, Line kW","Sungrow 60kw Inverter - 1.2, Line kW","Sungrow 60kw Inverter - 1.3, Line kW","Sungrow 60kw Inverter - 1.4, Line kW",...,"Sungrow 60kw Inverter - 11.6, Line kW","Sungrow 60kw Inverter - 11.7, Line kW","Sungrow 60kw Inverter - 11.8, Line kW","Sungrow 60kw Inverter - 11.9, Line kW","Sungrow 60kw Inverter - 11.10, Line kW","Sungrow 60kw Inverter - 11.11, Line kW","Sungrow 60kw Inverter - 11.12, Line kW","Sungrow 60kw Inverter - 11.13, Line kW","Sungrow 60kw Inverter - 11.14, Line kW","Sungrow 60kw Inverter - 11.15, Line kW"
0,8/1/23 00:00,0.0,63.36313,0,0.000000,0.00000,,,,,...,,,,,,,,,,
1,8/1/23 00:15,0.0,61.01357,0,0.000000,0.00000,,,,,...,,,,,,,,,,
2,8/1/23 00:30,0.0,59.10123,0,0.000000,0.00000,,,,,...,,,,,,,,,,
3,8/1/23 00:45,0.0,60.05817,0,0.000000,0.00000,,,,,...,,,,,,,,,,
4,8/1/23 01:00,0.0,58.15518,0,0.000000,0.00000,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2971,8/31/23 22:45,0.0,55.50051,0,7.329794,-13.59511,,,,,...,,,,,,,,,,
2972,8/31/23 23:00,0.0,55.77038,0,7.348179,-13.70044,,,,,...,,,,,,,,,,
2973,8/31/23 23:15,-999.0,0.00000,0,7.348179,-13.70044,,,,,...,,,,,,,,,,
2974,8/31/23 23:30,-999.0,0.00000,0,7.348179,-13.70044,,,,,...,,,,,,,,,,


In [9]:
def column_others(df):
    keyword_mapping = {
        "Timestamp": ["timestamp"],
        "POA Irradiance": ["poa"],
        "Meter Power": ["meter", "power"],
    }

    rename_mapping = {}
    for new_name, keywords in keyword_mapping.items():
        for col in df.columns:
            if all(keyword.lower() in col.lower() for keyword in keywords):
                rename_mapping[col] = new_name
                break

    # Rename columns excluding inverters according to the rename_mapping dictionary
    df.rename(columns=rename_mapping, inplace=True)
    
    return df

df = column_others(df)
df

Unnamed: 0,Timestamp,POA Irradiance,Temperature,Wind Speed,Meter Voltage,Meter Power,"Sungrow 60kw Inverter - 1.1, Line kW","Sungrow 60kw Inverter - 1.2, Line kW","Sungrow 60kw Inverter - 1.3, Line kW","Sungrow 60kw Inverter - 1.4, Line kW",...,"Sungrow 60kw Inverter - 11.6, Line kW","Sungrow 60kw Inverter - 11.7, Line kW","Sungrow 60kw Inverter - 11.8, Line kW","Sungrow 60kw Inverter - 11.9, Line kW","Sungrow 60kw Inverter - 11.10, Line kW","Sungrow 60kw Inverter - 11.11, Line kW","Sungrow 60kw Inverter - 11.12, Line kW","Sungrow 60kw Inverter - 11.13, Line kW","Sungrow 60kw Inverter - 11.14, Line kW","Sungrow 60kw Inverter - 11.15, Line kW"
0,8/1/23 00:00,0.0,63.36313,0,0.000000,0.00000,,,,,...,,,,,,,,,,
1,8/1/23 00:15,0.0,61.01357,0,0.000000,0.00000,,,,,...,,,,,,,,,,
2,8/1/23 00:30,0.0,59.10123,0,0.000000,0.00000,,,,,...,,,,,,,,,,
3,8/1/23 00:45,0.0,60.05817,0,0.000000,0.00000,,,,,...,,,,,,,,,,
4,8/1/23 01:00,0.0,58.15518,0,0.000000,0.00000,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2971,8/31/23 22:45,0.0,55.50051,0,7.329794,-13.59511,,,,,...,,,,,,,,,,
2972,8/31/23 23:00,0.0,55.77038,0,7.348179,-13.70044,,,,,...,,,,,,,,,,
2973,8/31/23 23:15,-999.0,0.00000,0,7.348179,-13.70044,,,,,...,,,,,,,,,,
2974,8/31/23 23:30,-999.0,0.00000,0,7.348179,-13.70044,,,,,...,,,,,,,,,,


In [10]:
def column_inverter(df):
    known_columns = [
        "Timestamp",
        "POA Irradiance",
        "Temperature",
        "Wind Speed",
        "Meter Power",
        "Meter Voltage",
    ]
    inverter_index = 1
    for col in df.columns:
        if col not in known_columns:
            df.rename(columns={col: "Inverter_" + str(inverter_index)}, inplace=True)
            inverter_index += 1

    return df

df = column_inverter(df)
df


Unnamed: 0,Timestamp,POA Irradiance,Temperature,Wind Speed,Meter Voltage,Meter Power,Inverter_1,Inverter_2,Inverter_3,Inverter_4,...,Inverter_156,Inverter_157,Inverter_158,Inverter_159,Inverter_160,Inverter_161,Inverter_162,Inverter_163,Inverter_164,Inverter_165
0,8/1/23 00:00,0.0,63.36313,0,0.000000,0.00000,,,,,...,,,,,,,,,,
1,8/1/23 00:15,0.0,61.01357,0,0.000000,0.00000,,,,,...,,,,,,,,,,
2,8/1/23 00:30,0.0,59.10123,0,0.000000,0.00000,,,,,...,,,,,,,,,,
3,8/1/23 00:45,0.0,60.05817,0,0.000000,0.00000,,,,,...,,,,,,,,,,
4,8/1/23 01:00,0.0,58.15518,0,0.000000,0.00000,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2971,8/31/23 22:45,0.0,55.50051,0,7.329794,-13.59511,,,,,...,,,,,,,,,,
2972,8/31/23 23:00,0.0,55.77038,0,7.348179,-13.70044,,,,,...,,,,,,,,,,
2973,8/31/23 23:15,-999.0,0.00000,0,7.348179,-13.70044,,,,,...,,,,,,,,,,
2974,8/31/23 23:30,-999.0,0.00000,0,7.348179,-13.70044,,,,,...,,,,,,,,,,


In [11]:
def column_reorder(df):
    inverter_columns = sorted(col for col in df.columns if "Inverter" in col)
    columns_order = [
        "Timestamp",
        "POA Irradiance",
        "Temperature",
        "Wind Speed",
        "Meter Power",
        "Meter Voltage",
    ] + inverter_columns
    df = df[columns_order]

    return df 

df = column_reorder(df)
df

Unnamed: 0,Timestamp,POA Irradiance,Temperature,Wind Speed,Meter Power,Meter Voltage,Inverter_1,Inverter_10,Inverter_100,Inverter_101,...,Inverter_90,Inverter_91,Inverter_92,Inverter_93,Inverter_94,Inverter_95,Inverter_96,Inverter_97,Inverter_98,Inverter_99
0,8/1/23 00:00,0.0,63.36313,0,0.00000,0.000000,,,,,...,,,,,,,,,,
1,8/1/23 00:15,0.0,61.01357,0,0.00000,0.000000,,,,,...,,,,,,,,,,
2,8/1/23 00:30,0.0,59.10123,0,0.00000,0.000000,,,,,...,,,,,,,,,,
3,8/1/23 00:45,0.0,60.05817,0,0.00000,0.000000,,,,,...,,,,,,,,,,
4,8/1/23 01:00,0.0,58.15518,0,0.00000,0.000000,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2971,8/31/23 22:45,0.0,55.50051,0,-13.59511,7.329794,,,,,...,,,,,,,,,,
2972,8/31/23 23:00,0.0,55.77038,0,-13.70044,7.348179,,,,,...,,,,,,,,,,
2973,8/31/23 23:15,-999.0,0.00000,0,-13.70044,7.348179,,,,,...,,,,,,,,,,
2974,8/31/23 23:30,-999.0,0.00000,0,-13.70044,7.348179,,,,,...,,,,,,,,,,


In [20]:
# Convert the datetime string to a datetime object
def custom_to_datetime(df):
 
    formats = [
        "%m/%d/%Y %I:%M:%S %p",  # MM/DD/YYYY HH:MM:SS AM/PM
        "%m/%d/%Y %H:%M:%S",  # MM/DD/YYYY 24-hour
        "%Y-%m-%d %H:%M:%S",  # YYYY-MM-DD 24-hour
        "%d/%m/%Y %H:%M:%S",  # DD/MM/YYYY 24-hour
        "%m/%d/%y %H:%M:%S",
        "%m/%d/%y %H:%M",
        "%m-%d-%Y %H:%M:%S",
        "%m-%d-%y %H:%M:%S",
        "%m-%d-%Y %H:%M",
        "%m-%d-%y %H:%M"
        # Feel free to add more formats as needed
    ]
    for fmt in formats:
        try:
            df['Timestamp'] = pd.to_datetime(df['Timestamp'], format=fmt)
            return df  # return DataFrame if the format matches
        except ValueError:  # if the format doesn't match, continue to the next format
            continue
    raise ValueError(f"No suitable format found for the 'Timestamp' column.")  # raise error if no suitable format is found
    

df = custom_to_datetime(df)
df


Unnamed: 0,Timestamp,POA Irradiance,Temperature,Wind Speed,Meter Power,Meter Voltage,Inverter_1,Inverter_10,Inverter_100,Inverter_101,...,Inverter_90,Inverter_91,Inverter_92,Inverter_93,Inverter_94,Inverter_95,Inverter_96,Inverter_97,Inverter_98,Inverter_99
0,2023-08-01 00:00:00,0.0,63.36313,0,0.00000,0.000000,,,,,...,,,,,,,,,,
1,2023-08-01 00:15:00,0.0,61.01357,0,0.00000,0.000000,,,,,...,,,,,,,,,,
2,2023-08-01 00:30:00,0.0,59.10123,0,0.00000,0.000000,,,,,...,,,,,,,,,,
3,2023-08-01 00:45:00,0.0,60.05817,0,0.00000,0.000000,,,,,...,,,,,,,,,,
4,2023-08-01 01:00:00,0.0,58.15518,0,0.00000,0.000000,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2971,2023-08-31 22:45:00,0.0,55.50051,0,-13.59511,7.329794,,,,,...,,,,,,,,,,
2972,2023-08-31 23:00:00,0.0,55.77038,0,-13.70044,7.348179,,,,,...,,,,,,,,,,
2973,2023-08-31 23:15:00,-999.0,0.00000,0,-13.70044,7.348179,,,,,...,,,,,,,,,,
2974,2023-08-31 23:30:00,-999.0,0.00000,0,-13.70044,7.348179,,,,,...,,,,,,,,,,


In [21]:
from suntime import Sun

def site_name(file_name):
    site_name = file_name.split('_')[-1].replace(" Monthly.csv", "")
    return site_name

file_path = "./data/2023-08-01-2023-08-31_Agate Bay Monthly.csv"
site_name = site_name(file_path)
print(site_name)


Agate Bay


In [23]:
def getGeocoding(site_name):
    df = pd.read_csv("../data/geoCoding.csv")
    row = df.loc[df["Site Name"] == site_name]
    if row.empty:
        return None, None
    return row.iloc[0]["Latitude"], row.iloc[0]["Longitude"]

lat, lng = getGeocoding(site_name)
print(f"latitude: {lat}, longitude: {lng}")


latitude: 42.5225614, longitude: -122.8367639


In [25]:

# Determine wheter the missing value is from daytime or nighttime.
def determine_day_night(row):
    lat, lng = getGeocoding(site_name)
    if lat is None or lng is None:
        return "Unknown"
    sun = Sun(lat, lng)
    date = row["Timestamp"].date()
    sr = sun.get_local_sunrise_time(date).time()
    ss = sun.get_local_sunset_time(date).time()
    time = row["Timestamp"].time()
    if sr <= time <= ss:
        return "Day"
    else:
        return "Night"    

In [26]:
df["Day/Night"] = df.apply(lambda row: determine_day_night(row), axis=1)
df

Unnamed: 0,Timestamp,POA Irradiance,Temperature,Wind Speed,Meter Power,Meter Voltage,Inverter_1,Inverter_10,Inverter_100,Inverter_101,...,Inverter_91,Inverter_92,Inverter_93,Inverter_94,Inverter_95,Inverter_96,Inverter_97,Inverter_98,Inverter_99,Day/Night
0,2023-08-01 00:00:00,0.0,63.36313,0,0.00000,0.000000,,,,,...,,,,,,,,,,Night
1,2023-08-01 00:15:00,0.0,61.01357,0,0.00000,0.000000,,,,,...,,,,,,,,,,Night
2,2023-08-01 00:30:00,0.0,59.10123,0,0.00000,0.000000,,,,,...,,,,,,,,,,Night
3,2023-08-01 00:45:00,0.0,60.05817,0,0.00000,0.000000,,,,,...,,,,,,,,,,Night
4,2023-08-01 01:00:00,0.0,58.15518,0,0.00000,0.000000,,,,,...,,,,,,,,,,Night
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2971,2023-08-31 22:45:00,0.0,55.50051,0,-13.59511,7.329794,,,,,...,,,,,,,,,,Day
2972,2023-08-31 23:00:00,0.0,55.77038,0,-13.70044,7.348179,,,,,...,,,,,,,,,,Night
2973,2023-08-31 23:15:00,-999.0,0.00000,0,-13.70044,7.348179,,,,,...,,,,,,,,,,Night
2974,2023-08-31 23:30:00,-999.0,0.00000,0,-13.70044,7.348179,,,,,...,,,,,,,,,,Night


In [36]:
df.iloc[373: 380]

Unnamed: 0,Timestamp,POA Irradiance,Temperature,Wind Speed,Meter Power,Meter Voltage,Inverter_1,Inverter_10,Inverter_100,Inverter_101,...,Inverter_92,Inverter_93,Inverter_94,Inverter_95,Inverter_96,Inverter_97,Inverter_98,Inverter_99,Day/Night,Date
373,2023-08-04 21:15:00,0.0,67.9816,0,-13.51814,7.32577,,,,,...,,,,,,,,,Day,2023-08-04
374,2023-08-04 21:30:00,0.0,67.65361,0,-13.47975,7.31875,,,,,...,,,,,,,,,Day,2023-08-04
375,2023-08-04 21:45:00,0.0,67.46886,2,-13.39974,7.303783,,,,,...,,,,,,,,,Day,2023-08-04
376,2023-08-04 22:00:00,0.0,66.63525,1,-13.28392,7.284673,,,,,...,,,,,,,,,Day,2023-08-04
377,2023-08-04 22:15:00,0.0,64.91091,0,-13.20728,7.266048,,,,,...,,,,,,,,,Day,2023-08-04
378,2023-08-04 22:30:00,0.0,62.59241,0,-13.34683,7.293948,,,,,...,,,,,,,,,Day,2023-08-04
379,2023-08-04 22:45:00,0.0,60.69577,0,-13.4488,7.310098,,,,,...,,,,,,,,,Day,2023-08-04


In [27]:
import pandas as pd
from prettytable import PrettyTable
from datetime import date


In [28]:
log_messages = []

def log(message):
    log_messages.append(message)

In [30]:
def format_workorders(workorders):
    table = PrettyTable()
    table.field_names = ["Index"] + workorders.columns.tolist()
    for index, row in workorders.iterrows():
        table.add_row([index] + row.tolist())
    return table

def fetch_work_order(work_order, missing_dates, site_name):
    work_order['Fault/Event Start'] = pd.to_datetime(work_order['Fault/Event Start'], format="%b %d, %Y %I:%M:%S %p")
    work_order['Date'] = work_order['Fault/Event Start'].dt.date
    fetched_records = work_order[(work_order['Date'].isin(missing_dates)) & (work_order['Site Name'].isin([site_name]))]
    if fetched_records.empty:
        log(f"No work orders found for site {site_name} on the missing dates.")
    else:
        formatted_table = format_workorders(fetched_records.drop('Date', axis = 1))
        log(f"Information from work order:\n{formatted_table}" )

    return fetched_records


In [32]:
work_order = pd.read_csv('../data/WorkOrder.csv',skiprows=0, header=0)
missing_dates= [date(2023, 8, 11), date(2023, 8, 19)]
site_name = 'Tumbleweed'
fetched_records = fetch_work_order(work_order, missing_dates,site_name)

fetched_records

Unnamed: 0,Site Name,WO#,Fault/Event Start,Fault End,Date When Marked Complete/Incomplete,Description,Date
262,Tumbleweed,27611,2023-08-19 13:00:00,,,Tumbleweed - INV 4.06 - Offline (Islanding),2023-08-19


In [33]:
off_records = fetched_records[fetched_records["Description"].str.contains("offline", case=False, na=False)]
off_records['Date'] = off_records['Fault/Event Start'].dt.date
off_dates = off_records["Date"].tolist()
off_dates


[datetime.date(2023, 8, 19)]

In [34]:
df["Date"] = df["Timestamp"].dt.date
mask = df['Date'].isin(off_dates)
columns_to_check = ["Meter Voltage"] + [col for col in df.columns if col.startswith("Inverter_")]
missing_index = df[mask & df[columns_to_check].isna().any(axis=1)].index
df.loc[mask, columns_to_check] = df.loc[mask, columns_to_check].fillna(0)
still_missing_index = df[df[columns_to_check].isna().any(axis=1)].index

In [35]:
df.loc[missing_index]

Unnamed: 0,Timestamp,POA Irradiance,Temperature,Wind Speed,Meter Power,Meter Voltage,Inverter_1,Inverter_10,Inverter_100,Inverter_101,...,Inverter_92,Inverter_93,Inverter_94,Inverter_95,Inverter_96,Inverter_97,Inverter_98,Inverter_99,Day/Night,Date
1728,2023-08-19 00:00:00,0.0,53.83451,0,-13.46730,7.307115,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Night,2023-08-19
1729,2023-08-19 00:15:00,0.0,53.15462,0,-13.30371,7.284953,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Night,2023-08-19
1730,2023-08-19 00:30:00,0.0,52.73350,0,-13.38183,7.296949,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Night,2023-08-19
1731,2023-08-19 00:45:00,0.0,51.99919,0,-13.47667,7.309263,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Night,2023-08-19
1732,2023-08-19 01:00:00,0.0,51.88271,0,-13.56731,7.324206,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Night,2023-08-19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1819,2023-08-19 22:45:00,0.0,64.70272,1,-13.61337,7.343657,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Day,2023-08-19
1820,2023-08-19 23:00:00,0.0,65.16784,0,-13.57284,7.335833,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Day,2023-08-19
1821,2023-08-19 23:15:00,0.0,62.89948,0,-13.39706,7.303290,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Night,2023-08-19
1822,2023-08-19 23:30:00,0.0,62.14883,1,-13.41554,7.308659,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Night,2023-08-19


In [307]:
df.loc[still_missing_index]

Unnamed: 0,Timestamp,POA Irradiance,Temperature,Wind Speed,Meter Power,Meter Voltage,Inverter_1,Inverter_2,Inverter_3,Inverter_4,Day/Night,Date
419,2023-08-05 08:45:00,844.0,70.13333,1.646667,303.3047,12.30265,,,,,Night,2023-08-05
420,2023-08-05 09:00:00,874.0,71.18333,1.870000,0.0000,12.29256,,,,,Night,2023-08-05
421,2023-08-05 09:15:00,885.0,74.39000,1.310000,0.0000,12.25445,,,,,Day,2023-08-05
422,2023-08-05 09:30:00,902.0,73.70667,2.436667,0.0000,12.23089,,,,,Day,2023-08-05
423,2023-08-05 09:45:00,910.0,74.34666,2.083333,0.0000,12.23989,,,,,Day,2023-08-05
...,...,...,...,...,...,...,...,...,...,...,...,...
1840,2023-08-20 04:00:00,1.0,63.57333,3.490000,0.0000,12.34611,,,,,Night,2023-08-20
1841,2023-08-20 04:15:00,1.0,63.31000,3.923333,0.0000,12.35400,,,,,Night,2023-08-20
1842,2023-08-20 04:30:00,1.0,62.49333,3.753333,0.0000,12.34867,,,,,Night,2023-08-20
1843,2023-08-20 04:45:00,2.0,62.62000,3.173333,0.0000,12.28778,,,,,Night,2023-08-20


In [312]:
if not still_missing_index.empty:
    still_missing = df.loc[still_missing_index].drop(columns="Date")[
            (df["POA Irradiance"] > 0)
            | ((df["POA Irradiance"].isna()) & (df["Day/Night"] == "Day"))
        ]

    print(
            f"The missing 'Inverter' and 'Meter Voltage' values in the following rows cannot be handled due to lack of information.\n"
            f"{get_info(still_missing)}"
        )
else:
    print(
        f"And No missing 'Inverter' and 'Meter Voltage' values detected for the whole dataset!"
    )

still_missing_index.empty
df.loc[still_missing_index].drop(columns = 'Date')

The missing 'Inverter' and 'Meter Voltage' values in the following rows cannot be handled due to lack of information.
+-------+---------------------+----------------+-------------+------------+-------------+---------------+------------+------------+------------+------------+-----------+
| Index |      Timestamp      | POA Irradiance | Temperature | Wind Speed | Meter Power | Meter Voltage | Inverter_1 | Inverter_2 | Inverter_3 | Inverter_4 | Day/Night |
+-------+---------------------+----------------+-------------+------------+-------------+---------------+------------+------------+------------+------------+-----------+
|  421  | 2023-08-05 08:45:00 |     844.0      |   70.13333  |  1.646667  |   303.3047  |    12.30265   |    nan     |    nan     |    nan     |    nan     |   Night   |
|  422  | 2023-08-05 09:00:00 |     874.0      |   71.18333  |    1.87    |     0.0     |    12.29256   |    nan     |    nan     |    nan     |    nan     |   Night   |
|  423  | 2023-08-05 09:15:00 | 

  still_missing = df.loc[still_missing_index].drop(columns="Date")[


Unnamed: 0,Timestamp,POA Irradiance,Temperature,Wind Speed,Meter Power,Meter Voltage,Inverter_1,Inverter_2,Inverter_3,Inverter_4,Day/Night
419,2023-08-05 08:45:00,844.0,70.13333,1.646667,303.3047,12.30265,,,,,Night
420,2023-08-05 09:00:00,874.0,71.18333,1.870000,0.0000,12.29256,,,,,Night
421,2023-08-05 09:15:00,885.0,74.39000,1.310000,0.0000,12.25445,,,,,Day
422,2023-08-05 09:30:00,902.0,73.70667,2.436667,0.0000,12.23089,,,,,Day
423,2023-08-05 09:45:00,910.0,74.34666,2.083333,0.0000,12.23989,,,,,Day
...,...,...,...,...,...,...,...,...,...,...,...
1840,2023-08-20 04:00:00,1.0,63.57333,3.490000,0.0000,12.34611,,,,,Night
1841,2023-08-20 04:15:00,1.0,63.31000,3.923333,0.0000,12.35400,,,,,Night
1842,2023-08-20 04:30:00,1.0,62.49333,3.753333,0.0000,12.34867,,,,,Night
1843,2023-08-20 04:45:00,2.0,62.62000,3.173333,0.0000,12.28778,,,,,Night
