$$\large \text{Packages & Specs} $$

In [None]:
import sys
import os
import pandas as pd
import numpy as np
import pvlib
import re

import matplotlib.pyplot as plt
import plotly.express as px

module_path = re.sub(r'Notebooks','Python Scripts',os.getcwd())
sys.path.append(module_path)
from pv_modules import *

$$\large \text{DataFrame Cleaner} $$

In [None]:
def df_cleaner(df,file):
    """Run the cleaning pipeline on a freshly loaded dataframe.

    The frame is passed, in order, through ``reshape_df``,
    ``add_missing_times`` and ``remove_night``, and finally through a value
    filter selected by ``file``.

    Parameters
    ----------
    df : the raw dataframe to clean.
    file : dataset name; ``'Irradiance'`` selects
        ``clean_irradiance_values``, any other value selects
        ``clean_deger_fixed_values``.

    Returns
    -------
    The dataframe returned by the selected value filter.
    """
    # Rebuild the timestamp / adjust the headers.
    reshaped = reshape_df(df, file)

    # Fill gaps in the time intervals; the second return value is unused here.
    gap_filled, _ = add_missing_times(reshaped)

    # Drop night-time values (done with pvlib, per the helper's description).
    daytime = remove_night(gap_filled)

    # Irradiance data has its own value filter; Deger and Fixed share one.
    value_filter = (
        clean_irradiance_values if file == 'Irradiance'
        else clean_deger_fixed_values
    )
    return value_filter(daytime)

$$\large \text{Summary of NaN Values} $$

In [None]:
def summarize_nan(df):
    """Summarize how much of ``df`` is NaN.

    Every percentage is expressed relative to the total number of cells in
    the frame (``df.size``) and rounded to 3 decimals.

    Returns
    -------
    t_perc : percentage of all cells that are NaN.
    mt_perc : percentage of all cells that sit in rows where every column
        is NaN.
    col_name : list of the column labels, in frame order.
    col_perc : tuple with one entry per column: that column's NaN count as
        a percentage of the total cell count (so the entries add up to
        ``t_perc``, up to rounding).
    """
    nan_mask = df.isna()
    cell_count = df.size

    nan_cells = nan_mask.sum().sum()
    fully_empty_rows = nan_mask.all(axis=1).sum()

    t_perc = round(nan_cells / cell_count * 100, 3)
    # A fully empty row contributes one NaN cell per column.
    mt_perc = round(fully_empty_rows * len(df.columns) / cell_count * 100, 3)

    col_name = list(df.columns)
    col_perc = tuple(
        round(df[label].isna().sum() / cell_count * 100, 3)
        for label in df.columns
    )

    return t_perc, mt_perc, col_name, col_perc


$$\large \text{Creating a dataframe of months and NaN values} $$

In [None]:
def NaN_by_month(path_list,file):
    """Build a per-file table of NaN percentages.

    Each path is read as a tab- or comma-separated file, cleaned with
    ``df_cleaner`` and summarized with ``summarize_nan``. One row is
    produced per file, labelled with the first (timezone-stripped) index
    value of the cleaned frame.

    Parameters
    ----------
    path_list : iterable of file paths to load.
    file : dataset name forwarded to ``df_cleaner``.

    Returns
    -------
    pandas.DataFrame indexed by ``'Month'`` with the columns
    ``'Total NaN %'``, ``'System Outage NaN %'`` and one column per data
    column, with rows ordered by ascending ``'Total NaN %'``.

    Raises
    ------
    ValueError
        If ``path_list`` yields no paths, or if a path loads an empty
        dataframe.
    """
    month_data = []
    col_name = None

    for path in path_list:
        df = pd.read_csv(path,sep="\t|,",engine='python')
        if df.empty:
            raise ValueError(f"The path: {path} loaded an empty dataframe.")
        df = df_cleaner(df,file)
        nan_perc,m_perc,col_name,col_perc = summarize_nan(df)
        # Drop the timezone so the row label is a naive timestamp.
        df.index = df.index.tz_localize(None)
        month_data.append((df.index[0],nan_perc,m_perc) + col_perc)

    if not month_data:
        # Without this guard the column labels are never bound and the
        # function would fail with an unrelated UnboundLocalError below.
        raise ValueError("path_list is empty: there are no files to summarize.")

    # Order the rows by total NaN percentage (element 1 of each tuple).
    month_data.sort(key = lambda row : row[1])

    # NOTE: the per-column headers come from the last file processed; this
    # assumes every file has the same data columns.
    headers = ['Month','Total NaN %','System Outage NaN %'] + col_name
    return pd.DataFrame(month_data, columns = headers).set_index('Month')

$$\large \text{Function Calls} $$

In [None]:
def main(file):
    """Plot the stored NaN summary for ``file``.

    Reads ``<file>_NaN_All.csv`` from the directory obtained by replacing
    ``Notebooks`` or ``Python Scripts`` in the current working directory
    with ``Support Files/``, draws a plotly line chart of every column
    against the ``Month`` index and shows it. Afterwards a LaTeX table of
    the same data, ordered by ``'Total NaN %'``, is built.

    Parameters
    ----------
    file : dataset name used as the CSV file-name prefix and in the title.
    """
    support_dir = re.sub(r'Notebooks|Python Scripts','Support Files/',os.getcwd())
    file_data = pd.read_csv(support_dir + f'{file}_NaN_All.csv', index_col = 'Month')
    file_data = file_data.sort_index()

    plotted = [name for name in file_data.columns if name != 'Month']
    fig = px.line(
        file_data,
        x=file_data.index,
        y=plotted,
        title=f"{file}: Percentage of NaN by Month",
    )

    # Quick-zoom buttons displayed above the range slider.
    zoom_buttons = [
        {"count": 6, "label": "6m", "step": "month", "stepmode": "backward"},
        {"count": 1, "label": "1y", "step": "year", "stepmode": "backward"},
        {"step": "all"},
    ]
    fig.update_xaxes(
        rangeslider_visible=True,
        rangeselector={"buttons": zoom_buttons},
    )
    fig.show()

    # Relabel the index as e.g. "May 2023" and order by total NaN %.
    file_data.index = pd.to_datetime(file_data.index).strftime('%B %Y')
    file_data = file_data.sort_values('Total NaN %')

    # The LaTeX source is built but not emitted; print it here if needed.
    latex_code = file_data.to_latex(index=True)

def update(file):
    """Recompute the NaN summary for ``file``, store it, then plot it.

    The summary produced by ``NaN_by_month`` over the paths from
    ``get_file_paths`` is written to ``<file>_NaN_All.csv`` in the
    ``Support Files/`` directory derived from the current working
    directory, after which ``main`` is called for the same ``file``.
    """
    summary = NaN_by_month(get_file_paths(file), file)
    support_dir = re.sub(r'Notebooks|Python Scripts','Support Files/',os.getcwd())
    summary.to_csv(support_dir + f'{file}_NaN_All.csv')
    main(file)
    
# Dataset names accepted at the prompt.
VALID_FILES = ('Irradiance', 'Deger', 'Fixed')

file = input("File (opt: Irradiance/Deger/Fixed): ").strip()

# Require an exact name. The value is later compared with == and used to
# build file names, so a partial match (or an empty string) must not pass,
# and user input must not be interpreted as a regular expression.
if file not in VALID_FILES:
    raise ValueError(
        f"Incorrect Input: File (got {file!r}, expected one of: {', '.join(VALID_FILES)})"
    )

response = input("Last update: May 23rd 2023 \n To continue press: 'Enter' \n Else type: 'update()' \n\t")

# 'update()' rebuilds the stored summary first; anything else only plots it.
if response.strip() == 'update()':
    update(file)
else:
    main(file)