In [1]:
import os
import pandas as pd
from openpyxl.utils.exceptions import InvalidFileException
from zipfile import BadZipFile
import numpy as np

In [8]:
def xlsx_to_csv(input_directory, base_output_directory):
    """
    Convert the Excel workbooks in a directory to one CSV file per sheet.

    Every regular file directly inside ``input_directory`` is opened as an
    Excel workbook. Each sheet except the one named 'Sheet 1' is written to
    ``<base_output_directory>/<prefix>/<prefix>_<sheet name>.csv``, where
    ``<prefix>`` is the first seven characters of the part of the file name
    before its first underscore (e.g. '2022-11' for
    '2022-11-01-2022-11-30_NESM - Availability.xlsx').

    Parameters
    ----------
    input_directory : str or path-like
        Directory that holds the workbooks. Sub-directories are skipped.
    base_output_directory : str or path-like
        Root of the output tree. Created if it does not exist.

    Returns
    -------
    None
        Progress and per-file errors are reported with ``print``.

    Notes
    -----
    * A file that cannot be read does not abort the run; a message is
      printed and processing continues with the next file.
    * Two workbooks that share the same prefix and sheet name map to the same
      CSV path, so the one processed later overwrites the earlier output.
    """
    os.makedirs(base_output_directory, exist_ok=True)

    with os.scandir(input_directory) as entries:
        for file in entries:
            # A directory can never be a workbook; do not hand it to the reader.
            if not file.is_file():
                continue

            try:
                # Depends only on the file name, so compute it once per workbook.
                year_date = file.name.split('_')[0][:7]

                # The context manager closes the workbook handle even if a
                # sheet fails to convert.
                with pd.ExcelFile(file.path) as xls:
                    for sheet_name in xls.sheet_names:
                        if sheet_name == "Sheet 1":
                            continue

                        # Reuse the already-open workbook instead of
                        # re-opening the file for every sheet.
                        df = pd.read_excel(xls, sheet_name=sheet_name)

                        output_directory = os.path.join(base_output_directory, year_date)
                        os.makedirs(output_directory, exist_ok=True)

                        csv_file_name = f"{year_date}_{sheet_name}.csv"
                        csv_file_path = os.path.join(output_directory, csv_file_name)
                        df.to_csv(csv_file_path, index=False)
                        print(f"Converted {file.name} sheet {sheet_name} to CSV format.")

            except BadZipFile:
                print(f"Could not process {file.name}: File is not a valid zip file.")
            except InvalidFileException:
                print(f"Could not process {file.name}: File is not a valid Excel file.")
            except Exception as e:
                # Deliberate best-effort: report and move on to the next file.
                print(f"An unexpected error occurred while processing {file.name}: {e}")

In [9]:
# Convert every workbook in ../raw-data into per-sheet CSV files, written
# under ../data/<first 7 chars of the file-name prefix>/ (e.g. ../data/2022-11/).
xlsx_to_csv("../raw-data", "../data")

Converted 2022-11-01-2022-11-30_NESM - Availability.xlsx sheet Chiloquin Solar Farm to CSV format.
Converted 2022-11-01-2022-11-30_NESM - Availability.xlsx sheet Cotten Farm to CSV format.
Converted 2022-11-01-2022-11-30_NESM - Availability.xlsx sheet County Home Solar Center, LLC to CSV format.
Converted 2022-11-01-2022-11-30_NESM - Availability.xlsx sheet Dairy Solar to CSV format.
Converted 2022-11-01-2022-11-30_NESM - Availability.xlsx sheet Davis Lane Solar, LLC to CSV format.
Converted 2022-11-01-2022-11-30_NESM - Availability.xlsx sheet Faison to CSV format.
Converted 2022-11-01-2022-11-30_NESM - Availability.xlsx sheet Four Oaks to CSV format.
Converted 2022-11-01-2022-11-30_NESM - Availability.xlsx sheet Freemont Solar Center to CSV format.
Converted 2022-11-01-2022-11-30_NESM - Availability.xlsx sheet Gauss Solar to CSV format.
Converted 2022-11-01-2022-11-30_NESM - Availability.xlsx sheet Jersey Solar to CSV format.
Converted 2022-11-01-2022-11-30_NESM - Availability.xlsx sh

In [6]:
def create_outages_files(input_directory, output_directory):
    """
    Create an empty 'ExclusiveOutages_<site>.csv' template for every site CSV.

    Each regular file in ``input_directory`` whose name ends in '.csv' is
    treated as a site file named ``<prefix>_<site name>.csv``. The site name
    is everything after the FIRST underscore, so site names that themselves
    contain underscores are kept intact. A file name with no underscore is
    used as the site name in full.

    Parameters
    ----------
    input_directory : str or path-like
        Directory containing the per-site CSV files.
    output_directory : str or path-like
        Directory that receives the templates. Created if it does not exist.

    Returns
    -------
    None
        The path of each created file is reported with ``print``.

    Notes
    -----
    Each template holds only the header row
    'start_time,end_time,inverter_id'. The file is opened in write mode, so
    an existing file with the same name is truncated.
    """
    csv_suffix = ".csv"

    # Ensure the output directory exists
    os.makedirs(output_directory, exist_ok=True)

    with os.scandir(input_directory) as entries:
        for file in entries:
            if not (file.is_file() and file.name.endswith(csv_suffix)):
                continue

            # Strip only the trailing extension, not every '.csv' occurrence.
            stem = file.name[:-len(csv_suffix)]
            # Split on the first underscore only: the prefix never contains
            # one, but the site name may.
            prefix, separator, remainder = stem.partition("_")
            site_name = remainder if separator else prefix

            # Create a new filename
            new_filename = f"ExclusiveOutages_{site_name}.csv"
            new_filepath = os.path.join(output_directory, new_filename)
            with open(new_filepath, 'w') as new_file:
                new_file.write('start_time,end_time,inverter_id\n')
            print(f"Created {new_filepath}")

In [7]:
# Write one header-only outages CSV per site into ../data/exclusions, taking
# the list of sites from the CSV files in ../data/2022-09.
# NOTE: each output file is opened in write mode, so re-running this cell
# truncates any rows already added to an existing ExclusiveOutages_*.csv.
create_outages_files("../data/2022-09", "../data/exclusions")

Created ../data/exclusions/ExclusiveOutages_Siler 421 Farm.csv
Created ../data/exclusions/ExclusiveOutages_Chiloquin Solar Farm.csv
Created ../data/exclusions/ExclusiveOutages_Gauss Solar.csv
Created ../data/exclusions/ExclusiveOutages_Schell Solar Farm.csv
Created ../data/exclusions/ExclusiveOutages_NC Solar II.csv
Created ../data/exclusions/ExclusiveOutages_Red Oak Solar Farm.csv
Created ../data/exclusions/ExclusiveOutages_S. Robeson Solar.csv
Created ../data/exclusions/ExclusiveOutages_Tumbleweed Solar Farm.csv
Created ../data/exclusions/ExclusiveOutages_Four Oaks.csv
Created ../data/exclusions/ExclusiveOutages_Lakeview Solar.csv
Created ../data/exclusions/ExclusiveOutages_Turkey Hill Solar.csv
Created ../data/exclusions/ExclusiveOutages_Davis Lane Solar, LLC.csv
Created ../data/exclusions/ExclusiveOutages_NC Solar I.csv
Created ../data/exclusions/ExclusiveOutages_Merrill Solar.csv
Created ../data/exclusions/ExclusiveOutages_Dairy Solar.csv
Created ../data/exclusions/ExclusiveOutage