James Caldwell <br>
9/12/24 <br>
Annual Program Review<br>
<br>
This program loads Excel files containing: <br>
    Number of undergraduate majors  <br>
    Percentage who were female<br>
    Percentage who were international<br>
    Percentage who were under-represented minority<br>
    Percentage who were first generation<br>
    Percentage who were transfer students<br>
    Percentage who were low income<br>
    <br>
and combines them into a single Excel workbook in the APR (Annual Program Review) format

In [17]:
import os
import tkinter as tk
from tkinter import filedialog
import pandas as pd
import numpy as np
from openpyxl import load_workbook  # Required for appending sheets to an existing Excel file

def select_folder():
    """Ask the user to pick a folder with a native directory dialog.

    Returns:
        The path chosen in the dialog, exactly as returned by
        ``filedialog.askdirectory`` (an empty value if the dialog is cancelled).
    """
    # Without an explicit root, tkinter creates a default top-level window
    # that stays on screen as an empty box. Create it ourselves so it can be
    # hidden while the dialog is shown and torn down afterwards.
    root = tk.Tk()
    root.withdraw()
    try:
        folder_path = filedialog.askdirectory(parent=root, title="Select a Folder")
    finally:
        # Always release the Tk interpreter, even if the dialog raised.
        root.destroy()

    return folder_path

def load_excel_files(excel_list):
    """Stack several Excel files into one DataFrame, labelled by source file.

    Each file is read with ``pd.read_excel`` and gets a leading ``Category``
    column holding the file name with ``.xlsx`` removed. The frames are then
    concatenated row-wise in the order given.

    Args:
        excel_list: File names/paths of the Excel files to load. The first
            entry defines the initial columns of the result.

    Returns:
        The combined DataFrame, or ``None`` when ``excel_list`` is empty or the
        first file cannot be loaded (callers treat ``None`` as "nothing to
        write").
    """
    if not excel_list:
        print('No Excel files were requested; nothing to load.')
        return None

    # The first file is loaded as-is (it is not rounded) and seeds the result.
    first_file = excel_list[0]
    try:
        data_raw = pd.read_excel(first_file)
        data_raw.insert(0, 'Category', str(first_file).replace('.xlsx', ''))
    except Exception as e:
        # Best effort: without the first file there is no table to build on,
        # so report why and let the caller skip this folder.
        print(f'Could not load {first_file!r}: {e}')
        return None

    # Append every remaining file below the rows collected so far.
    for excel_file in excel_list[1:]:
        category = str(excel_file).replace('.xlsx', '')
        try:
            df = round(pd.read_excel(excel_file), 2)
            df.insert(0, 'Category', category)
        except Exception as e:
            # Keep a placeholder row so the missing category is still visible
            # in the output. It carries only the label: concat fills every
            # other column with NaN, and no stray numbered columns are added.
            print(f'Could not load {excel_file!r}: {e}')
            df = pd.DataFrame({'Category': [category]})

        data_raw = pd.concat([data_raw, df], ignore_index=True)

    return data_raw

def load_and_write(major_or_minor):
    """Load the standard set of Excel files from the current working directory.

    The file names are relative, so the caller must ``os.chdir`` into the
    folder holding them before calling. When the current directory path
    contains ``degree recipients/phd`` (joined with the OS separator), the two
    time-to-degree files are loaded as well.

    Args:
        major_or_minor: Name of the folder being processed. Currently unused
            by this function; kept so existing callers keep working.

    Returns:
        The combined DataFrame with the 'Degree Level Desc' and 'Term Desc'
        columns removed when present, or ``None`` if ``load_excel_files``
        could not build a table.
    """
    excel_list = [
        'counts.xlsx',
        'female.xlsx',
        'international.xlsx',
        'minority.xlsx',
        'first generation.xlsx',
        'transfer.xlsx',
        'low income.xlsx',
    ]

    # If degree recipients and phd, also get time to degree files
    folder_to_find = os.path.join('degree recipients', 'phd')
    if folder_to_find in os.getcwd():
        excel_list = excel_list + ['avg time to deg.xlsx', 'median time to deg.xlsx']

    data_raw = load_excel_files(excel_list)
    if data_raw is None:
        # Nothing was loaded; pass the "no data" signal straight through.
        return None

    # errors='ignore' makes the drop a no-op for columns that are absent.
    return data_raw.drop(columns=['Degree Level Desc', 'Term Desc'], errors='ignore')

if __name__ == "__main__":

    # folder_path = select_folder()
    folder_path = r'V:\Christina\Academic Program Review\2024 Program Review\Qlik data\Sociology'

    folder_types = ['enrollment', 'degree recipients']
    save_name = '1combined_data.xlsx'

    # Delete the file if it already exists so we can make a new one
    save_filepath = os.path.join(folder_path, save_name)
    if os.path.exists(save_filepath):
        os.remove(save_filepath)

    for folder_type in folder_types:
        type_dir = os.path.join(folder_path, folder_type)

        # Every sub-folder of the folder type becomes one sheet
        folders = [name for name in os.listdir(type_dir)
                   if os.path.isdir(os.path.join(type_dir, name))]
        dataframes = {}  # Dictionary to store each DataFrame with folder name as key

        for folder in folders:
            file_path_here = os.path.join(type_dir, folder)
            print(file_path_here)

            # load_and_write reads relative file names and inspects the
            # current directory, so it must run from inside the folder.
            os.chdir(file_path_here)
            dataframes[folder] = load_and_write(folder)

        # Leave the data sub-folders again before writing the workbook
        os.chdir(folder_path)

        # Only folders that produced data get a sheet
        sheets = {folder_type + '_' + deg_level: frame
                  for deg_level, frame in dataframes.items()
                  if frame is not None}
        if not sheets:
            # A workbook cannot be saved without at least one sheet, so do
            # not open a writer when there is nothing to put in it.
            print('No data loaded for ' + folder_type + '; nothing written.')
            continue

        # Append to the workbook once it exists; 'if_sheet_exists' is only
        # accepted by pandas in append mode, so it is added conditionally.
        writer_options = {'engine': 'openpyxl'}
        if os.path.exists(save_filepath):
            writer_options.update(mode='a', if_sheet_exists='replace')

        with pd.ExcelWriter(save_filepath, **writer_options) as writer:
            for sheet_name_here, frame in sheets.items():
                frame.to_excel(writer, index=False, sheet_name=sheet_name_here)





V:\Christina\Academic Program Review\2024 Program Review\Qlik data\Sociology\enrollment\major
V:\Christina\Academic Program Review\2024 Program Review\Qlik data\Sociology\enrollment\masters
V:\Christina\Academic Program Review\2024 Program Review\Qlik data\Sociology\enrollment\minor
V:\Christina\Academic Program Review\2024 Program Review\Qlik data\Sociology\enrollment\phd
V:\Christina\Academic Program Review\2024 Program Review\Qlik data\Sociology\degree recipients\BA
V:\Christina\Academic Program Review\2024 Program Review\Qlik data\Sociology\degree recipients\MA
V:\Christina\Academic Program Review\2024 Program Review\Qlik data\Sociology\degree recipients\phd
