In [1]:
import pandas as pd
import os
import boto3
from zipfile import ZipFile
import shutil
import numpy as np

In [12]:
def process_csv_files(output_folder_name, domain_name):
    """Download the dummy dataset from S3 and write one average file per metric CSV.

    The archive ``3_fair_data/dummy_data/dummy_dataset.zip`` is downloaded from
    the ``ca-climate-index`` bucket into the local ``dummy_dataset`` directory
    and extracted there. For every file in that directory whose name starts
    with ``domain_name`` and ends with ``.csv``, the mean of its second column
    (selected by position, not by name) is written to
    ``<output_folder_name>/<csv name without extension>_average.csv`` under the
    header ``average_metric_score``.

    Parameters
    ----------
    output_folder_name : str
        Folder that receives the ``*_average.csv`` files; created if missing.
    domain_name : str
        File-name prefix selecting which CSVs to process.

    Returns
    -------
    None
        Results are written to disk only.

    Notes
    -----
    The archive is downloaded and extracted again on every call.
    """
    # Initialize the S3 client
    s3_client = boto3.client('s3')

    # Bucket name and object key of the zipped dataset
    bucket_name = 'ca-climate-index'
    directory = '3_fair_data/dummy_data/dummy_dataset.zip'

    # Local directory to store the downloaded zip file and extracted contents
    local_directory = 'dummy_dataset'
    os.makedirs(local_directory, exist_ok=True)

    # Download the zip file
    local_zip_file_path = os.path.join(local_directory, os.path.basename(directory))
    s3_client.download_file(bucket_name, directory, local_zip_file_path)

    # Extract the contents of the zip file next to the archive itself
    with ZipFile(local_zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(local_directory)

    # Keep only this domain's CSV files (the downloaded .zip is filtered out too)
    csv_files = [
        csv_name
        for csv_name in os.listdir(local_directory)
        if csv_name.startswith(domain_name) and csv_name.endswith('.csv')
    ]

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder_name, exist_ok=True)

    for csv_name in csv_files:
        df = pd.read_csv(os.path.join(local_directory, csv_name))

        # The metric values are read from the second column by position,
        # whatever that column is called.
        standardized_average = df.iloc[:, 1].mean()

        # One small CSV per metric: a header line followed by the mean
        csv_stem = os.path.splitext(csv_name)[0]
        avg_output_file_path = os.path.join(output_folder_name, f"{csv_stem}_average.csv")
        with open(avg_output_file_path, 'w') as avg_file:
            avg_file.write(f"average_metric_score\n{standardized_average}\n")

    # NOTE: the extracted dataset is left on disk. If cleanup is wanted, use
    # shutil.rmtree(local_directory); os.remove() cannot delete a directory.

def sum_domain_averages(metric_avgs_folder, output_domain_folder_name, domain_name):
    """Add up one domain's metric averages, save the total, and upload it to S3.

    Every file in ``metric_avgs_folder`` whose name starts with ``domain_name``
    and ends with ``_average.csv`` contributes the value in its first data row,
    first column. The total is written to
    ``<output_domain_folder_name>/<domain_name>_sum.csv`` under the header
    ``summed_average_metric_score`` and that file is then uploaded to the
    ``ca-climate-index`` bucket as ``3_fair_data/dummy_data/<domain_name>_sum.csv``.

    Parameters
    ----------
    metric_avgs_folder : str
        Folder containing the ``*_average.csv`` files.
    output_domain_folder_name : str
        Folder that receives the sum file; created if it does not exist.
    domain_name : str
        File-name prefix selecting the domain's average files.

    Returns
    -------
    None
    """
    # Read the folder listing first so a missing input folder fails early
    available = os.listdir(metric_avgs_folder)

    # Make sure the destination folder is there before anything is written
    if not os.path.exists(output_domain_folder_name):
        os.makedirs(output_domain_folder_name)

    # Narrow the listing down to this domain's average files
    matching = [
        name
        for name in available
        if name.startswith(domain_name) and name.endswith('_average.csv')
    ]

    # Total of the single value stored in each average file (0 when none match)
    domain_sum = sum(
        pd.read_csv(os.path.join(metric_avgs_folder, name)).iloc[0, 0]
        for name in matching
    )

    # Persist the total locally
    sum_output_file_path = os.path.join(output_domain_folder_name, f"{domain_name}_sum.csv")
    with open(sum_output_file_path, 'w') as sum_file:
        sum_file.write(f"summed_average_metric_score\n{domain_sum}\n")

    # Push the local file to S3
    bucket_name = 'ca-climate-index'
    boto3.client('s3').upload_file(
        sum_output_file_path,
        bucket_name,
        f"3_fair_data/dummy_data/{domain_name}_sum.csv",
    )
    print('Summed indicators uploaded to aws')

In [13]:
# Domain prefixes to aggregate. Named `domain_names` rather than `list` so the
# builtin `list` is not shadowed for the rest of the kernel session.
domain_names = ['DUMMY_built',
                'DUMMY_society',
                'DUMMY_governance',
                'DUMMY_natural'
                ]

# First pass: write the per-metric average files for every domain.
for domain_name in domain_names:
    process_csv_files('metric_averages', domain_name)

# Second pass: combine each domain's average files into one summary file.
for domain_name in domain_names:
    sum_domain_averages('metric_averages', 'domain_sum', domain_name)

Summed indicators uploaded to aws
Summed indicators uploaded to aws
Summed indicators uploaded to aws
Summed indicators uploaded to aws


In [19]:
def process_csv_files(output_folder_name, domain_name):
    """Download the dummy dataset from S3 and write one average file per metric CSV.

    The archive ``3_fair_data/dummy_data/dummy_dataset.zip`` is downloaded from
    the ``ca-climate-index`` bucket into the local ``dummy_dataset`` directory
    and extracted there. For every file in that directory whose name starts
    with ``domain_name`` and ends with ``.csv``, the mean of its second column
    (selected by position, not by name) is written to
    ``<output_folder_name>/<csv name without extension>_average.csv`` under the
    header ``average_metric_score``.

    Parameters
    ----------
    output_folder_name : str
        Folder that receives the ``*_average.csv`` files; created if missing.
    domain_name : str
        File-name prefix selecting which CSVs to process.

    Returns
    -------
    None
        Results are written to disk only.

    Notes
    -----
    The archive is downloaded and extracted again on every call.
    """
    # Initialize the S3 client
    s3_client = boto3.client('s3')

    # Bucket name and object key of the zipped dataset
    bucket_name = 'ca-climate-index'
    directory = '3_fair_data/dummy_data/dummy_dataset.zip'

    # Local directory to store the downloaded zip file and extracted contents
    local_directory = 'dummy_dataset'
    os.makedirs(local_directory, exist_ok=True)

    # Download the zip file
    local_zip_file_path = os.path.join(local_directory, os.path.basename(directory))
    s3_client.download_file(bucket_name, directory, local_zip_file_path)

    # Extract the contents of the zip file next to the archive itself
    with ZipFile(local_zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(local_directory)

    # Keep only this domain's CSV files (the downloaded .zip is filtered out too)
    csv_files = [
        csv_name
        for csv_name in os.listdir(local_directory)
        if csv_name.startswith(domain_name) and csv_name.endswith('.csv')
    ]

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder_name, exist_ok=True)

    for csv_name in csv_files:
        df = pd.read_csv(os.path.join(local_directory, csv_name))

        # The metric values are read from the second column by position,
        # whatever that column is called.
        standardized_average = df.iloc[:, 1].mean()

        # One small CSV per metric: a header line followed by the mean
        csv_stem = os.path.splitext(csv_name)[0]
        avg_output_file_path = os.path.join(output_folder_name, f"{csv_stem}_average.csv")
        with open(avg_output_file_path, 'w') as avg_file:
            avg_file.write(f"average_metric_score\n{standardized_average}\n")

    # NOTE: the extracted dataset is left on disk. If cleanup is wanted, use
    # shutil.rmtree(local_directory); os.remove() cannot delete a directory.

def sum_domain_averages(metric_avgs_folder, output_domain_folder_name, domain_name):
    """Sum one domain's metric averages and write the sum plus a min-max scaled sum.

    Every file in ``metric_avgs_folder`` whose name starts with ``domain_name``
    and ends with ``_average.csv`` contributes the value in its first data row,
    first column. Two numbers are written to
    ``<output_domain_folder_name>/<domain_name>_sum.csv``:

    * ``summed_average_metric_score`` - the sum of those values;
    * ``summed_average_metric_score_standardized`` -
      ``(sum - min) / (max - min)``, with ``min``/``max`` taken over the
      individual values.

    NOTE(review): the sum, not an individual value, is scaled by the range of
    its components, so the standardized figure is not confined to [0, 1]
    (averages 1 and 3 give 1.5). Confirm this is the intended formula.

    Parameters
    ----------
    metric_avgs_folder : str
        Folder containing the ``*_average.csv`` files.
    output_domain_folder_name : str
        Folder that receives the sum file; created if missing.
    domain_name : str
        File-name prefix selecting the domain's average files.

    Returns
    -------
    None
        The result is written locally; nothing is uploaded by this version.

    Raises
    ------
    ValueError
        If no average file matches ``domain_name``.
    """
    # List all files in the averages folder
    files = os.listdir(metric_avgs_folder)

    # Create the output folder if it doesn't exist
    os.makedirs(output_domain_folder_name, exist_ok=True)

    # Keep this domain's average files. Sorting fixes the summation order, so
    # the floating-point total does not depend on directory listing order.
    domain_files = sorted(
        file_name
        for file_name in files
        if file_name.startswith(domain_name) and file_name.endswith('_average.csv')
    )

    # Each average file holds a single value below its header
    individual_averages = [
        pd.read_csv(os.path.join(metric_avgs_folder, file_name)).iloc[0, 0]
        for file_name in domain_files
    ]

    # min()/max() on an empty list would fail with an unhelpful message
    if not individual_averages:
        raise ValueError(
            f"No '{domain_name}*_average.csv' files found in '{metric_avgs_folder}'"
        )

    # Sum the individual average values
    domain_sum = sum(individual_averages)

    # Min-max standardization of the summed value. With a single file, or when
    # every average is equal, the range is zero and the ratio is undefined:
    # record NaN explicitly instead of dividing by zero.
    x_min = min(individual_averages)
    x_max = max(individual_averages)
    value_range = x_max - x_min
    if value_range == 0:
        domain_sum_standardized = float('nan')
    else:
        domain_sum_standardized = (domain_sum - x_min) / value_range

    # Write the sum and standardized sum to a new file
    sum_output_file_path = os.path.join(output_domain_folder_name, f"{domain_name}_sum.csv")
    with open(sum_output_file_path, 'w') as sum_file:
        sum_file.write("summed_average_metric_score,summed_average_metric_score_standardized\n")
        sum_file.write(f"{domain_sum},{domain_sum_standardized}\n")

    # Uploading the sum file to S3 is disabled in this version; the result is
    # only written to the local output folder.

In [20]:
# Domain prefixes to aggregate. Named `domain_names` rather than `list` so the
# builtin `list` is not shadowed for the rest of the kernel session.
domain_names = ['DUMMY_built',
                'DUMMY_society',
                'DUMMY_governance',
                'DUMMY_natural'
                ]

# First pass: write the per-metric average files for every domain.
for domain_name in domain_names:
    process_csv_files('metric_averages', domain_name)

# Second pass: combine each domain's average files into one summary file.
for domain_name in domain_names:
    sum_domain_averages('metric_averages', 'domain_sum', domain_name)