In [41]:
import requests
import os
import boto3
import pandas as pd
from dotenv import load_dotenv
from github_python_fetch import fetch_function
from query_package import get_query_package

# Load variables from a local .env file (if one exists) into the process environment.
# load_dotenv was imported but never called, so every os.getenv below depended on the
# variables already being exported in the shell that launched the kernel.
load_dotenv()

# GitHub Access
token = os.getenv('GITHUBTOKEN')

# AWS Access
access = os.getenv('ACCESS')
secret = os.getenv('SECRET')

# RDS Access
# NOTE(review): 'USER' is normally already set by the operating system to the login name,
# and load_dotenv() does not override existing variables by default -- confirm this picks
# up the intended database user.
username = os.getenv('USER')
password = os.getenv('PASSWORD')
server = os.getenv('SERVER')
db = os.getenv('DB')


# Access RDS Functions via GitHub
rds_functions_url = 'https://raw.githubusercontent.com/CharlesRoss-MBakerIntl/Tidal-Basin-Functions/main/rds_connector.py' # Set url to python file of github
rds_functions = fetch_function(rds_functions_url, token) # Pull function from github using requests
# SECURITY NOTE(review): exec() runs whatever text the URL returned inside this kernel.
# Only safe while the repository is trusted; consider importing a pinned, reviewed copy instead.
exec(rds_functions) # Execute the file


# Pull Query Package from File
query_package = get_query_package()


# Store RDS Functions Locally
# NOTE: both lines are commented out, so `conn`, `cursor` and `rds` are NOT defined by this cell.
#conn, cursor = rds_connection(username, password, db, server) # Connect to RDS Database
#rds = RDSTablePull(conn, cursor, query_package) # Create Instance of RDS Table

In [None]:
# Build a file name for every recorded cleaning step.
# NOTE(review): `rds` is only created by a line that is commented out in the setup cell,
# so this loop raises NameError on a fresh kernel until that line is restored.
for item in rds.cleaning_versions:

    # Create File Name
    # Identity test for None (PEP 8): `== None` can be hijacked by objects that overload __eq__.
    if item['Field'] is None:
        file_name = f"{item['Step']}"
    else:
        file_name = f"{item['Field']}: {item['Step']}"

    # Store DataFrame as CSV
    #data = item['Result'].to_csv(f"{file_name}.csv")

In [3]:
# Bucket and file information
# Module-level bucket name; later cells in this notebook read it directly.
bucket_name = 'reporting-external'

# Create a S3 client
# `access` and `secret` are the values read from the ACCESS / SECRET environment
# variables in the setup cell (they are None if those variables are not set).
s3 = boto3.client('s3', aws_access_key_id=access, aws_secret_access_key=secret)

# List the bucket with '/' as delimiter and keep the raw response in `result`.
result = s3.list_objects_v2(Bucket=bucket_name, Delimiter='/')

In [43]:
def update_active_data(s3, bucket, folder, file_name, data):
    """Overwrite an existing CSV in a folder's ``Active-Data/`` sub-folder.

    The file is only written when an object with exactly the same key is already
    present; this function never creates a new active file.

    Args:
        s3: S3 client exposing ``list_objects_v2`` and ``put_object``.
        bucket: Name of the bucket that holds the file.
        folder: Parent prefix; ``'Active-Data/'`` is appended directly to it.
        file_name: Name of the CSV inside the ``Active-Data/`` folder.
        data: Object with a ``to_csv()`` method whose return value is uploaded
            as the new file body.

    Raises:
        Exception: If the file does not exist, the listing fails, or the upload
            fails. The original error is attached as ``__cause__``.
    """

    #Set Key of the Active File inside the Active Data Folder
    active_key = folder + 'Active-Data/' + file_name

    try:
        #Pull Results for File in S3 Bucket (the `bucket` argument, not a notebook global)
        result = s3.list_objects_v2(Bucket=bucket, Prefix=active_key)

        #Check if File Exists in Folder (prefix matches may include longer keys, so compare exactly)
        file_exists = any(item['Key'] == active_key for item in result.get('Contents', []))

        if not file_exists:
            raise Exception(f"Error: Could not update active csv, '{file_name}' does not exist within {folder}")

        try:
            # Update the file with a new version
            s3.put_object(Bucket = bucket, Key = active_key, Body = data.to_csv())

        except Exception as e:
            raise Exception(f"Error: Failed to Overwrite Active csv file {file_name} in {folder}") from e

    except Exception as e:
        # Single outer wrapper keeps the message format callers already see
        raise Exception(f"Error: Could not update active csv file in {folder}: {e}") from e

In [42]:
# Upload the CSV to S3, overwriting the existing file
# NOTE(review): the body is the CSV text of an *empty* DataFrame, so running this cell
# replaces the object at this key with an empty dataset. Presumably a write test --
# confirm before re-running against real data.
s3.put_object(Bucket=bucket_name, Key="FDEM-Hotel-Summary/Active-Data/Active_FDEM_Hotel_Summary.csv", Body=pd.DataFrame().to_csv())

{'ResponseMetadata': {'RequestId': 'BWJME6PF5PX44ECH',
  'HostId': 'Ol9yXqHDGxAynGLMwoKsCLjfRGTIAWubccH2+SRRabJjv3H/rzNWEzN+dRM8RIwB6AAYfLqPA/VWz3grbs1WQw==',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'Ol9yXqHDGxAynGLMwoKsCLjfRGTIAWubccH2+SRRabJjv3H/rzNWEzN+dRM8RIwB6AAYfLqPA/VWz3grbs1WQw==',
   'x-amz-request-id': 'BWJME6PF5PX44ECH',
   'date': 'Wed, 30 Oct 2024 17:58:29 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"0ae9bcd0c0b0aa5aab99d84beca26ce8"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"0ae9bcd0c0b0aa5aab99d84beca26ce8"',
 'ServerSideEncryption': 'AES256'}

In [23]:
def add_data_package(data):
    """Placeholder for storing a data package; currently ignores ``data`` and returns None."""
    return None

In [33]:
def add_active_data(s3, bucket, folder_prefix, data):
    """List what is stored under a folder's ``Active-Data`` prefix.

    NOTE(review): this function is unfinished -- it performs the two listings but does
    not use their results yet, never writes ``data``, and returns None.

    Args:
        s3: S3 client exposing ``list_objects_v2``.
        bucket: Name of the bucket to list.
        folder_prefix: Parent prefix; ``"Active-Data"`` is appended directly to it.
        data: Currently unused.
    """

    active_prefix = folder_prefix + "Active-Data"

    # List objects within the specified folder (uses the `bucket` argument, not a notebook global)
    result = s3.list_objects_v2(Bucket=bucket, Prefix=active_prefix, Delimiter='/')

    #Grab Folder Objects (the original referenced `folder_key`, which is not defined in this function)
    active_objects = s3.list_objects_v2(Bucket=bucket, Prefix=active_prefix)

In [22]:
def add_s3_folder(s3, bucket, folder_name, data = None, folder_prefix = None, limit = None):
    """Create a folder key in an S3 bucket, optionally rotating out the oldest sibling.

    Args:
        s3: S3 client exposing ``list_objects_v2``, ``put_object`` and ``delete_object``.
        bucket: Name of the bucket to write to.
        folder_name: Key of the new folder, appended directly to ``folder_prefix``.
        data: Passed to ``add_data_package`` once the folder key has been written.
        folder_prefix: Parent prefix. ``None`` creates the folder at the bucket root.
        limit: Maximum number of sub-folders tolerated under ``folder_prefix``.
            When more than ``limit`` exist, the oldest one is deleted first.
            ``None`` disables the check. Ignored when ``folder_prefix`` is None.

    Returns:
        None.

    Error handling:
        With a ``folder_prefix``, errors are caught and printed rather than raised.
        That includes the "No folders found" error raised when a limit is given but
        the parent prefix has no sub-folders. Without a ``folder_prefix``, errors
        from the S3 client propagate to the caller.
    """

    #No Folder Path Add, Add Folder to Main Bucket
    if folder_prefix is None:
        s3.put_object(Bucket=bucket, Key=folder_name)
        add_data_package(data)
        return

    try:
        #If Limit is Passed, make room before adding
        if limit is not None:

            # List sub-folders of the parent (uses the `bucket` argument, not a notebook global)
            result = s3.list_objects_v2(Bucket=bucket, Prefix=folder_prefix, Delimiter='/')

            if 'CommonPrefixes' not in result:
                raise Exception(f"Error: No folders found in {folder_prefix}")

            subfolders = result['CommonPrefixes']

            #Check if Folder Count Over Passed Limit
            if len(subfolders) > limit:
                oldest_folder_key = _oldest_s3_subfolder(s3, bucket, subfolders)
                _delete_s3_prefix(s3, bucket, oldest_folder_key)

        # Add the folder exactly once, whether or not a rotation was needed.
        # (Previously this ran once per sub-folder, and not at all when under the limit.)
        s3.put_object(Bucket=bucket, Key=folder_prefix + folder_name)

        #Add Data
        add_data_package(data)

    except Exception as e:
        print(f"Error occurred: {e}")


def _oldest_s3_subfolder(s3, bucket, subfolders):
    """Return the prefix of the sub-folder whose oldest listed object is the oldest overall."""
    folder_dates = []
    for folder in subfolders:
        folder_key = folder['Prefix']
        folder_objects = s3.list_objects_v2(Bucket=bucket, Prefix=folder_key)

        if 'Contents' not in folder_objects:
            raise Exception(f"Contents not found in Folder List in {folder_key}")

        # Only the first page of the listing is inspected when dating a folder
        oldest_object = min(folder_objects['Contents'], key=lambda x: x['LastModified'])
        folder_dates.append((folder_key, oldest_object['LastModified']))

    # min() keeps the first folder on ties, matching a stable sort followed by [0]
    return min(folder_dates, key=lambda x: x[1])[0]


def _delete_s3_prefix(s3, bucket, prefix):
    """Delete every object stored under ``prefix``, following listing pagination."""
    # S3 has no real folders: deleting only the key named `prefix` leaves its contents behind.
    list_kwargs = {'Bucket': bucket, 'Prefix': prefix}
    while True:
        page = s3.list_objects_v2(**list_kwargs)
        for obj in page.get('Contents', []):
            s3.delete_object(Bucket=bucket, Key=obj['Key'])
        if not page.get('IsTruncated'):
            break
        list_kwargs['ContinuationToken'] = page['NextContinuationToken']

In [19]:
import boto3
from datetime import datetime

# Bucket and folder information
bucket_name = 'reporting-external'
folder_prefix = 'FDEM-Hotel-Summary/Archived-Processing-Steps/'


# Scratch version of the archive rotation: when more than 30 dated sub-folders exist under
# `folder_prefix`, delete the oldest one; otherwise just print the sub-folder prefixes.
# Relies on the `s3` client created in an earlier cell.
try:
    # List objects within the specified folder
    result = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_prefix, Delimiter='/')

    if 'CommonPrefixes' in result:

        #Find All Sub-Folders within Passed Folder
        subfolders = result['CommonPrefixes']

        #Store Folder Count
        folder_count = len(subfolders)


        #Check if Folder Count Over Passed Limit
        if folder_count > 30:

            # Fetch all subfolder objects to get their last modified date
            folder_dates = []
            for folder in subfolders:

                folder_key = folder['Prefix']
                folder_objects = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_key)

                if 'Contents' in folder_objects:
                    # Get the oldest object's last modified date
                    oldest_object = min(folder_objects['Contents'], key=lambda x: x['LastModified'])
                    folder_dates.append((folder_key, oldest_object['LastModified']))

            # Sort folders by date and delete the oldest one
            folder_dates.sort(key=lambda x: x[1])
            oldest_folder_key = folder_dates[0][0]

            print(f"Deleting the oldest folder: {oldest_folder_key}")
            # S3 has no real folders: deleting only the key named after the prefix leaves
            # every object inside it behind, so remove each object under the prefix instead.
            list_kwargs = {'Bucket': bucket_name, 'Prefix': oldest_folder_key}
            while True:
                page = s3.list_objects_v2(**list_kwargs)
                for obj in page.get('Contents', []):
                    s3.delete_object(Bucket=bucket_name, Key=obj['Key'])
                if not page.get('IsTruncated'):
                    break
                # Listings are paginated; continue from where the previous page stopped
                list_kwargs['ContinuationToken'] = page['NextContinuationToken']
            print(f"Folder '{oldest_folder_key}' has been deleted.")

        else:
            for folder in subfolders:
                print(folder['Prefix'])
    else:
        print(f"No folders found in {folder_prefix}")

except Exception as e:
    print(f"Error occurred: {e}")





Number of folders: 1
FDEM-Hotel-Summary/Archived-Processing-Steps/2024_10_29/


In [20]:
# Display the sub-folder prefixes collected by the archive-rotation cell above.
subfolders

[{'Prefix': 'FDEM-Hotel-Summary/Archived-Processing-Steps/2024_10_29/'}]

In [5]:
# Write test: create an object with the key 'TESTFOLDER' (no body) in the bucket.
s3.put_object(Bucket=bucket_name, Key='TESTFOLDER')


{'ResponseMetadata': {'RequestId': 'GFEY835AM7VDB6DE',
  'HostId': 'SicqyPB8hYylUqIYvwUfUwYhVdhM2tjJhRRwIovwBFOeJOWhWiv5PgoPGN+dQz8KRo/aADt0XpY=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'SicqyPB8hYylUqIYvwUfUwYhVdhM2tjJhRRwIovwBFOeJOWhWiv5PgoPGN+dQz8KRo/aADt0XpY=',
   'x-amz-request-id': 'GFEY835AM7VDB6DE',
   'date': 'Tue, 29 Oct 2024 21:31:50 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"d41d8cd98f00b204e9800998ecf8427e"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"d41d8cd98f00b204e9800998ecf8427e"',
 'ServerSideEncryption': 'AES256'}

In [6]:
# Clean up the write test: delete the 'TESTFOLDER' object again.
s3.delete_object(Bucket=bucket_name, Key='TESTFOLDER')

{'ResponseMetadata': {'RequestId': '65MJD1CJHWFTA60M',
  'HostId': '1Lk2nBZUnsjI6amk43T8OpQhnd5geJ/PlUGqMhRLR0hcil7efHpAymVKAuJKR8ReTm6M75IbwCzRCrf3AdU/1g==',
  'HTTPStatusCode': 204,
  'HTTPHeaders': {'x-amz-id-2': '1Lk2nBZUnsjI6amk43T8OpQhnd5geJ/PlUGqMhRLR0hcil7efHpAymVKAuJKR8ReTm6M75IbwCzRCrf3AdU/1g==',
   'x-amz-request-id': '65MJD1CJHWFTA60M',
   'date': 'Tue, 29 Oct 2024 21:33:49 GMT',
   'server': 'AmazonS3'},
  'RetryAttempts': 0}}