In [2]:
# Imports
from aws_snowflake_config import aws_s3_connection, snowflake_connection
import logging
from botocore.exceptions import ClientError
import os

In [5]:
# Obtain the S3 client and the target bucket name from the project's
# connection helper. Both are read as module-level globals by
# upload_file_to_s3 when it performs the uploads.
s3_client, bucket_name = aws_s3_connection()

# Get a list of all the files with the extension in the directory_path
def get_files_in_directory(directory_path, extension):
    """Return the names of regular files in a directory that end with ``extension``.

    Args:
        directory_path: Directory to scan (not recursive).
        extension: Suffix to match, e.g. ``'.csv'``. A tuple of suffixes is
            also accepted because it is passed straight to ``str.endswith``.

    Returns:
        A sorted list of bare file names (no directory component). The list
        is empty when nothing matches.

    Raises:
        OSError: If ``directory_path`` cannot be listed (for example, it does
            not exist or is not a directory).
    """
    # Sub-directories whose name happens to end with the extension are
    # skipped: callers treat every returned name as an uploadable file.
    # Sorting makes the order deterministic, since os.listdir() returns
    # entries in arbitrary order.
    return sorted(
        name
        for name in os.listdir(directory_path)
        if name.endswith(extension)
        and os.path.isfile(os.path.join(directory_path, name))
    )

# Upload every file with the given extension from a local directory to the S3 bucket
def upload_file_to_s3(file_path, type, extension):
    """Upload every file with ``extension`` found in a local directory to S3.

    Uses the module-level ``s3_client`` and ``bucket_name``. Each file is
    stored under the key ``type + <file name>``. Progress is printed to
    stdout and per-file failures are logged; the function never raises.

    Args:
        file_path: Local directory to scan. Despite the name, this is a
            directory, not a single file.
        type: Key prefix prepended verbatim to each file name, so it should
            normally end with ``'/'`` (e.g. ``'CSV_Data/'``). The name
            shadows the ``type`` builtin but is kept for caller
            compatibility.
        extension: File-name suffix selecting which files to upload,
            e.g. ``'.csv'``.

    Returns:
        None.
    """
    try:
        # Fetching all files in the directory with the given extension
        list_of_files = get_files_in_directory(file_path, extension)
        print("\n", type.rstrip("/"), "files:")
        print(list_of_files)

        if not list_of_files:
            print(f'No files found in the {file_path} directory.')
            return

        for file in list_of_files:
            file_full_path = os.path.join(file_path, file)  # file path in local directory
            key_value = type + file  # file path in S3 bucket
            try:
                # Upload the file to S3 (presumably a boto3 client -- its
                # upload_file returns None, so there is no response to keep).
                s3_client.upload_file(file_full_path, bucket_name, key_value)
            except Exception:
                # Failures are isolated per file so one bad upload does not
                # abort the rest of the batch. Catching only ClientError is
                # too narrow: a managed transfer can raise other exception
                # types (e.g. an upload-failed wrapper or OSError for an
                # unreadable local file).
                logging.exception(
                    'Failed to upload %s to s3://%s/%s',
                    file_full_path, bucket_name, key_value,
                )
            else:
                print(f'File uploaded successfully: {file_full_path} -> s3://{bucket_name}/{key_value}')
    except Exception as e:
        # Reached when the directory cannot be listed or the arguments are
        # malformed; kept broad so the function remains non-raising.
        print(f'Error uploading file {file_path}: {e}')

# Upload each local output directory under its own S3 key prefix:
# the scraped CSV dataset, then the PyPDF and Grobid text extractions.
upload_file_to_s3('../web-scraping-and-dataset/','CSV_Data/','.csv')
upload_file_to_s3('../pdf-extractions/pypdf','PyPDF/','.txt')
upload_file_to_s3('../pdf-extractions/grobid','Grobid/','.txt')



 CSV_Data files:
['scraped_data.csv']
File uploaded successfully: ../web-scraping-and-dataset/scraped_data.csv -> s3://bigdata-group3-assignment2/CSV_Data/scraped_data.csv

 PyPDF files:
['PyPDF_RR_2024_l1_combined.txt', 'PyPDF_RR_2024_l2_combined.txt', 'PyPDF_RR_2024_l3_combined.txt']
File uploaded successfully: ../pdf-extractions/pypdf\PyPDF_RR_2024_l1_combined.txt -> s3://bigdata-group3-assignment2/PyPDF/PyPDF_RR_2024_l1_combined.txt
File uploaded successfully: ../pdf-extractions/pypdf\PyPDF_RR_2024_l2_combined.txt -> s3://bigdata-group3-assignment2/PyPDF/PyPDF_RR_2024_l2_combined.txt
File uploaded successfully: ../pdf-extractions/pypdf\PyPDF_RR_2024_l3_combined.txt -> s3://bigdata-group3-assignment2/PyPDF/PyPDF_RR_2024_l3_combined.txt

 Grobid files:
[]
No files found in the ../pdf-extractions/grobid directory.
