In [17]:
import boto3
import pandas as pd
import json

In [18]:
# Module-level S3 client; the helper functions in this notebook reference it
# as the global name `s3`. Credentials/region are not passed here, so they are
# whatever boto3 resolves from the environment.
s3 = boto3.client('s3')

In [19]:
def list_all_objects(bucket, prefix):
    """Return the keys of every object in ``bucket`` whose key starts with ``prefix``.

    Repeatedly calls ``s3.list_objects_v2`` and follows the
    ``NextContinuationToken`` of each response until a response arrives
    without one.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Key prefix used to restrict the listing.

    Returns:
        list[str]: Object keys in the order the service returned them.
    """
    keys = []
    request = {'Bucket': bucket, 'Prefix': prefix}

    while True:
        page = s3.list_objects_v2(**request)

        # 'Contents' is absent from the response when a page has no objects.
        if 'Contents' in page:
            for entry in page['Contents']:
                keys.append(entry['Key'])

        next_token = page.get('NextContinuationToken')
        if not next_token:
            return keys

        # Ask for the following page on the next iteration.
        request['ContinuationToken'] = next_token

In [20]:
def list_csv_files(bucket, prefix):
    """Return the keys of all ``.csv`` objects under ``prefix`` in ``bucket``.

    The listing is delegated to ``list_all_objects`` so that every page of
    results is inspected. A single ``list_objects_v2`` call returns only one
    page of keys, which previously caused CSV files beyond the first page to
    be silently omitted.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Key prefix used to restrict the listing.

    Returns:
        list[str]: Keys ending in ``.csv`` (case-sensitive match), in listing
        order. Empty when nothing matches.
    """
    return [key for key in list_all_objects(bucket, prefix) if key.endswith('.csv')]



In [21]:
def read_csv_from_s3(bucket, file_key):
    """Download one S3 object and parse it as CSV.

    Args:
        bucket: Name of the S3 bucket holding the object.
        file_key: Key of the CSV object to read.

    Returns:
        pandas.DataFrame: The parsed contents, using ``pd.read_csv`` defaults.
    """
    s3_object = s3.get_object(Bucket=bucket, Key=file_key)
    # The 'Body' entry of the response is handed to pandas as a file-like object.
    return pd.read_csv(s3_object['Body'])

In [22]:
def convert_floats_to_ints(df):
    """Return a copy of ``df`` with every ``float64`` column cast to ``int``.

    The conversion is lossy, by design of the original helper:

    * missing values (NaN) are replaced with 0 before the cast, so a missing
      value can no longer be told apart from a real 0;
    * fractional parts are dropped by ``astype(int)``.

    Columns of any other dtype are returned unchanged.

    The input frame is NOT modified. Working on a copy keeps the calling cell
    idempotent and keeps any earlier reference to the raw frame valid; callers
    must use the returned frame.

    Args:
        df: DataFrame to convert.

    Returns:
        pandas.DataFrame: A new frame with the same columns and index as ``df``.
    """
    converted = df.copy()
    for column in converted.columns:
        if converted[column].dtype == 'float64':
            # fillna first: NaN cannot be represented in a plain int column.
            converted[column] = converted[column].fillna(0).astype(int)
    return converted

In [23]:
# S3 location of the input data: the bucket to read from and the key prefix
# under which objects are listed.
bucket_name = 'data-402-final-project'
prefix = 'Academy/'

In [24]:
# Keys of the .csv objects found under `prefix` in `bucket_name`.
csv_files = list_csv_files(bucket_name, prefix)

In [27]:
if not csv_files:
    print("No CSV files found in the Academy folder.")
else:
    # Load the first listed CSV and show it as read from S3.
    first_csv_file = csv_files[0]
    academy_csv_files = read_csv_from_s3(bucket_name, first_csv_file)
    print("Contents of the first CSV file:")
    print(academy_csv_files)

    # Show the same frame again once float columns have been cast to ints.
    academy_csv_files = convert_floats_to_ints(academy_csv_files)
    print("\nContents of the first CSV file after converting floats to ints:")
    print(academy_csv_files)

Contents of the first CSV file:
               name       trainer  Analytic_W1  Independent_W1  Determined_W1  \
0   Quintus Penella  Gregor Gomez            1               2              2   
1      Simon Murrey  Gregor Gomez            6               1              1   
2       Gustaf Lude  Gregor Gomez            6               4              1   
3     Yolanda Fosse  Gregor Gomez            2               1              2   
4      Lynnett Swin  Gregor Gomez            2               2              4   
5  Bart Godilington  Gregor Gomez            3               6              1   
6        Deni Roust  Gregor Gomez            6               3              1   
7   Gerhard Mcgrath  Gregor Gomez            2               3              3   

   Professional_W1  Studious_W1  Imaginative_W1  Analytic_W2  Independent_W2  \
0                1            2               2          NaN             NaN   
1                2            4               2          3.0             1.0  