In [1]:
import boto3
import pandas as pd
import numpy as np

In [2]:
def download_file_from_s3(bucket, object_name, local_file_name):
    """
    Copy a single S3 object onto the local file system.

    :param bucket: Name of the S3 bucket that holds the object
    :param object_name: Key of the object inside the bucket
    :param local_file_name: Local path the downloaded object is written to
    :return: None
    """
    # An explicit Session is used for credentials that are set up locally.
    # On a host that relies on an IAM role, boto3.client('s3') can be used
    # instead (boto3 then retrieves credentials from the instance metadata).
    boto3.Session().client('s3').download_file(
        Bucket=bucket,
        Key=object_name,
        Filename=local_file_name,
    )

# Source object in S3 and the local path it is staged to.
bucket_name = 'myukdata'
s3_file_name = 'Original/Constituency/Constituency_Unique.csv'
local_file = 'Constituency_Unique.csv'

download_file_from_s3(
    bucket=bucket_name,
    object_name=s3_file_name,
    local_file_name=local_file,
)



In [3]:
# Remove the row cap on rendered frames. This is a global pandas option,
# so it also applies to every later cell in the session.
pd.set_option('display.max_rows', None)

# Load the downloaded CSV, print its schema, then show the first rows.
df = pd.read_csv(filepath_or_buffer='Constituency_Unique.csv')
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3876 entries, 0 to 3875
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   id         3876 non-null   int64 
 1   name       3876 non-null   object
 2   startDate  3876 non-null   object
 3   endDate    3226 non-null   object
dtypes: int64(1), object(3)
memory usage: 121.2+ KB


Unnamed: 0,id,name,startDate,endDate
0,1,Aberavon,1918-12-14T00:00:00,1950-02-23T00:00:00
1,2,Aberavon,1950-02-23T00:00:00,1974-02-28T00:00:00
2,3,Aberavon,1974-02-28T00:00:00,1983-06-09T00:00:00
3,4,Aberavon,1983-06-09T00:00:00,1997-05-01T00:00:00
4,5,Aberavon,1997-05-01T00:00:00,2010-05-06T00:00:00


In [4]:
# Parse both date columns to datetimes and give the generic 'id' / 'name'
# columns constituency-specific names, in one chained expression.
df = (
    df
    .assign(
        startDate=pd.to_datetime(df['startDate']),
        endDate=pd.to_datetime(df['endDate']),
    )
    .rename(columns={'id': 'constituencyId', 'name': 'constituencyName'})
)

In [5]:
# Re-check the schema and first rows after the date conversion and column
# renames applied in the previous cell.
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3876 entries, 0 to 3875
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   constituencyId    3876 non-null   int64         
 1   constituencyName  3876 non-null   object        
 2   startDate         3876 non-null   datetime64[ns]
 3   endDate           3226 non-null   datetime64[ns]
dtypes: datetime64[ns](2), int64(1), object(1)
memory usage: 121.2+ KB


Unnamed: 0,constituencyId,constituencyName,startDate,endDate
0,1,Aberavon,1918-12-14,1950-02-23
1,2,Aberavon,1950-02-23,1974-02-28
2,3,Aberavon,1974-02-28,1983-06-09
3,4,Aberavon,1983-06-09,1997-05-01
4,5,Aberavon,1997-05-01,2010-05-06


In [6]:
# Save the DataFrame to S3 as a CSV object
import boto3
import io

def upload_df_to_s3(df: pd.DataFrame, bucket: str, object_name: str) -> bool:
    """
    Upload a DataFrame to an S3 bucket as CSV.

    The frame is serialised without its index and sent with a single
    ``put_object`` call.

    :param df: DataFrame to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name (key)
    :return: True if the DataFrame was uploaded, False if the request
        failed with a ClientError (the error is logged)
    """
    # Imported here because the notebook never imports these names; without
    # them the except clause below raised NameError instead of returning False.
    import logging

    from botocore.exceptions import ClientError

    # With no target path, to_csv returns the CSV text directly, so no
    # intermediate buffer is needed.
    csv_text = df.to_csv(index=False)

    # Upload the CSV text to S3
    s3_client = boto3.client('s3')
    try:
        s3_client.put_object(Bucket=bucket, Key=object_name, Body=csv_text)
    except ClientError as e:
        logging.error(e)
        return False
    return True

# Destination bucket and key for the preprocessed constituency table.
bucket_name = 'myukdata'
object_name = 'Preprocessing/Constituency/Constituency.csv'
upload_df_to_s3(df=df, bucket=bucket_name, object_name=object_name)


True