In [214]:
import pandas as pd
from io import StringIO
import boto3
import time

AWS_ACCESS_KEY = ""
AWS_ACCESS_SECRET = ""
AWS_REGION = "us-east-1"
SCHEMA_NAME = "bigdata-covid-db"
S3_STAGING_DIR = "s3://bigdata-covid-project-athena/athena-output-data/"
S3_BUCKET_NAME = "bigdata-covid-project-athena"
S3_OUTPUT_DIRECTORY = "athena-output-data"

In [215]:
athena_client = boto3.client("athena",
                            aws_access_key_id = AWS_ACCESS_KEY,
                            aws_secret_access_key = AWS_ACCESS_SECRET,
                            region_name = AWS_REGION,
                            )

In [216]:
#querying athena and storing into pandas dataframe
Dict = {}
def download_and_load_query_results(client: boto3.client, 
                                    query_response: Dict
                                   )-> pd.DataFrame:
    while True:
        try:
            client.get_query_results(
            QueryExecutionId = query_response["QueryExecutionId"])
            break
        except Exception as err:
            if "not yet finished" in str(err):
                time.sleep(0.001)
            else:
                raise err
    temp_file_location: str = "athena_query_results.csv"
    s3_client = boto3.client(
    "s3",
    aws_access_key_id=AWS_ACCESS_KEY,
    aws_secret_access_key=AWS_ACCESS_SECRET,
    region_name=AWS_REGION,
    )
    s3_client.download_file(
    S3_BUCKET_NAME,
    f"{S3_OUTPUT_DIRECTORY}/{query_response['QueryExecutionId']}.csv",
    temp_file_location,
    )
    return pd.read_csv(temp_file_location)
response = athena_client.start_query_execution(
    QueryString="SELECT * FROM vaccinations",
    QueryExecutionContext={"Database": SCHEMA_NAME},
    ResultConfiguration={
        "OutputLocation": S3_STAGING_DIR,
        "EncryptionConfiguration": {"EncryptionOption": "SSE_S3"},
    },
)
response

{'QueryExecutionId': '932776f2-431e-497a-aabd-d059b3352964',
 'ResponseMetadata': {'RequestId': 'd414c18a-e357-427e-b5ec-9476527734d9',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Fri, 10 Nov 2023 14:05:53 GMT',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '59',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'd414c18a-e357-427e-b5ec-9476527734d9'},
  'RetryAttempts': 0}}

In [217]:
vaccinations = download_and_load_query_results(athena_client, response)

In [218]:
vaccinations.head()

Unnamed: 0,location,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,daily_vaccinations_per_million,daily_people_vaccinated,daily_people_vaccinated_per_hundred
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,,0.0,0.0,,,,,
1,Afghanistan,AFG,2021-02-23,,,,,,1367.0,,,,,34.0,1367.0,0.003
2,Afghanistan,AFG,2021-02-24,,,,,,1367.0,,,,,34.0,1367.0,0.003
3,Afghanistan,AFG,2021-02-25,,,,,,1367.0,,,,,34.0,1367.0,0.003
4,Afghanistan,AFG,2021-02-26,,,,,,1367.0,,,,,34.0,1367.0,0.003


In [219]:
#Repeat above step for remaining two tables as well by changing the table name and variable name of the dataframe
response = athena_client.start_query_execution(
    QueryString="SELECT * FROM vaccinations_by_manufacturer",
    QueryExecutionContext={"Database": SCHEMA_NAME},
    ResultConfiguration={
        "OutputLocation": S3_STAGING_DIR,
        "EncryptionConfiguration": {"EncryptionOption": "SSE_S3"},
    },
)
response
vaccinations_by_manufacturer = download_and_load_query_results(athena_client, response)
vaccinations_by_manufacturer.head()

Unnamed: 0,location,date,vaccine,total_vaccinations
0,Argentina,2020-12-29,Moderna,2
1,Argentina,2020-12-29,Oxford/AstraZeneca,7
2,Argentina,2020-12-29,Pfizer/BioNTech,1
3,Argentina,2020-12-29,Sinopharm/Beijing,3
4,Argentina,2020-12-29,Sputnik V,20484


In [220]:
response = athena_client.start_query_execution(
    QueryString="SELECT * FROM location",
    QueryExecutionContext={"Database": SCHEMA_NAME},
    ResultConfiguration={
        "OutputLocation": S3_STAGING_DIR,
        "EncryptionConfiguration": {"EncryptionOption": "SSE_S3"},
    },
)
response
location = download_and_load_query_results(athena_client, response)
location.head()

Unnamed: 0,col0,col1,col2,col3,col4,col5
0,location,iso_code,vaccines,last_observation_date,source_name,source_website
1,Afghanistan,AFG,"CanSino, Covaxin, Johnson&Johnson, Moderna, Ox...",2022-06-01,World Health Organization,https://covid19.who.int/
2,Albania,ALB,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinovac, ...",2022-05-22,World Health Organization,https://covid19.who.int/
3,Algeria,DZA,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",2022-05-01,World Health Organization,https://covid19.who.int/
4,Andorra,AND,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech",2022-05-22,World Health Organization,https://covid19.who.int/


In [221]:
#Fixing the column names in location table by using iloc function in pandas.This is a data cleaning step.
#get the first row
new_column_name = location.iloc[0]
#remove the first row and store in the same data frame
location = location[1:]
#assign location column names with new column name stored above
location.columns = new_column_name
location.head()

Unnamed: 0,location,iso_code,vaccines,last_observation_date,source_name,source_website
1,Afghanistan,AFG,"CanSino, Covaxin, Johnson&Johnson, Moderna, Ox...",2022-06-01,World Health Organization,https://covid19.who.int/
2,Albania,ALB,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinovac, ...",2022-05-22,World Health Organization,https://covid19.who.int/
3,Algeria,DZA,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",2022-05-01,World Health Organization,https://covid19.who.int/
4,Andorra,AND,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech",2022-05-22,World Health Organization,https://covid19.who.int/
5,Angola,AGO,Oxford/AstraZeneca,2022-05-22,World Health Organization,https://covid19.who.int/


In [222]:
#Fetching only location and vaccines from location and storing in new data frame
location_and_vaccines = location[['location','vaccines']]
location_and_vaccines.head()

Unnamed: 0,location,vaccines
1,Afghanistan,"CanSino, Covaxin, Johnson&Johnson, Moderna, Ox..."
2,Albania,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinovac, ..."
3,Algeria,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac..."
4,Andorra,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech"
5,Angola,Oxford/AstraZeneca


In [223]:
#Fetching only location and last_observation_date from location and storing in new data frame
location_and_last_observation_date = location[['location','last_observation_date']]
location_and_last_observation_date['last_observation_date'] = pd.to_datetime(location_and_last_observation_date['last_observation_date'])
location_and_last_observation_date.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  location_and_last_observation_date['last_observation_date'] = pd.to_datetime(location_and_last_observation_date['last_observation_date'])


Unnamed: 0,location,last_observation_date
1,Afghanistan,2022-06-01
2,Albania,2022-05-22
3,Algeria,2022-05-01
4,Andorra,2022-05-22
5,Angola,2022-05-22


In [224]:
#Fetching only vaccine and total_vaccinations from vaccinations_by_manufacturer and storing in new data frame
vaccinations_by_manufacturer_and_total_vaccinations = vaccinations_by_manufacturer[['vaccine','total_vaccinations']]

vaccinations_by_manufacturer_and_total_vaccinations.head()

Unnamed: 0,vaccine,total_vaccinations
0,Moderna,2
1,Oxford/AstraZeneca,7
2,Pfizer/BioNTech,1
3,Sinopharm/Beijing,3
4,Sputnik V,20484


In [225]:
#Fetching only location and daily_vaccinations from vaccinations and storing in new data frame
location_and_daily_vaccinations = vaccinations[['location','daily_vaccinations']]
location_and_daily_vaccinations.head()

Unnamed: 0,location,daily_vaccinations
0,Afghanistan,
1,Afghanistan,1367.0
2,Afghanistan,1367.0
3,Afghanistan,1367.0
4,Afghanistan,1367.0


In [26]:
location_and_daily_vaccinations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 106528 entries, 0 to 106527
Data columns (total 2 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   location            106528 non-null  object 
 1   daily_vaccinations  106089 non-null  float64
dtypes: float64(1), object(1)
memory usage: 1.6+ MB


In [27]:
vaccinations_by_manufacturer_and_total_vaccinations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42763 entries, 0 to 42762
Data columns (total 2 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   vaccine             42763 non-null  object
 1   total_vaccinations  42763 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 668.3+ KB


In [28]:
vaccinations_by_manufacturer_and_total_vaccinations.isnull().sum()

vaccine               0
total_vaccinations    0
dtype: int64

In [29]:
vaccinations_by_manufacturer_and_total_vaccinations.isnull().sum()

vaccine               0
total_vaccinations    0
dtype: int64

In [226]:
#Fill na values of total_vaccinations column as 0
vaccinations_by_manufacturer_and_total_vaccinations['total_vaccinations'] = vaccinations_by_manufacturer_and_total_vaccinations['total_vaccinations'].fillna(0)
vaccinations_by_manufacturer_and_total_vaccinations.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vaccinations_by_manufacturer_and_total_vaccinations['total_vaccinations'] = vaccinations_by_manufacturer_and_total_vaccinations['total_vaccinations'].fillna(0)


Unnamed: 0,vaccine,total_vaccinations
0,Moderna,2
1,Oxford/AstraZeneca,7
2,Pfizer/BioNTech,1
3,Sinopharm/Beijing,3
4,Sputnik V,20484


In [227]:
#Fill na values of daily_vaccinations column of location_and_daily_vaccinations as 0
location_and_daily_vaccinations['daily_vaccinations'] = location_and_daily_vaccinations['daily_vaccinations'].fillna(0)
location_and_daily_vaccinations.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  location_and_daily_vaccinations['daily_vaccinations'] = location_and_daily_vaccinations['daily_vaccinations'].fillna(0)


Unnamed: 0,location,daily_vaccinations
0,Afghanistan,0.0
1,Afghanistan,1367.0
2,Afghanistan,1367.0
3,Afghanistan,1367.0
4,Afghanistan,1367.0


In [228]:
#Currently we have all the required data from all three tables in following 4 dataframes
#location_and_vaccines, location_and_last_observation_date, vaccinations_by_manufacturer_and_total_vaccinations,location_and_daily_vaccinations
#Storing all these to an S3 bucket
bucket = "bigdata-covid-project"
csv_buffer = StringIO()
csv_buffer

<_io.StringIO at 0x120a371c0>

In [229]:
#store the dataframes into buffers which can be then stored to S3
location_and_vaccines.to_csv(csv_buffer)
csv_buffer.getvalue()



',location,vaccines\n1,Afghanistan,"CanSino, Covaxin, Johnson&Johnson, Moderna, Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sinovac, Sputnik Light, Sputnik V"\n2,Albania,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinovac, Sputnik V"\n3,Algeria,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac, Sputnik V"\n4,Andorra,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech"\n5,Angola,Oxford/AstraZeneca\n6,Anguilla,"Oxford/AstraZeneca, Pfizer/BioNTech"\n7,Antigua and Barbuda,"Oxford/AstraZeneca, Pfizer/BioNTech, Sputnik V"\n8,Argentina,"CanSino, Moderna, Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sputnik V"\n9,Armenia,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sinopharm/Wuhan, Sinovac, Sputnik Light, Sputnik V"\n10,Aruba,Pfizer/BioNTech\n11,Australia,"Moderna, Novavax, Oxford/AstraZeneca, Pfizer/BioNTech"\n12,Austria,"Johnson&Johnson, Moderna, Novavax, Oxford/AstraZeneca, Pfizer/BioNTech"\n13,Azerbaijan,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinovac, Sputnik V

In [230]:
#Create S3 object and storing the buffers into the S3
ACCESS_KEY = ""
ACCESS_SECRET = ""
REGION = "us-east-1"
s3 = boto3.client(
    's3',
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=ACCESS_SECRET,
    region_name=REGION,
    )
#csv_buffer.seek(0)
s3.put_object(Bucket=bucket, Body=csv_buffer.getvalue(), Key='output/location_and_vaccines.csv')

{'ResponseMetadata': {'RequestId': 'HAF5YFGM1EE5AYZ4',
  'HostId': 'C7d0s8efv04SEnjQjHEvVoFvg92dgU55tMMxC9tKmMviUQKOMulgXET/yJRNFY5Remzl97UaOwU=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'C7d0s8efv04SEnjQjHEvVoFvg92dgU55tMMxC9tKmMviUQKOMulgXET/yJRNFY5Remzl97UaOwU=',
   'x-amz-request-id': 'HAF5YFGM1EE5AYZ4',
   'date': 'Fri, 10 Nov 2023 14:08:04 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"9112ffd3c7895d7202f201529116c5de"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"9112ffd3c7895d7202f201529116c5de"',
 'ServerSideEncryption': 'AES256'}

In [231]:
#load location_and_last_observation_date to s3
csv_buffer_1 = StringIO()
csv_buffer_1
location_and_last_observation_date.to_csv(csv_buffer_1)
csv_buffer_1.getvalue()
s3.put_object(Bucket=bucket, Body=csv_buffer_1.getvalue(), Key='output/location_and_last_observation_date.csv')

{'ResponseMetadata': {'RequestId': '3KB5TDPACDF0E084',
  'HostId': 'tQLG5u40VsfoG+9PQPUYblGQoJHQP6lQj60imrYngEEC+3WTXRzetq1GkE4fC62uaXp7y3YqSuc=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'tQLG5u40VsfoG+9PQPUYblGQoJHQP6lQj60imrYngEEC+3WTXRzetq1GkE4fC62uaXp7y3YqSuc=',
   'x-amz-request-id': '3KB5TDPACDF0E084',
   'date': 'Fri, 10 Nov 2023 14:08:27 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"b09172979633b6179e69453456d55dea"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"b09172979633b6179e69453456d55dea"',
 'ServerSideEncryption': 'AES256'}

In [232]:
#load vaccinations_by_manufacturer_and_total_vaccinations to s3
csv_buffer_2 = StringIO()
csv_buffer_2
vaccinations_by_manufacturer_and_total_vaccinations.to_csv(csv_buffer_2)
csv_buffer.getvalue()
s3.put_object(Bucket=bucket, Body=csv_buffer_2.getvalue(), Key='output/vaccinations_by_manufacturer_and_total_vaccinations.csv')

{'ResponseMetadata': {'RequestId': 'D3H6G2ZZJDWH9V2D',
  'HostId': 'yjOOYEXkAaYT4aP1SsnIHiL/7atyjdm3Rlyg62572pGhQSfyF6ldbjrqqoGDcFgUOqTPBsCX9RE=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'yjOOYEXkAaYT4aP1SsnIHiL/7atyjdm3Rlyg62572pGhQSfyF6ldbjrqqoGDcFgUOqTPBsCX9RE=',
   'x-amz-request-id': 'D3H6G2ZZJDWH9V2D',
   'date': 'Fri, 10 Nov 2023 14:08:39 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"5c7dbf0903114024e795d16b133cb7c4"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"5c7dbf0903114024e795d16b133cb7c4"',
 'ServerSideEncryption': 'AES256'}

In [233]:
#load location_and_daily_vaccinations to s3
csv_buffer_3 = StringIO()
csv_buffer_3
location_and_daily_vaccinations.to_csv(csv_buffer_3)
csv_buffer_3.getvalue()
s3.put_object(Bucket=bucket, Body=csv_buffer_3.getvalue(), Key='output/location_and_daily_vaccinations.csv')


{'ResponseMetadata': {'RequestId': '9F1ZMX207477Z29B',
  'HostId': 'OONjrbIHFlZXDWsdg/nkjWVyUcBXzfVIfzCvbQNMjhthqIqSt71nR4GaBnjNrxLrORMqwqghVpo=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'OONjrbIHFlZXDWsdg/nkjWVyUcBXzfVIfzCvbQNMjhthqIqSt71nR4GaBnjNrxLrORMqwqghVpo=',
   'x-amz-request-id': '9F1ZMX207477Z29B',
   'date': 'Fri, 10 Nov 2023 14:08:47 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"02dab7065dfb0c6704a9ee8de6bb3e3b"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"02dab7065dfb0c6704a9ee8de6bb3e3b"',
 'ServerSideEncryption': 'AES256'}

In [234]:
#Extract schema from the tables stored in the s3 output bucket
#location_and_vaccines
location_and_vaccines_sql = pd.io.sql.get_schema(location_and_vaccines.reset_index(),'location_and_vaccines' )
print(''.join(location_and_vaccines_sql))

CREATE TABLE "location_and_vaccines" (
"index" INTEGER,
  "location" TEXT,
  "vaccines" TEXT
)


In [235]:
#location_and_last_observation_date
location_and_last_observation_date_sql = pd.io.sql.get_schema(location_and_last_observation_date.reset_index(),'location_and_last_observation_date' )
print(''.join(location_and_last_observation_date_sql))

CREATE TABLE "location_and_last_observation_date" (
"index" INTEGER,
  "location" TEXT,
  "last_observation_date" TIMESTAMP
)


In [236]:
#vaccinations_by_manufacturer_and_total_vaccinations
vaccinations_by_manufacturer_and_total_vaccinations_sql = pd.io.sql.get_schema(vaccinations_by_manufacturer_and_total_vaccinations.reset_index(),'vaccinations_by_manufacturer_and_total_vaccinations' )
print(''.join(vaccinations_by_manufacturer_and_total_vaccinations_sql))


CREATE TABLE "vaccinations_by_manufacturer_and_total_vaccinations" (
"index" INTEGER,
  "vaccine" TEXT,
  "total_vaccinations" INTEGER
)


In [237]:
#location_and_daily_vaccinations
location_and_daily_vaccinations_sql = pd.io.sql.get_schema(location_and_daily_vaccinations.reset_index(),'location_and_daily_vaccinations' )
print(''.join(location_and_daily_vaccinations_sql))

CREATE TABLE "location_and_daily_vaccinations" (
"index" INTEGER,
  "location" TEXT,
  "daily_vaccinations" REAL
)


In [238]:
import redshift_connector
# Connects to Redshift cluster using AWS credentials
conn = redshift_connector.connect(
     host='bigdata-covid-redshift.csuwrslimmfj.us-east-1.redshift.amazonaws.com',
     port=5439,
     database='dev',
     user='',
     password=''
  )
#cursor= conn.cursor()
conn.autocommit=True
  
# Create a Cursor object
cursor: redshift_connector.Cursor = conn.cursor()
#Creating location_and_vaccines table on Redshift
cursor.execute("""
CREATE TABLE "location_and_vaccines" (
"index" INTEGER,
  "location" TEXT,
  "vaccines" TEXT
)
""")


<redshift_connector.cursor.Cursor at 0x13ff23ac0>

In [239]:
#Creating location_and_last_observation_date table on Redshift
cursor.execute("""
CREATE TABLE "location_and_last_observation_date" (
"index" INTEGER,
  "location" TEXT,
  "last_observation_date" TIMESTAMP
)

""")

<redshift_connector.cursor.Cursor at 0x13ff23ac0>

In [240]:
#Creating vaccinations_by_manufacturer_and_total_vaccinations table on Redshift
cursor.execute("""
CREATE TABLE "vaccinations_by_manufacturer_and_total_vaccinations" (
"index" INTEGER,
  "vaccine" TEXT,
  "total_vaccinations" INTEGER
)
""")

<redshift_connector.cursor.Cursor at 0x13ff23ac0>

In [241]:
#Creating location_and_daily_vaccinations table on Redshift
cursor.execute("""
CREATE TABLE "location_and_daily_vaccinations" (
"index" INTEGER,
  "location" TEXT,
  "daily_vaccinations" REAL
)
""")

<redshift_connector.cursor.Cursor at 0x13ff23ac0>

In [242]:
#copy command to copy data from s3 to redshift tables

cursor.execute("""
copy location_and_vaccines from 's3://bigdata-covid-project/output/location_and_vaccines.csv'
access_key_id ''
secret_access_key ''
delimiter ','
region 'us-east-1'
IGNOREHEADER 1
removequotes
emptyasnull
blanksasnull
""")


<redshift_connector.cursor.Cursor at 0x13ff23ac0>

In [244]:

cursor.execute("""
copy location_and_daily_vaccinations from 's3://bigdata-covid-project/output/location_and_daily_vaccinations.csv'
access_key_id ''
secret_access_key ''
delimiter ','
region 'us-east-1'
IGNOREHEADER 1
removequotes
emptyasnull
blanksasnull
""")


<redshift_connector.cursor.Cursor at 0x13ff23ac0>

In [245]:
cursor.execute("""
copy location_and_last_observation_date from 's3://bigdata-covid-project/output/location_and_last_observation_date.csv'
access_key_id ''
secret_access_key ''
delimiter ','
region 'us-east-1'
IGNOREHEADER 1
removequotes
emptyasnull
blanksasnull
""")

<redshift_connector.cursor.Cursor at 0x13ff23ac0>

In [246]:
cursor.execute("""
copy dev.public.vaccinations_by_manufacturer_and_total_vaccinations from 's3://bigdata-covid-project/output/vaccinations_by_manufacturer_and_total_vaccinations.csv'
access_key_id ''
secret_access_key ''
delimiter ','
region 'us-east-1'
IGNOREHEADER 1
removequotes
emptyasnull
blanksasnull
""")

<redshift_connector.cursor.Cursor at 0x13ff23ac0>