In [3]:
import pandas as pd
from datetime import datetime, timedelta, date
import group2setting
import boto3
from sqlalchemy import create_engine
from io import StringIO


def load_311_daily_data_from_s3(selected_date_str):
    """Read one daily 311 CSV from S3 and return a fixed subset of its columns.

    Parameters
    ----------
    selected_date_str : str
        Text inserted into the object key, giving
        ``Group_2_311_daily<selected_date_str>.csv``. Callers in this file
        pass a ``%Y%m%d`` formatted date.

    Returns
    -------
    pandas.DataFrame
        The 16 columns listed in ``wanted_columns``, in that order.

    Notes
    -----
    The object key and the shape of the full CSV are printed as progress
    output. Selection is strict: a CSV that lacks any of the wanted columns
    makes the ``.loc`` lookup raise instead of returning a partial frame.
    """
    # Columns kept for the database table; everything else in the CSV is dropped.
    wanted_columns = [
        'unique_key',
        'created_date',
        'closed_date',
        'agency',
        'agency_name',
        'complaint_type',
        'descriptor',
        'incident_zip',
        'city',
        'status',
        'resolution_description',
        'resolution_action_updated_date',
        'borough',
        'open_data_channel_type',
        'latitude',
        'longitude',
    ]

    object_key = "Group_2_311_daily{}.csv".format(selected_date_str)
    print(object_key)

    # The bucket name comes from the project settings module.
    response = boto3.client('s3').get_object(Bucket=group2setting.s3_bucket, Key=object_key)

    # 'Body' is handed straight to pandas as a file-like object.
    full_frame = pd.read_csv(response['Body'])
    print(full_frame.shape)

    return full_frame.loc[:, wanted_columns]


def save_into_rds_by_name(sub_df, table_name = 't_311_items'):
    """Append the rows of ``sub_df`` to a database table via SQLAlchemy.

    Parameters
    ----------
    sub_df : pandas.DataFrame
        Frame to write. Its shape is printed before the insert.
    table_name : str, optional
        Destination table name; defaults to ``'t_311_items'``.

    Raises
    ------
    Exception
        Anything raised by ``create_engine`` or ``DataFrame.to_sql``
        propagates unchanged; the success message is not printed in that case.

    Notes
    -----
    Rows are written with ``if_exists='append'`` and the frame index is not
    stored, so calling this twice with the same frame stores its rows twice.
    """
    print(sub_df.shape)

    # The connection URL lives in the project settings module. A fresh engine,
    # and therefore a fresh connection pool, is built on every call.
    engine = create_engine(group2setting.rds_mysql_engine)
    try:
        sub_df.to_sql(con=engine, name=table_name, if_exists='append', index=False)
    finally:
        # Release the pooled connections even when the insert fails; without
        # this, every call (one per day during a backfill) leaves connections
        # open on the database server.
        engine.dispose()

    print(table_name + " save into rds successfully")

    
# Defining lambda_handler function
def lambda_handler(event, context):
    """Load the daily 311 file dated two days before today and store it.

    ``event`` and ``context`` are accepted but never read. The date comes
    from ``datetime.today()`` and is rendered as ``%Y%m%d`` for the loader.
    The resulting frame is handed to ``save_into_rds_by_name`` with its
    default table name. Returns ``None``.
    """
    two_days_back = datetime.today() - timedelta(days = 2)
    daily_frame = load_311_daily_data_from_s3(two_days_back.strftime('%Y%m%d'))
    save_into_rds_by_name(daily_frame)

In [4]:
# Manual run of the handler. It never reads `event` or `context`, so
# placeholder values are passed for both.
lambda_handler(1, 2)

s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210504.csv
 load s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210504.csv from s3 successfully
(64, 35)
(64, 16)
t_311_items save into rds successfully


In [5]:
import time
def load_history(start_date='2021-02-15', end_date=None, pause_seconds=1):
    """Backfill the database one day at a time from the daily S3 extracts.

    Parameters
    ----------
    start_date : str or datetime, optional
        First day to load. A string is parsed with
        ``datetime.fromisoformat``. Defaults to ``'2021-02-15'``.
    end_date : str or datetime, optional
        Exclusive upper bound: a day is loaded only while it is strictly
        earlier than this value. ``None`` (the default) means "now minus one
        day", so yesterday's midnight still qualifies and yesterday's file is
        the last one loaded.
    pause_seconds : int or float, optional
        Seconds to sleep after each day's load. Defaults to 1.
        NOTE(review): presumably a throttle for S3 or the database — confirm.

    Notes
    -----
    There is no error handling around the per-day load and save, so the first
    failure stops the backfill; days already processed stay in the database.
    Calling with no arguments behaves exactly as the original hard-coded
    version did.
    """
    if isinstance(start_date, str):
        start_date = datetime.fromisoformat(start_date)

    if end_date is None:
        end_date = datetime.today() - timedelta(days = 1)
    elif isinstance(end_date, str):
        end_date = datetime.fromisoformat(end_date)

    one_day = timedelta(days = 1)

    while start_date < end_date:
        print(start_date)
        selected_date_str = start_date.strftime('%Y%m%d')

        sub_df = load_311_daily_data_from_s3(selected_date_str)

        save_into_rds_by_name(sub_df)
        time.sleep(pause_seconds)

        start_date = start_date + one_day

# Run the backfill. Rows are written with if_exists='append', so running this
# cell again inserts the same days a second time.
load_history()

2021-02-15 00:00:00
s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210215.csv
 load s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210215.csv from s3 successfully
(808, 41)
(808, 16)
t_311_items save into rds successfully
2021-02-16 00:00:00
s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210216.csv
 load s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210216.csv from s3 successfully
(814, 41)
(814, 16)
t_311_items save into rds successfully
2021-02-17 00:00:00
s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210217.csv
 load s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210217.csv from s3 successfully
(774, 41)
(774, 16)
t_311_items save into rds successfully
2021-02-18 00:00:00
s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210218.csv
 load s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210218.csv from s3 successfully
(719, 41)
(

t_311_items save into rds successfully
2021-03-19 00:00:00
s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210319.csv
 load s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210319.csv from s3 successfully
(999, 41)
(999, 16)
t_311_items save into rds successfully
2021-03-20 00:00:00
s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210320.csv
 load s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210320.csv from s3 successfully
(2123, 41)
(2123, 16)
t_311_items save into rds successfully
2021-03-21 00:00:00
s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210321.csv
 load s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210321.csv from s3 successfully
(2560, 40)
(2560, 16)
t_311_items save into rds successfully
2021-03-22 00:00:00
s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210322.csv
 load s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20

2021-04-19 00:00:00
s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210419.csv
 load s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210419.csv from s3 successfully
(1361, 41)
(1361, 16)
t_311_items save into rds successfully
2021-04-20 00:00:00
s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210420.csv
 load s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210420.csv from s3 successfully
(1859, 41)
(1859, 16)
t_311_items save into rds successfully
2021-04-21 00:00:00
s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210421.csv
 load s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210421.csv from s3 successfully
(934, 41)
(934, 16)
t_311_items save into rds successfully
2021-04-22 00:00:00
s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210422.csv
 load s3://information-arch-yuehao-wang-assignment-8a/Group_2_311_daily20210422.csv from s3 successfully
(784, 4