In [1]:
import psycopg2
import boto3
import os
import pandas as pd
from fuzzywuzzy import fuzz
from fuzzywuzzy import process


from dotenv import load_dotenv

from rds_connector import rds_connection
from rds_connector import RDSTablePull

from data_cleaning_utils import clean_empty_none

# Load environment variables from the .env file.
# override=True lets the .env values win over variables that already exist in
# the process environment. This matters for 'USER': Unix-like shells normally
# export USER as the login name, and load_dotenv() leaves existing variables
# untouched by default, so the database user from .env would be ignored.
load_dotenv(override=True)

# Access the secret values
username = os.getenv('USER')
password = os.getenv('PASS')
db = os.getenv('DB')
server = os.getenv('SERVER')

# Fail early with a readable message instead of handing None to the connector.
_missing_settings = [
    setting_name
    for setting_name, setting_value in (('USER', username),
                                        ('PASS', password),
                                        ('DB', db),
                                        ('SERVER', server))
    if setting_value is None
]
if _missing_settings:
    raise Exception(
        "AWS RDS CONNECTION ERROR: missing environment variable(s): "
        + ", ".join(_missing_settings)
    )

try:

    # Connect to AWS RDS Database; rds_connection returns a (conn, cursor) pair
    conn, cursor = rds_connection(username,
                                  password,
                                  db,
                                  server)

except Exception as e:

    # Re-raise with context, keeping the original error chained for the traceback
    raise Exception(f"AWS RDS CONNECTION ERROR: {e}") from e

In [2]:
# Base-table specification handed to RDSTablePull as `source`.
# The values in `fields` are the column names that appear in the resulting
# frame (application_number, created_at).
# NOTE(review): `project` and `order` presumably filter and sort the base
# table — confirm against RDSTablePull.
source = dict(
    table='applications_application',
    name='app',
    fields=[
        {
            'application_number': 'application_number',
            'created_at': 'created_at',
        },
    ],
    project=34,
    order='id',
)

# Answer-table join specifications handed to RDSTablePull as `join_list`.
# Each entry describes one question whose answer is joined onto the base table:
#   name            - label for the join
#   question_source - 'JOIN_SOURCE' or 'DATA_SOURCE'
#                     (NOTE(review): presumably selects which side the join
#                     key is taken from — confirm against RDSTablePull)
#   source_id       - key column on the side being joined from
#   join_id         - key column on the answer table
#   data_source     - answer table name
#   question_id     - id of the question whose answers are pulled
#   fields          - {answer-table column: output column name}
#   clean           - cleaning rules keyed by OUTPUT column name
join_list = [

    {'name': 'hotel_name',
     'question_source': 'JOIN_SOURCE',
     'source_id': 'id',
     'join_id': 'application_id',
     'data_source': 'application_data_textboxanswer',
     'question_id': 1015,
     'fields' : [{'value':'hotel_name'}],
     'clean' : [{'hotel_name': ['NULL']}]
    },

    {'name': 'hotel_address',
     'question_source': 'DATA_SOURCE',
     'source_id': 'repeating_answer_section_id',
     'join_id': 'repeating_answer_section_id',
     'question_id': 1016,
     'data_source': 'application_data_addressanswer',
     'fields': [{'line1':'hotel_address_line_1',
                 'line2': 'hotel_address_line_2',
                 'city' : 'hotel_city',
                 'state': 'hotel_state',
                 'zip' : 'hotel_zip'}],
     # Rules use the output aliases from `fields`, matching every other entry;
     # the raw names 'city'/'state'/'zip' do not exist in the resulting frame.
     'clean' : [{'hotel_address_line_1': ['NULL']},
                {'hotel_city': ['NULL']},
                {'hotel_state': ['NULL']},
                {'hotel_zip': ['NULL']}]
    },

    {'name': 'hotel_status',
     'question_source': 'DATA_SOURCE',
     'source_id': 'repeating_answer_section_id',
     'join_id': 'repeating_answer_section_id',
     'data_source': 'application_data_singleselectanswer',
     'question_id': 1013,
     'fields': [{'value':'hotel_status'}],
     'clean' : []
    },

    {'name': 'license_in',
     'question_source': 'DATA_SOURCE',
     'source_id': 'repeating_answer_section_id',
     'join_id': 'repeating_answer_section_id',
     'data_source': 'application_data_dateanswer',
     'question_id': 1021,
     'fields': [{'value':'license_in'}],
     'clean' : [{'license_in': ['DATE_CONVERT']}]
    },

    {'name': 'license_out',
     'question_source': 'DATA_SOURCE',
     'source_id': 'repeating_answer_section_id',
     'join_id': 'repeating_answer_section_id',
     'data_source': 'application_data_dateanswer',
     'question_id': 1022,
     'fields': [{'value':'license_out'}],
     'clean' : [{'license_out': ['DATE_CONVERT']}]
    },

    {'name': 'total_in_household',
     'question_source': 'JOIN_SOURCE',
     'source_id': 'id',
     'join_id': 'application_id',
     'data_source': 'application_data_numberanswer',
     'question_id': 596,
     'fields': [{'value':'total_in_household'}],
     'clean' : [{'total_in_household': ['INT_CONVERT']}]
    }
]

In [3]:
from rds_connector import build_schema

# TODO: call build_schema(...) here once its arguments are decided.
# An unfinished bare name in this cell raises NameError and stops
# "Restart Kernel -> Run All", so the cell holds only the import for now.

In [3]:
# Build the pull from the open connection plus the `source` / `join_list`
# specs; no hand-written SQL is supplied (query=None).
rds = RDSTablePull(
    conn=conn,
    cursor=cursor,
    query=None,
    source=source,
    join_list=join_list,
)

# Last expression of the cell, so the returned object is rendered as output.
rds.query_to_df()

Unnamed: 0,application_number,created_at,hotel_name,hotel_address_line_1,hotel_address_line_2,hotel_city,hotel_state,hotel_zip,hotel_status,license_in,license_out,total_in_household
0,KQ6625DD77,2024-09-27 18:40:42.775742+00:00,Testing 1234,dfsijldfsaijpldfs,,,FL,,Rejected,2024-11-07,2024-10-05,2.00
1,KQ6625DD77,2024-09-27 18:40:42.775742+00:00,third one,,,,FL,,Moved Out,2024-11-19,2025-01-21,2.00
2,KQ6625DD77,2024-09-27 18:40:42.775742+00:00,6846846489,,,,FL,,Applicant Accepted,2024-12-25,2024-11-02,2.00
3,B2B9MZYTQL,2024-10-01 18:06:55.607877+00:00,,,,,FL,,Rejected,,,2.00
4,NY6LC7XV4W,2024-10-01 19:10:39.192189+00:00,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
5196,986UHXVWF6,2024-10-24 20:07:53.334196+00:00,,,,,,,,,,2.00
5197,F4H3HRNGGR,2024-10-24 20:17:42.092803+00:00,,,,,,,,,,
5198,SYJTHFUUJZ,2024-10-24 20:29:16.697209+00:00,,,,,,,,,,2.00
5199,4N7H2XRRN8,2024-10-24 20:50:17.914068+00:00,,,,,,,,,,


In [None]:
# Print (rather than display) the value returned by build_query(), so any
# line breaks in it are shown as-is instead of as an escaped repr.
generated_query = rds.build_query()
print(generated_query)

In [None]:


# AWS credentials (optional).
# ACCESS_KEY / SECRET_KEY must be defined before the client is created;
# leaving them undefined makes the boto3.client(...) call raise NameError.
# NOTE(review): the variable name "ACESSKEY" is spelled as in the original
# commented-out lines — confirm it matches the .env file.
ACCESS_KEY = os.getenv("ACESSKEY")
SECRET_KEY = os.getenv("SECRETKEY")

# Bucket and file information (placeholders — replace before running)
bucket_name = 'your_bucket_name'
file_path = 'path_to_your_csv_file.csv'
key_name = 'key_name_for_csv_file_in_s3_bucket.csv'

# Create a S3 client.
# Explicit keys are passed only when both are present; otherwise boto3 resolves
# credentials through its default chain (environment, shared config, IAM role).
if ACCESS_KEY and SECRET_KEY:
    s3 = boto3.client('s3', aws_access_key_id=ACCESS_KEY, aws_secret_access_key=SECRET_KEY)
else:
    s3 = boto3.client('s3')

# Upload the CSV file to S3 bucket (raises on failure, so the message below
# is only printed after a successful upload)
s3.upload_file(file_path, bucket_name, key_name)

print(f'{file_path} has been successfully uploaded to {bucket_name}/{key_name}')