# ETL & Datawarehouse
**Purpose**: load data from a data lake (S3), process it, and save it to a data warehouse (RDS).

In [46]:
# import 
import io
import os
from urllib.parse import quote

import boto3
import pandas as pd
from dotenv import load_dotenv
from IPython.display import display
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, String, Float 
from sqlalchemy.ext.declarative import declarative_base

In [2]:
# load dotenv variables
load_dotenv()
# database settings: one constant per environment variable (None when unset)
DBUSER, DBPASS, DBHOST, DBNAME = (
    os.getenv(env_key) for env_key in ('DBUSER', 'DBPASS', 'DBHOST', 'DBNAME')
)

In [3]:
# show the database name that was read from the environment
DBNAME

'kayak_db'

In [4]:
# engine : connected to our db
# Fail early with a readable message when a setting is unset; otherwise the
# literal text "None" would silently end up inside the connection URL.
db_settings = {"DBUSER": DBUSER, "DBPASS": DBPASS, "DBHOST": DBHOST, "DBNAME": DBNAME}
missing_settings = [key for key, value in db_settings.items() if value is None]
if missing_settings:
    raise RuntimeError(f"Missing database settings: {', '.join(missing_settings)}")

# Percent-encode the credentials so characters such as '@', ':' or '/' in the
# user name or password cannot be mistaken for URL delimiters.
# NOTE(review): this expects DBUSER/DBPASS to be stored in raw (unencoded) form.
connection_string = (
    f"postgresql+psycopg2://{quote(DBUSER, safe='')}:{quote(DBPASS, safe='')}"
    f"@{DBHOST}/{DBNAME}"
)
# echo=True logs the emitted SQL statements; future=True requests the 2.0-style API
engine = create_engine(connection_string, echo=True, future=True)


In [15]:
# declarative base shared by the ORM models of this notebook
Base = declarative_base()


# city table definition
class City(Base):
    """ORM model mapped to the ``cities`` table."""

    __tablename__ = "cities"

    # string identifier used as primary key
    uuid = Column(String, primary_key=True)
    # text columns
    name = Column(String)
    full_address = Column(String)
    # float columns
    latitude = Column(Float)
    longitude = Column(Float)

    def __repr__(self):
        """Return a short representation showing the name and full address."""
        return "<City(name={}, fulladdress={})>".format(self.name, self.full_address)

In [16]:
# create_table
# Tables registered on Base are only created when INIT is switched to True.
INIT = False
if INIT:
    Base.metadata.create_all(engine)

### Read data lake files
* `weather_data.csv`
* `hotels_booking.csv`

In [35]:
# s3 bucket
# Load the dotenv variables, then read the AWS credentials from the environment.
# (AWSS_ACCESS_KEY keeps its original spelling because it is a notebook-level name.)
load_dotenv()
AWSS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY = (
    os.getenv(env_key) for env_key in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')
)

# aws session built from those explicit credentials
session = boto3.Session(
    aws_access_key_id=AWSS_ACCESS_KEY,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)

# s3 service resource created from the session
s3 = session.resource("s3")

# handle on the project bucket
bucket_name = 'kayak-mahadou'
kayak_bucket = s3.Bucket(bucket_name)

In [49]:
# read weather data from s3
weather_key = "weather_data.csv"
weather_csv_obj = s3.Object(bucket_name, weather_key)
# download the whole object body and decode it as UTF-8 text
weather_csv = weather_csv_obj.get()['Body'].read().decode('utf-8')

# dataframe
# pd.read_csv interprets a plain str as a path/URL, so the CSV text is wrapped in
# an in-memory buffer to be parsed as content; the first column becomes the index.
weather_df = pd.read_csv(io.StringIO(weather_csv), index_col=[0])
display(weather_df.sample(2))

Unnamed: 0,uuid,cities,full_address,latitude,longitude,volume_rain_7days
12,e72d90cf-9252-4c78-aba4-8d06fcbe849c,Besancon,"Besançon, Doubs, Bourgogne-Franche-Comté, Fran...",47.238022,6.024362,5.443
10,dded3711-c772-43bd-9401-1fb13ff46ec0,Colmar,"Colmar, Colmar-Ribeauvillé, Haut-Rhin, Grand E...",48.077752,7.357964,4.411


In [50]:
# read hotels data from s3
hotels_key = "hotels_booking.csv"
hotels_key_obj = s3.Object(bucket_name, hotels_key)
# download the whole object body and decode it as UTF-8 text
hotels_csv = hotels_key_obj.get()['Body'].read().decode('utf-8')

# dataframe
# pd.read_csv interprets a plain str as a path/URL, so the CSV text is wrapped in
# an in-memory buffer to be parsed as content; the first column becomes the index.
hotels_df = pd.read_csv(io.StringIO(hotels_csv), index_col=[0])
display(hotels_df.sample(2))

Unnamed: 0,city,name,url,image_url,score,description
167,marseille,B&B Hôtel Marseille Centre La Joliette,booking.com/hotel/fr/b-amp-b-ha-tel-marseille-...,https://cf.bstatic.com/xdata/images/hotel/squa...,7.0,Bénéficiant d'une connexion Wi-Fi gratuite dan...
207,bormes-les-mimosas,Superbe Duplex 35m2 vue mer avec piscine,booking.com/hotel/fr/voir-les-photos-presentat...,https://cf.bstatic.com/xdata/images/hotel/squa...,8.5,"Offrant une vue sur la mer, le Superbe Duplex ..."


In [17]:
# throwaway City instance built with placeholder values
city = City(
    uuid="fhgf",
    name="yoo",
    full_address="jgd",
    latitude=0,
    longitude=0,
)

In [18]:
# display the instance; the cell output is rendered through City.__repr__
city

<City(name=yoo, fulladdress=jgd)>