### 1. Install Library

In [1]:
# %pip install pandas mysql-connector-python google-cloud-bigquery python-dotenv

### 2. Import Library

In [2]:
import pandas as pd
import mysql.connector
import sqlite3
import os
from google.cloud import bigquery
from dotenv import load_dotenv

### 3. Database Configuration

In [3]:
# Load configuration from the .env file into the process environment
load_dotenv()

# MySQL configuration (the database is hosted on Amazon RDS)
rds_host = os.getenv('RDS_HOST')
rds_dbname = os.getenv('RDS_DBNAME')
rds_user = os.getenv('RDS_USER')
rds_password = os.getenv('RDS_PASSWORD')

# Google Cloud configuration
project_id = os.getenv('GOOGLE_CLOUD_PROJECT_ID')
dataset_id = os.getenv('GOOGLE_CLOUD_DATASET_ID')
google_application_credentials = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')

# Export GOOGLE_APPLICATION_CREDENTIALS only when a value was found.
# os.getenv returns None for an unset variable, and os.environ accepts only
# str values, so an unconditional assignment raises TypeError in that case.
if google_application_credentials is not None:
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = google_application_credentials

# Connect to MySQL on Amazon RDS
conn_rds = mysql.connector.connect(
    host=rds_host,
    database=rds_dbname,
    user=rds_user,
    password=rds_password
)

### 4. Extract

#### The `table_to_df` function below extracts all rows of a table into a pandas DataFrame

In [4]:
def table_to_df(table_name):
    """Read every row and column of a table into a pandas DataFrame.

    Args:
        table_name: Name of the table to read. It is interpolated directly
            into the SQL text, so it must be a trusted identifier.

    Returns:
        The DataFrame produced by running ``SELECT * FROM <table_name>``
        over the module-level ``conn_rds`` connection.
    """
    return pd.read_sql_query(f"SELECT * FROM {table_name}", conn_rds)

#### 4.1 Converting data from tables to dataframes

In [5]:
# Tables to pull from the RDS database, in extraction order.
table_names = ['destinations', 'routes', 'route_details', 'users']

# Extract each listed table exactly once, keyed by table name, so the list
# above is the single place that controls which tables are read.
raw_tables = {name: table_to_df(name) for name in table_names}

# Bind each extracted frame to the name used by the later cells.
destinations_df = raw_tables['destinations']
routes_df = raw_tables['routes']
route_details_df = raw_tables['route_details']
users_df = raw_tables['users']

  df = pd.read_sql_query(query, conn_rds)


#### 4.2 Closing connection

In [6]:
# Close the RDS connection; the four tables were already read into DataFrames.
conn_rds.close()

### 5. Transformation

#### 5.1 Selecting a subset of columns from each DataFrame

In [7]:
# Columns kept for each dimension table.
destination_columns = [
    'name', 'description', 'open_time', 'close_time',
    'entry_price', 'longitude', 'latitude', 'visit_count',
]
route_columns = ['name', 'start_longitude', 'start_latitude', 'price']
route_detail_columns = [
    'longitude', 'latitude', 'duration', 'order', 'visit_start', 'visit_end',
]
user_columns = [
    'email', 'username', 'fullname', 'phone_number',
    'gender', 'city', 'province',
]

# Build each dimension by selecting only its listed columns from the raw frame.
dim_destinations = destinations_df[destination_columns]
dim_routes = routes_df[route_columns]
dim_route_details = route_details_df[route_detail_columns]
dim_users = users_df[user_columns]

#### 5.2 destinations dimension

In [8]:
# Display the destinations dimension as this cell's output.
# NOTE(review): the recorded output shows latitude 456.789 for "Goa Pindul",
# which is outside the valid -90..90 range — verify the source row.
dim_destinations

Unnamed: 0,name,description,open_time,close_time,entry_price,longitude,latitude,visit_count
0,Kawah Ijen,Kawah Ijen adalah sebuah kompleks gunung berap...,08:00,17:00,100000.0,114.2423,-8.0582,23
1,Candi Borobudur,Candi Borobudur adalah sebuah candi Buddha yan...,08:00,17:00,50000.0,110.2038,-7.6079,4
2,Pantai Kuta,Pantai Kuta adalah salah satu pantai yang terk...,08:00,17:00,0.0,115.1675,-8.7174,1
3,Danau Toba,Danau Toba adalah danau terbesar di Indonesia ...,08:00,17:00,0.0,99.0852,2.6696,0
4,Taman Mini Indonesia Indah,Taman Mini Indonesia Indah adalah sebuah taman...,08:00,17:00,20000.0,106.8956,-6.3027,2
5,Gunung Bromo,Gunung Bromo adalah sebuah gunung berapi aktif...,08:00,17:00,30000.0,112.9528,-7.9425,1
6,Goa Pindul,Gua tempat Joko terbentur tersebut dinamai Gua...,08:00,17:00,25000.0,123.456,456.789,1
7,Pulau Komodo,Pulau Komodo adalah sebuah pulau yang terletak...,08:00,17:00,150000.0,119.4986,-8.5833,0
8,Raja Ampat,Raja Ampat adalah kepulauan yang terletak di b...,08:00,17:00,500000.0,130.5036,-1.0562,1
9,Tanah Lot,Tanah Lot adalah sebuah formasi batuan di lepa...,08:00,17:00,20000.0,115.0865,-8.6211,0


#### 5.3 routes dimension

In [9]:
# Display the routes dimension as this cell's output.
# NOTE(review): in the recorded output start_longitude is 3.105625 and
# start_latitude is 97.394489; these look swapped (97.39 is not a valid
# latitude) — confirm against the source table.
dim_routes

Unnamed: 0,name,start_longitude,start_latitude,price
0,Rute 1,3.105625,97.394489,50000.0
1,Rute 2,3.105625,97.394489,50000.0


#### 5.4 route_details dimension

In [10]:
# Display the route_details dimension as this cell's output.
# NOTE(review): the three rows in the recorded output are identical across the
# selected columns — confirm whether a dropped column distinguishes them.
dim_route_details

Unnamed: 0,longitude,latitude,duration,order,visit_start,visit_end
0,114.2423,-8.0582,3600,1,0 days 09:00:00,0 days 11:00:00
1,114.2423,-8.0582,3600,1,0 days 09:00:00,0 days 11:00:00
2,114.2423,-8.0582,3600,1,0 days 09:00:00,0 days 11:00:00


#### 5.5 users dimension

In [11]:
# Display the users dimension as this cell's output.
# NOTE(review): the recorded output contains e-mail addresses and phone
# numbers — clear or redact it before sharing the notebook.
# NOTE(review): phone_number is rendered as a float (e.g. 81234567891.0) and
# the last row is entirely empty — confirm the intended dtype and whether
# empty rows should be dropped.
dim_users

Unnamed: 0,email,username,fullname,phone_number,gender,city,province
0,xifihi49190@huleos.com,testverify10,John Doe,81234567891.0,,,
1,irsyadyazidsyafiq@gmail.com,yazid.syafiq,Yazid Syafiq Irsyad,81393984849.0,,,
2,shuuuuuuu@example.com,paimon,nama_lengkap,81234567890.0,pria,Jaksel,Jakarta
3,johndoe2345@example.com,johndoe2443,John Doe,81234567891.0,,,
4,johndoe100@example.com,johndoe100,John Doe,81234567891.0,,,
5,demex95935@cgbird.com,testverify21,John Doe,8123456790.0,,,
6,bikiko364222@huleos.com,testverify1,John Doe,81234567891.0,,,
7,xifihi4990@huleos.com,testverify9,John Doe,81234567891.0,,,
8,johndoe16@gmail.com,johndoe16,John Doe,8129391092.0,,,
9,,,,,,,


### 6. Load to BigQuery