### 1. Install Library

In [1]:
# %pip install pandas mysql-connector-python google-cloud-bigquery python-dotenv
# %pip install pandas-gbq

### 2. Import Library

In [2]:
import pandas as pd
import mysql.connector
import os
from pandas_gbq import to_gbq
from google.oauth2 import service_account
from google.cloud import bigquery
from dotenv import load_dotenv

### 3. Konfigurasi ke Database

In [3]:
# Load configuration from the .env file into the process environment
load_dotenv()

# MySQL (Amazon RDS) connection settings, read from environment variables
rds_host = os.getenv('RDS_HOST')
rds_dbname = os.getenv('RDS_DBNAME')
rds_user = os.getenv('RDS_USER')
rds_password = os.getenv('RDS_PASSWORD')

# os.getenv() returns None for a variable that is not set. Fail fast with a
# readable message instead of handing None values to the MySQL driver.
# An empty string is still accepted (e.g. an intentionally blank password).
rds_settings = {
    'RDS_HOST': rds_host,
    'RDS_DBNAME': rds_dbname,
    'RDS_USER': rds_user,
    'RDS_PASSWORD': rds_password,
}
missing_rds_settings = [name for name, value in rds_settings.items() if value is None]
if missing_rds_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_rds_settings)
    )

# Open the connection to MySQL on Amazon RDS
conn_rds = mysql.connector.connect(
    host=rds_host,
    database=rds_dbname,
    user=rds_user,
    password=rds_password
)

### 4. Extract

#### 4.1 Extract semua tabel dan jadikan dataframe

In [4]:
# Names of the source MySQL tables to extract, in extraction order
tables = [
    'destinations',
    'categories',
    'destination_facilities',
    'facilities',
    'destination_addresses',
    'provinces',
    'cities',
    'subdistricts',
    'destination_media',
]

# Helper that reads one whole MySQL table into a pandas DataFrame
def load_table_to_df(table_name):
    """Return every row and column of ``table_name`` as a DataFrame.

    The query is executed over the module-level ``conn_rds`` connection.
    ``table_name`` is interpolated directly into the SQL text, so it must be
    a trusted identifier (the notebook only passes names from ``tables``).

    Parameters
    ----------
    table_name : str
        Name of the table to read.

    Returns
    -------
    pandas.DataFrame
        Result of ``SELECT *`` on the table.
    """
    return pd.read_sql(f"SELECT * FROM {table_name}", conn_rds)

#### 4.2 Load semua tabel ke Dictionary

In [5]:
# Extract every table named in `tables` and keep the resulting DataFrames in a
# dictionary keyed by table name
dataframes = {table: load_table_to_df(table) for table in tables}

  df = pd.read_sql(query, conn_rds)


#### 4.3 Close Connection

In [6]:
# Close the MySQL connection: the tables listed in `tables` have already been
# read into `dataframes`, and the following cells only use those DataFrames.
conn_rds.close()

#### 4.4 Tampilkan Dataframe destinations

In [7]:
# Example of accessing and displaying a specific DataFrame: take the raw
# `destinations` table out of the `dataframes` dict and show it as cell output
df_destinations = dataframes['destinations']
df_destinations

Unnamed: 0,id,category_id,name,description,open_time,close_time,entry_price,longitude,latitude,visit_count,created_at,updated_at,deleted_at
0,306d305e-3359-4884-8d38-89c04e8adea6,eb77b590-b255-4ea1-b11a-d445a259ac61,Kawah Ijen,Kawah Ijen adalah sebuah kompleks gunung berap...,08:00,17:00,100000.0,114.2423,-8.0582,23,2024-06-07 07:47:59,2024-06-10 08:05:22,
1,306d305e-3359-4884-8d38-89c04e8adec1,eb77b590-b255-4ea1-b11a-d445a259ac62,Candi Borobudur,Candi Borobudur adalah sebuah candi Buddha yan...,08:00,17:00,50000.0,110.2038,-7.6079,4,2024-06-07 07:47:59,2024-06-10 08:05:24,
2,306d305e-3359-4884-8d38-89c04e8adec2,eb77b590-b255-4ea1-b11a-d445a259ac61,Pantai Kuta,Pantai Kuta adalah salah satu pantai yang terk...,08:00,17:00,0.0,115.1675,-8.7174,1,2024-06-07 07:47:59,2024-06-08 15:09:44,
3,306d305e-3359-4884-8d38-89c04e8adec3,eb77b590-b255-4ea1-b11a-d445a259ac61,Danau Toba,Danau Toba adalah danau terbesar di Indonesia ...,08:00,17:00,0.0,99.0852,2.6696,0,2024-06-07 07:47:59,2024-06-07 07:47:59,
4,306d305e-3359-4884-8d38-89c04e8adec4,eb77b590-b255-4ea1-b11a-d445a259ac61,Taman Mini Indonesia Indah,Taman Mini Indonesia Indah adalah sebuah taman...,08:00,17:00,20000.0,106.8956,-6.3027,2,2024-06-07 07:47:59,2024-06-08 17:36:51,
5,306d305e-3359-4884-8d38-89c04e8adec5,eb77b590-b255-4ea1-b11a-d445a259ac62,Gunung Bromo,Gunung Bromo adalah sebuah gunung berapi aktif...,08:00,17:00,30000.0,112.9528,-7.9425,1,2024-06-07 07:47:59,2024-06-10 08:05:30,
6,306d305e-3359-4884-8d38-89c04e8adec6,eb77b590-b255-4ea1-b11a-d445a259ac61,Goa Pindul,Gua tempat Joko terbentur tersebut dinamai Gua...,08:00,17:00,25000.0,123.456,456.789,1,2024-06-07 07:47:59,2024-06-08 15:14:05,
7,306d305e-3359-4884-8d38-89c04e8adec7,eb77b590-b255-4ea1-b11a-d445a259ac62,Pulau Komodo,Pulau Komodo adalah sebuah pulau yang terletak...,08:00,17:00,150000.0,119.4986,-8.5833,0,2024-06-07 07:47:59,2024-06-07 07:47:59,
8,306d305e-3359-4884-8d38-89c04e8adec8,eb77b590-b255-4ea1-b11a-d445a259ac61,Raja Ampat,Raja Ampat adalah kepulauan yang terletak di b...,08:00,17:00,500000.0,130.5036,-1.0562,1,2024-06-07 07:47:59,2024-06-08 16:02:56,
9,306d305e-3359-4884-8d38-89c04e8adec9,eb77b590-b255-4ea1-b11a-d445a259ac61,Tanah Lot,Tanah Lot adalah sebuah formasi batuan di lepa...,08:00,17:00,20000.0,115.0865,-8.6211,0,2024-06-07 07:47:59,2024-06-07 07:47:59,


#### 4.5 Tampilkan Dataframe categories

In [8]:
# Raw `categories` table from the `dataframes` dict, shown as cell output
df_categories = dataframes['categories']
df_categories

Unnamed: 0,id,name,url,created_at,updated_at,deleted_at
0,eb77b590-b255-4ea1-b11a-d445a259ac61,Alam,https://res.cloudinary.com/alta-minpro/image/u...,2024-06-06 04:15:53,2024-06-06 04:15:53,
1,eb77b590-b255-4ea1-b11a-d445a259ac62,Seni dan Budaya,https://res.cloudinary.com/alta-minpro/image/u...,2024-06-06 04:15:53,2024-06-10 09:41:51,
2,eb77b590-b255-4ea1-b11a-d445a259ac63,Sejarah,https://res.cloudinary.com/alta-minpro/image/u...,2024-06-06 04:15:53,2024-06-10 09:41:51,


#### 4.6 Tampilkan Dataframe destination_facilities

In [9]:
# Raw `destination_facilities` table (one row per destination_id / facility_id
# pair), shown as cell output
df_destination_facilities = dataframes['destination_facilities']
df_destination_facilities

Unnamed: 0,id,destination_id,facility_id,created_at,updated_at,deleted_at
0,eb77b590-b255-4ea1-b11a-d445a259ac80,306d305e-3359-4884-8d38-89c04e8adec9,515cb2da-3361-48b8-99fd-bd894828efa3,2024-06-07 07:47:59,2024-06-07 07:47:59,
1,eb77b590-b255-4ea1-b11a-d445a259ac81,306d305e-3359-4884-8d38-89c04e8adea6,515cb2da-3361-48b8-99fd-bd894828efd6,2024-06-07 07:47:59,2024-06-07 07:47:59,
2,eb77b590-b255-4ea1-b11a-d445a259ac82,306d305e-3359-4884-8d38-89c04e8adec1,515cb2da-3361-48b8-99fd-bd894828efa5,2024-06-07 07:47:59,2024-06-07 07:47:59,
3,eb77b590-b255-4ea1-b11a-d445a259ac83,306d305e-3359-4884-8d38-89c04e8adec2,515cb2da-3361-48b8-99fd-bd894828efa5,2024-06-07 07:47:59,2024-06-07 07:47:59,
4,eb77b590-b255-4ea1-b11a-d445a259ac84,306d305e-3359-4884-8d38-89c04e8adec3,515cb2da-3361-48b8-99fd-bd894828efd1,2024-06-07 07:47:59,2024-06-07 07:47:59,
5,eb77b590-b255-4ea1-b11a-d445a259ac85,306d305e-3359-4884-8d38-89c04e8adec4,515cb2da-3361-48b8-99fd-bd894828efa2,2024-06-07 07:47:59,2024-06-07 07:47:59,
6,eb77b590-b255-4ea1-b11a-d445a259ac86,306d305e-3359-4884-8d38-89c04e8adec5,515cb2da-3361-48b8-99fd-bd894828efa5,2024-06-07 07:47:59,2024-06-07 07:47:59,
7,eb77b590-b255-4ea1-b11a-d445a259ac87,306d305e-3359-4884-8d38-89c04e8adec6,515cb2da-3361-48b8-99fd-bd894828efd4,2024-06-07 07:47:59,2024-06-07 07:47:59,
8,eb77b590-b255-4ea1-b11a-d445a259ac88,306d305e-3359-4884-8d38-89c04e8adec7,515cb2da-3361-48b8-99fd-bd894828efd1,2024-06-07 07:47:59,2024-06-07 07:47:59,
9,eb77b590-b255-4ea1-b11a-d445a259ac89,306d305e-3359-4884-8d38-89c04e8adec8,515cb2da-3361-48b8-99fd-bd894828efa4,2024-06-07 07:47:59,2024-06-07 07:47:59,


#### 4.7 Tampilkan Dataframe facilities

In [10]:
# Raw `facilities` table from the `dataframes` dict, shown as cell output
df_facilities = dataframes['facilities']
df_facilities

Unnamed: 0,id,name,url,created_at,updated_at,deleted_at
0,515cb2da-3361-48b8-99fd-bd894828efa1,Kolam Renang,https://picsum.photos/300/200,2024-06-07 07:47:49,2024-06-07 07:47:49,
1,515cb2da-3361-48b8-99fd-bd894828efa2,WiFi Gratis,https://picsum.photos/300/200,2024-06-07 07:47:49,2024-06-07 07:47:49,
2,515cb2da-3361-48b8-99fd-bd894828efa3,Pusat Kesehatan,https://picsum.photos/300/200,2024-06-07 07:47:49,2024-06-07 07:47:49,
3,515cb2da-3361-48b8-99fd-bd894828efa4,Layanan Penjemputan,https://picsum.photos/300/200,2024-06-07 07:47:49,2024-06-07 07:47:49,
4,515cb2da-3361-48b8-99fd-bd894828efa5,Tempat Penitipan Barang,https://picsum.photos/300/200,2024-06-07 07:47:49,2024-06-07 07:47:49,
5,515cb2da-3361-48b8-99fd-bd894828efa6,Area Piknik,https://picsum.photos/300/200,2024-06-07 07:47:49,2024-06-07 07:47:49,
6,515cb2da-3361-48b8-99fd-bd894828efd1,Kamar Mandi,https://picsum.photos/300/200,2024-06-07 07:47:49,2024-06-07 07:47:49,
7,515cb2da-3361-48b8-99fd-bd894828efd2,Mushola,https://picsum.photos/300/200,2024-06-07 07:47:49,2024-06-07 07:47:49,
8,515cb2da-3361-48b8-99fd-bd894828efd3,Area Parkir,https://picsum.photos/300/200,2024-06-07 07:47:49,2024-06-07 07:47:49,
9,515cb2da-3361-48b8-99fd-bd894828efd4,Penginapan,https://picsum.photos/300/200,2024-06-07 07:47:49,2024-06-07 07:47:49,


#### 4.8 Tampilkan Dataframe destination_addresses

In [11]:
# Raw `destination_addresses` table; it carries the province_id, city_id and
# subdistrict_id keys that are resolved to names in the transformation step
df_destination_addresses = dataframes['destination_addresses']
df_destination_addresses

Unnamed: 0,id,destination_id,province_id,city_id,subdistrict_id,street_name,postal_code,created_at,updated_at,deleted_at
0,eb77b590-b255-4ea1-b11a-d445a259ac70,306d305e-3359-4884-8d38-89c04e8adec9,95,9504,950433,Jalan Sam Ratulangi,95111,2024-06-07 07:47:59,2024-06-07 07:47:59,
1,eb77b590-b255-4ea1-b11a-d445a259ac71,306d305e-3359-4884-8d38-89c04e8adea6,91,9106,910610,Jalan MH Thamrin,10110,2024-06-07 07:47:59,2024-06-07 07:47:59,
2,eb77b590-b255-4ea1-b11a-d445a259ac72,306d305e-3359-4884-8d38-89c04e8adec1,35,3527,352705,Jalan Dago,40181,2024-06-07 07:47:59,2024-06-07 07:47:59,
3,eb77b590-b255-4ea1-b11a-d445a259ac73,306d305e-3359-4884-8d38-89c04e8adec2,12,1212,121201,Jalan Tunjungan,60231,2024-06-07 07:47:59,2024-06-07 07:47:59,
4,eb77b590-b255-4ea1-b11a-d445a259ac74,306d305e-3359-4884-8d38-89c04e8adec3,32,3213,321312,Jalan Malioboro,55281,2024-06-07 07:47:59,2024-06-07 07:47:59,
5,eb77b590-b255-4ea1-b11a-d445a259ac75,306d305e-3359-4884-8d38-89c04e8adec4,35,3501,350105,Jalan Sunset Road,80228,2024-06-07 07:47:59,2024-06-07 07:47:59,
6,eb77b590-b255-4ea1-b11a-d445a259ac76,306d305e-3359-4884-8d38-89c04e8adec5,74,7402,740225,Jalan Gatot Subroto,20151,2024-06-07 07:47:59,2024-06-07 07:47:59,
7,eb77b590-b255-4ea1-b11a-d445a259ac77,306d305e-3359-4884-8d38-89c04e8adec6,35,3512,351209,Jalan Pandanaran,50149,2024-06-07 07:47:59,2024-06-07 07:47:59,
8,eb77b590-b255-4ea1-b11a-d445a259ac78,306d305e-3359-4884-8d38-89c04e8adec7,35,3510,351024,Jalan Pettarani,90114,2024-06-07 07:47:59,2024-06-07 07:47:59,
9,eb77b590-b255-4ea1-b11a-d445a259ac79,306d305e-3359-4884-8d38-89c04e8adec8,13,1310,131006,Jalan Sudirman,76125,2024-06-07 07:47:59,2024-06-07 07:47:59,


#### 4.9 Tampilkan Dataframe provinces

In [12]:
# Raw `provinces` lookup table from the `dataframes` dict, shown as cell output
df_provinces = dataframes['provinces']
df_provinces

Unnamed: 0,id,name,url,created_at,updated_at,deleted_at
0,11,ACEH,https://res.cloudinary.com/alta-minpro/image/u...,2024-06-06 04:15:53,2024-06-06 04:15:53,
1,12,SUMATERA UTARA,https://res.cloudinary.com/alta-minpro/image/u...,2024-06-06 04:15:54,2024-06-06 04:15:54,
2,13,SUMATERA BARAT,https://res.cloudinary.com/alta-minpro/image/u...,2024-06-06 04:15:54,2024-06-06 04:15:54,
3,14,RIAU,https://res.cloudinary.com/alta-minpro/image/u...,2024-06-06 04:15:54,2024-06-06 04:15:54,
4,15,JAMBI,https://res.cloudinary.com/alta-minpro/image/u...,2024-06-06 04:15:54,2024-06-06 04:15:54,
5,16,SUMATERA SELATAN,https://res.cloudinary.com/alta-minpro/image/u...,2024-06-06 04:15:54,2024-06-06 04:15:54,
6,17,BENGKULU,https://res.cloudinary.com/alta-minpro/image/u...,2024-06-06 04:15:54,2024-06-06 04:15:54,
7,18,LAMPUNG,https://res.cloudinary.com/alta-minpro/image/u...,2024-06-06 04:15:54,2024-06-06 04:15:54,
8,19,BANGKA BELITUNG,https://res.cloudinary.com/alta-minpro/image/u...,2024-06-06 04:15:54,2024-06-06 04:15:54,
9,21,KEPULAUAN RIAU,https://res.cloudinary.com/alta-minpro/image/u...,2024-06-06 04:15:54,2024-06-06 04:15:54,


#### 4.10 Tampilkan Dataframe cities

In [13]:
# Raw `cities` lookup table (each row references a province via province_id)
df_cities = dataframes['cities']
df_cities

Unnamed: 0,id,province_id,name,created_at,updated_at,deleted_at
0,1101,11,KAB. ACEH SELATAN,2024-06-06 04:15:54,2024-06-06 04:15:54,
1,1102,11,KAB. ACEH TENGGARA,2024-06-06 04:15:54,2024-06-06 04:15:54,
2,1103,11,KAB. ACEH TIMUR,2024-06-06 04:15:54,2024-06-06 04:15:54,
3,1104,11,KAB. ACEH TENGAH,2024-06-06 04:15:54,2024-06-06 04:15:54,
4,1105,11,KAB. ACEH BARAT,2024-06-06 04:15:54,2024-06-06 04:15:54,
...,...,...,...,...,...,...
509,9504,95,KAB. TOLIKARA,2024-06-06 04:15:56,2024-06-06 04:15:56,
510,9505,95,KAB. MAMBERAMO TENGAH,2024-06-06 04:15:56,2024-06-06 04:15:56,
511,9506,95,KAB. YALIMO,2024-06-06 04:15:56,2024-06-06 04:15:56,
512,9507,95,KAB. LANNY JAYA,2024-06-06 04:15:56,2024-06-06 04:15:56,


#### 4.11 Tampilkan Dataframe subdistricts

In [14]:
# Raw `subdistricts` lookup table (each row references a city via city_id)
df_subdistricts = dataframes['subdistricts']
df_subdistricts

Unnamed: 0,id,city_id,name,created_at,updated_at,deleted_at
0,110101,1101,Bakongan,2024-06-06 04:15:56,2024-06-06 04:15:56,
1,110102,1101,Kluet Utara,2024-06-06 04:15:56,2024-06-06 04:15:56,
2,110103,1101,Kluet Selatan,2024-06-06 04:15:56,2024-06-06 04:15:56,
3,110104,1101,Labuhanhaji,2024-06-06 04:15:56,2024-06-06 04:15:56,
4,110105,1101,Meukek,2024-06-06 04:15:56,2024-06-06 04:15:56,
...,...,...,...,...,...,...
7272,950828,9508,Moba,2024-06-06 04:16:33,2024-06-06 04:16:33,
7273,950829,9508,Wutpaga,2024-06-06 04:16:33,2024-06-06 04:16:33,
7274,950830,9508,Nenggeagin,2024-06-06 04:16:33,2024-06-06 04:16:33,
7275,950831,9508,Krepkuri,2024-06-06 04:16:33,2024-06-06 04:16:33,


#### 4.12 Tampilkan Dataframe destination_media

In [15]:
# Raw `destination_media` table (media rows linked to a destination via
# destination_id), shown as cell output
df_destination_media = dataframes['destination_media']
df_destination_media

Unnamed: 0,id,destination_id,url,type,title,created_at,updated_at,deleted_at
0,1bde58e3-ef19-4daa-9df7-084ba5d3e930,306d305e-3359-4884-8d38-89c04e8adec9,http://commondatastorage.googleapis.com/gtv-vi...,video,Tears of Steel 10,2024-06-07 07:47:59,2024-06-07 07:47:59,
1,1bde58e3-ef19-4daa-9df7-084ba5d3e931,306d305e-3359-4884-8d38-89c04e8adea6,http://commondatastorage.googleapis.com/gtv-vi...,video,Tears of Steel 1,2024-06-07 07:47:59,2024-06-07 07:47:59,
2,1bde58e3-ef19-4daa-9df7-084ba5d3e932,306d305e-3359-4884-8d38-89c04e8adec1,http://commondatastorage.googleapis.com/gtv-vi...,video,Volkswagen GTI Review 2,2024-06-07 07:47:59,2024-06-07 07:47:59,
3,1bde58e3-ef19-4daa-9df7-084ba5d3e933,306d305e-3359-4884-8d38-89c04e8adec2,http://commondatastorage.googleapis.com/gtv-vi...,video,We Are Going On Bullrun 3,2024-06-07 07:47:59,2024-06-07 07:47:59,
4,1bde58e3-ef19-4daa-9df7-084ba5d3e934,306d305e-3359-4884-8d38-89c04e8adec3,http://commondatastorage.googleapis.com/gtv-vi...,video,Tears of Steel 4,2024-06-07 07:47:59,2024-06-07 07:47:59,
5,1bde58e3-ef19-4daa-9df7-084ba5d3e935,306d305e-3359-4884-8d38-89c04e8adec4,http://commondatastorage.googleapis.com/gtv-vi...,video,Volkswagen GTI Review 5,2024-06-07 07:47:59,2024-06-07 07:47:59,
6,1bde58e3-ef19-4daa-9df7-084ba5d3e936,306d305e-3359-4884-8d38-89c04e8adec5,http://commondatastorage.googleapis.com/gtv-vi...,video,We Are Going On Bullrun 6,2024-06-07 07:47:59,2024-06-07 07:47:59,
7,1bde58e3-ef19-4daa-9df7-084ba5d3e937,306d305e-3359-4884-8d38-89c04e8adec6,http://commondatastorage.googleapis.com/gtv-vi...,video,Tears of Steel 7,2024-06-07 07:47:59,2024-06-07 07:47:59,
8,1bde58e3-ef19-4daa-9df7-084ba5d3e938,306d305e-3359-4884-8d38-89c04e8adec7,http://commondatastorage.googleapis.com/gtv-vi...,video,Volkswagen GTI Review 8,2024-06-07 07:47:59,2024-06-07 07:47:59,
9,1bde58e3-ef19-4daa-9df7-084ba5d3e939,306d305e-3359-4884-8d38-89c04e8adec8,http://commondatastorage.googleapis.com/gtv-vi...,video,We Are Going On Bullrun 9,2024-06-07 07:47:59,2024-06-07 07:47:59,


### 5. Transformation

#### 5.1 Merged df_destination_facilities dan df_facilities

In [16]:
# Attach the facility record to every destination-facility link.
# The merge uses the default inner join and starts from the link table, so
# only facilities attached to at least one destination survive.
df_merged_facilities = df_destination_facilities.merge(
    df_facilities, left_on='facility_id', right_on='id'
)

# Facility dimension: one row per distinct facility, keyed by `id`, with a
# fresh 0..n-1 index
df_dim_facilities = (
    df_merged_facilities[['facility_id', 'name', 'url']]
    .drop_duplicates()
    .rename(columns={'facility_id': 'id'})
    .reset_index(drop=True)
)

##### 5.1.1 Dim Facilities

In [17]:
# Menampilkan DataFrame hasil
df_dim_facilities

Unnamed: 0,id,name,url
0,515cb2da-3361-48b8-99fd-bd894828efa3,Pusat Kesehatan,https://picsum.photos/300/200
1,515cb2da-3361-48b8-99fd-bd894828efd6,Toko Oleh-Oleh,https://picsum.photos/300/200
2,515cb2da-3361-48b8-99fd-bd894828efa5,Tempat Penitipan Barang,https://picsum.photos/300/200
3,515cb2da-3361-48b8-99fd-bd894828efd1,Kamar Mandi,https://picsum.photos/300/200
4,515cb2da-3361-48b8-99fd-bd894828efa2,WiFi Gratis,https://picsum.photos/300/200
5,515cb2da-3361-48b8-99fd-bd894828efd4,Penginapan,https://picsum.photos/300/200
6,515cb2da-3361-48b8-99fd-bd894828efa4,Layanan Penjemputan,https://picsum.photos/300/200


#### 5.2 Merged df_destination_addresses dengan df_provinces, df_cities, df_subdistricts

In [18]:
# Resolve the province, city and subdistrict keys of each address to names.
# pandas applies `suffixes` only to column names that collide in that
# particular merge, which yields:
#   id_dest   - address id           (collides with provinces.id in merge 1)
#   name_prov - province name        (collides with cities.name in merge 2)
#   name_city - city name            (collides with cities.name in merge 2)
#   name      - subdistrict name     (no collision left in merge 3)
df_merged_address = (
    df_destination_addresses
    .merge(df_provinces, left_on='province_id', right_on='id', suffixes=('_dest', '_prov'))
    .merge(df_cities, left_on='city_id', right_on='id', suffixes=('_prov', '_city'))
    .merge(df_subdistricts, left_on='subdistrict_id', right_on='id', suffixes=('_city', '_subd'))
)

# Merged column -> address-dimension column, in output order
address_columns = {
    'id_dest': 'id',
    'name_prov': 'provinces',
    'name_city': 'cities',
    'name': 'subdistricts',
    'street_name': 'street_name',
    'postal_code': 'postal_code',
}
df_dim_address = df_merged_address[list(address_columns)].rename(columns=address_columns)

##### 5.2.1 Dim Address

In [19]:
# Tampilkan DataFrame hasil
df_dim_address

Unnamed: 0,id,provinces,cities,subdistricts,street_name,postal_code
0,eb77b590-b255-4ea1-b11a-d445a259ac70,PAPUA PEGUNUNGAN,KAB. TOLIKARA,Wakuwo,Jalan Sam Ratulangi,95111
1,eb77b590-b255-4ea1-b11a-d445a259ac71,PAPUA,KAB. BIAK NUMFOR,Padaido,Jalan MH Thamrin,10110
2,eb77b590-b255-4ea1-b11a-d445a259ac72,JAWA TIMUR,KAB. SAMPANG,Omben,Jalan Dago,40181
3,eb77b590-b255-4ea1-b11a-d445a259ac75,JAWA TIMUR,KAB. PACITAN,Kebonagung,Jalan Sunset Road,80228
4,eb77b590-b255-4ea1-b11a-d445a259ac77,JAWA TIMUR,KAB. SITUBONDO,Mangaran,Jalan Pandanaran,50149
5,eb77b590-b255-4ea1-b11a-d445a259ac78,JAWA TIMUR,KAB. BANYUWANGI,Licin,Jalan Pettarani,90114
6,eb77b590-b255-4ea1-b11a-d445a259ac73,SUMATERA UTARA,KAB. TOBA,Balige,Jalan Tunjungan,60231
7,eb77b590-b255-4ea1-b11a-d445a259ac74,JAWA BARAT,KAB. SUBANG,Jalancagak,Jalan Malioboro,55281
8,eb77b590-b255-4ea1-b11a-d445a259ac76,SULAWESI TENGGARA,KAB. KONAWE,Meluhu,Jalan Gatot Subroto,20151
9,eb77b590-b255-4ea1-b11a-d445a259ac79,SUMATERA BARAT,KAB. DHARMASRAYA,Timpeh,Jalan Sudirman,76125


#### 5.3 Drop Column di df_destinations (Dim Destinations)

In [20]:
# Destination dimension: the raw destinations table without the category key
# and without the created/updated/deleted timestamp columns
columns_to_drop = [
    'category_id',
    'created_at',
    'updated_at',
    'deleted_at',
]
df_dim_destinations = df_destinations.drop(columns_to_drop, axis='columns')

# Show the result
df_dim_destinations

Unnamed: 0,id,name,description,open_time,close_time,entry_price,longitude,latitude,visit_count
0,306d305e-3359-4884-8d38-89c04e8adea6,Kawah Ijen,Kawah Ijen adalah sebuah kompleks gunung berap...,08:00,17:00,100000.0,114.2423,-8.0582,23
1,306d305e-3359-4884-8d38-89c04e8adec1,Candi Borobudur,Candi Borobudur adalah sebuah candi Buddha yan...,08:00,17:00,50000.0,110.2038,-7.6079,4
2,306d305e-3359-4884-8d38-89c04e8adec2,Pantai Kuta,Pantai Kuta adalah salah satu pantai yang terk...,08:00,17:00,0.0,115.1675,-8.7174,1
3,306d305e-3359-4884-8d38-89c04e8adec3,Danau Toba,Danau Toba adalah danau terbesar di Indonesia ...,08:00,17:00,0.0,99.0852,2.6696,0
4,306d305e-3359-4884-8d38-89c04e8adec4,Taman Mini Indonesia Indah,Taman Mini Indonesia Indah adalah sebuah taman...,08:00,17:00,20000.0,106.8956,-6.3027,2
5,306d305e-3359-4884-8d38-89c04e8adec5,Gunung Bromo,Gunung Bromo adalah sebuah gunung berapi aktif...,08:00,17:00,30000.0,112.9528,-7.9425,1
6,306d305e-3359-4884-8d38-89c04e8adec6,Goa Pindul,Gua tempat Joko terbentur tersebut dinamai Gua...,08:00,17:00,25000.0,123.456,456.789,1
7,306d305e-3359-4884-8d38-89c04e8adec7,Pulau Komodo,Pulau Komodo adalah sebuah pulau yang terletak...,08:00,17:00,150000.0,119.4986,-8.5833,0
8,306d305e-3359-4884-8d38-89c04e8adec8,Raja Ampat,Raja Ampat adalah kepulauan yang terletak di b...,08:00,17:00,500000.0,130.5036,-1.0562,1
9,306d305e-3359-4884-8d38-89c04e8adec9,Tanah Lot,Tanah Lot adalah sebuah formasi batuan di lepa...,08:00,17:00,20000.0,115.0865,-8.6211,0


#### 5.4 Drop Column di df_categories (Dim Categories)

In [21]:
# Category dimension: keep only `id` and `name` by removing the image URL and
# the created/updated/deleted timestamp columns
columns_to_drop = [
    'url',
    'created_at',
    'updated_at',
    'deleted_at',
]
df_dim_categories = df_categories.drop(columns_to_drop, axis='columns')

# Show the DataFrame after the columns were removed
df_dim_categories

Unnamed: 0,id,name
0,eb77b590-b255-4ea1-b11a-d445a259ac61,Alam
1,eb77b590-b255-4ea1-b11a-d445a259ac62,Seni dan Budaya
2,eb77b590-b255-4ea1-b11a-d445a259ac63,Sejarah


#### 5.5 Drop Column di df_destination_media (Dim Medias)

In [22]:
# Media dimension: the raw media table without the destination key and
# without the created/updated/deleted timestamp columns
columns_to_drop = [
    'destination_id',
    'created_at',
    'updated_at',
    'deleted_at',
]
df_dim_medias = df_destination_media.drop(columns_to_drop, axis='columns')

# Show the DataFrame after the columns were removed
df_dim_medias

Unnamed: 0,id,url,type,title
0,1bde58e3-ef19-4daa-9df7-084ba5d3e930,http://commondatastorage.googleapis.com/gtv-vi...,video,Tears of Steel 10
1,1bde58e3-ef19-4daa-9df7-084ba5d3e931,http://commondatastorage.googleapis.com/gtv-vi...,video,Tears of Steel 1
2,1bde58e3-ef19-4daa-9df7-084ba5d3e932,http://commondatastorage.googleapis.com/gtv-vi...,video,Volkswagen GTI Review 2
3,1bde58e3-ef19-4daa-9df7-084ba5d3e933,http://commondatastorage.googleapis.com/gtv-vi...,video,We Are Going On Bullrun 3
4,1bde58e3-ef19-4daa-9df7-084ba5d3e934,http://commondatastorage.googleapis.com/gtv-vi...,video,Tears of Steel 4
5,1bde58e3-ef19-4daa-9df7-084ba5d3e935,http://commondatastorage.googleapis.com/gtv-vi...,video,Volkswagen GTI Review 5
6,1bde58e3-ef19-4daa-9df7-084ba5d3e936,http://commondatastorage.googleapis.com/gtv-vi...,video,We Are Going On Bullrun 6
7,1bde58e3-ef19-4daa-9df7-084ba5d3e937,http://commondatastorage.googleapis.com/gtv-vi...,video,Tears of Steel 7
8,1bde58e3-ef19-4daa-9df7-084ba5d3e938,http://commondatastorage.googleapis.com/gtv-vi...,video,Volkswagen GTI Review 8
9,1bde58e3-ef19-4daa-9df7-084ba5d3e939,http://commondatastorage.googleapis.com/gtv-vi...,video,We Are Going On Bullrun 9


#### 5.6 Merged for Fact Table (destination_fact)

In [23]:
# Build one wide frame holding every key and measure the fact table needs.
# Categories and media are joined with the default inner join, so a
# destination with no matching category or no media row is dropped;
# facilities and addresses are left-joined and therefore optional.
merged_df = (
    df_destinations
    .merge(df_categories, left_on='category_id', right_on='id', suffixes=('_destinations', '_categories'))
    .merge(df_destination_media, left_on='id_destinations', right_on='destination_id')
    .merge(df_destination_facilities, left_on='id_destinations', right_on='destination_id', how='left')
    .merge(df_destination_addresses, left_on='id_destinations', right_on='destination_id', how='left')
)

merged_df


Unnamed: 0,id_destinations,category_id,name_destinations,description,open_time,close_time,entry_price,longitude,latitude,visit_count,...,id,destination_id,province_id,city_id,subdistrict_id,street_name,postal_code,created_at,updated_at,deleted_at
0,306d305e-3359-4884-8d38-89c04e8adea6,eb77b590-b255-4ea1-b11a-d445a259ac61,Kawah Ijen,Kawah Ijen adalah sebuah kompleks gunung berap...,08:00,17:00,100000.0,114.2423,-8.0582,23,...,eb77b590-b255-4ea1-b11a-d445a259ac71,306d305e-3359-4884-8d38-89c04e8adea6,91,9106,910610,Jalan MH Thamrin,10110,2024-06-07 07:47:59,2024-06-07 07:47:59,
1,306d305e-3359-4884-8d38-89c04e8adea6,eb77b590-b255-4ea1-b11a-d445a259ac61,Kawah Ijen,Kawah Ijen adalah sebuah kompleks gunung berap...,08:00,17:00,100000.0,114.2423,-8.0582,23,...,eb77b590-b255-4ea1-b11a-d445a259ac71,306d305e-3359-4884-8d38-89c04e8adea6,91,9106,910610,Jalan MH Thamrin,10110,2024-06-07 07:47:59,2024-06-07 07:47:59,
2,306d305e-3359-4884-8d38-89c04e8adec2,eb77b590-b255-4ea1-b11a-d445a259ac61,Pantai Kuta,Pantai Kuta adalah salah satu pantai yang terk...,08:00,17:00,0.0,115.1675,-8.7174,1,...,eb77b590-b255-4ea1-b11a-d445a259ac73,306d305e-3359-4884-8d38-89c04e8adec2,12,1212,121201,Jalan Tunjungan,60231,2024-06-07 07:47:59,2024-06-07 07:47:59,
3,306d305e-3359-4884-8d38-89c04e8adec2,eb77b590-b255-4ea1-b11a-d445a259ac61,Pantai Kuta,Pantai Kuta adalah salah satu pantai yang terk...,08:00,17:00,0.0,115.1675,-8.7174,1,...,eb77b590-b255-4ea1-b11a-d445a259ac73,306d305e-3359-4884-8d38-89c04e8adec2,12,1212,121201,Jalan Tunjungan,60231,2024-06-07 07:47:59,2024-06-07 07:47:59,
4,306d305e-3359-4884-8d38-89c04e8adec3,eb77b590-b255-4ea1-b11a-d445a259ac61,Danau Toba,Danau Toba adalah danau terbesar di Indonesia ...,08:00,17:00,0.0,99.0852,2.6696,0,...,eb77b590-b255-4ea1-b11a-d445a259ac74,306d305e-3359-4884-8d38-89c04e8adec3,32,3213,321312,Jalan Malioboro,55281,2024-06-07 07:47:59,2024-06-07 07:47:59,
5,306d305e-3359-4884-8d38-89c04e8adec3,eb77b590-b255-4ea1-b11a-d445a259ac61,Danau Toba,Danau Toba adalah danau terbesar di Indonesia ...,08:00,17:00,0.0,99.0852,2.6696,0,...,eb77b590-b255-4ea1-b11a-d445a259ac74,306d305e-3359-4884-8d38-89c04e8adec3,32,3213,321312,Jalan Malioboro,55281,2024-06-07 07:47:59,2024-06-07 07:47:59,
6,306d305e-3359-4884-8d38-89c04e8adec4,eb77b590-b255-4ea1-b11a-d445a259ac61,Taman Mini Indonesia Indah,Taman Mini Indonesia Indah adalah sebuah taman...,08:00,17:00,20000.0,106.8956,-6.3027,2,...,eb77b590-b255-4ea1-b11a-d445a259ac75,306d305e-3359-4884-8d38-89c04e8adec4,35,3501,350105,Jalan Sunset Road,80228,2024-06-07 07:47:59,2024-06-07 07:47:59,
7,306d305e-3359-4884-8d38-89c04e8adec4,eb77b590-b255-4ea1-b11a-d445a259ac61,Taman Mini Indonesia Indah,Taman Mini Indonesia Indah adalah sebuah taman...,08:00,17:00,20000.0,106.8956,-6.3027,2,...,eb77b590-b255-4ea1-b11a-d445a259ac75,306d305e-3359-4884-8d38-89c04e8adec4,35,3501,350105,Jalan Sunset Road,80228,2024-06-07 07:47:59,2024-06-07 07:47:59,
8,306d305e-3359-4884-8d38-89c04e8adec6,eb77b590-b255-4ea1-b11a-d445a259ac61,Goa Pindul,Gua tempat Joko terbentur tersebut dinamai Gua...,08:00,17:00,25000.0,123.456,456.789,1,...,eb77b590-b255-4ea1-b11a-d445a259ac77,306d305e-3359-4884-8d38-89c04e8adec6,35,3512,351209,Jalan Pandanaran,50149,2024-06-07 07:47:59,2024-06-07 07:47:59,
9,306d305e-3359-4884-8d38-89c04e8adec6,eb77b590-b255-4ea1-b11a-d445a259ac61,Goa Pindul,Gua tempat Joko terbentur tersebut dinamai Gua...,08:00,17:00,25000.0,123.456,456.789,1,...,eb77b590-b255-4ea1-b11a-d445a259ac77,306d305e-3359-4884-8d38-89c04e8adec6,35,3512,351209,Jalan Pandanaran,50149,2024-06-07 07:47:59,2024-06-07 07:47:59,


In [24]:
# List the merged column names to see how pandas disambiguated the colliding
# `id`, `destination_id`, `url` and timestamp columns (`_x` / `_y` suffixes);
# the fact-table cell selects columns by these generated names.
print(merged_df.columns)

Index(['id_destinations', 'category_id', 'name_destinations', 'description',
       'open_time', 'close_time', 'entry_price', 'longitude', 'latitude',
       'visit_count', 'created_at_destinations', 'updated_at_destinations',
       'deleted_at_destinations', 'id_categories', 'name_categories', 'url_x',
       'created_at_categories', 'updated_at_categories',
       'deleted_at_categories', 'id_x', 'destination_id_x', 'url_y', 'type',
       'title', 'created_at_x', 'updated_at_x', 'deleted_at_x', 'id_y',
       'destination_id_y', 'facility_id', 'created_at_y', 'updated_at_y',
       'deleted_at_y', 'id', 'destination_id', 'province_id', 'city_id',
       'subdistrict_id', 'street_name', 'postal_code', 'created_at',
       'updated_at', 'deleted_at'],
      dtype='object')


#### 5.7 Build Fact Table (destination_fact)

In [25]:
# Derived measures added to the merged frame:
#   total_content_video - 1 when the joined media row has type 'video', else 0
#   total_pendapatan    - total revenue: entry_price multiplied by visit_count
merged_df = merged_df.assign(
    total_content_video=merged_df['type'].eq('video').astype(int),
    total_pendapatan=merged_df['entry_price'] * merged_df['visit_count'],
)

# Merged column -> fact-table column, in output order.
# After the chain of merges `id_x` is the media id and the unsuffixed `id` is
# the address id (see the column listing printed for `merged_df`).
fact_columns = {
    'id_destinations': 'destinations_id',
    'category_id': 'categories_id',
    'id_x': 'medias_id',
    'facility_id': 'facilities_id',
    'id': 'address_id',
    'total_content_video': 'total_content_video',
    'total_pendapatan': 'total_pendapatan',
}

# Select, rename, then remove rows that are exact duplicates
df_destination_fact = (
    merged_df[list(fact_columns)]
    .rename(columns=fact_columns)
    .drop_duplicates()
)

# Preview the result
df_destination_fact

Unnamed: 0,destinations_id,categories_id,medias_id,facilities_id,address_id,total_content_video,total_pendapatan
0,306d305e-3359-4884-8d38-89c04e8adea6,eb77b590-b255-4ea1-b11a-d445a259ac61,1bde58e3-ef19-4daa-9df7-084ba5d3e931,515cb2da-3361-48b8-99fd-bd894828efd6,eb77b590-b255-4ea1-b11a-d445a259ac71,1,2300000.0
1,306d305e-3359-4884-8d38-89c04e8adea6,eb77b590-b255-4ea1-b11a-d445a259ac61,1bde58e3-ef19-4daa-9df7-084ba5d3e941,515cb2da-3361-48b8-99fd-bd894828efd6,eb77b590-b255-4ea1-b11a-d445a259ac71,0,2300000.0
2,306d305e-3359-4884-8d38-89c04e8adec2,eb77b590-b255-4ea1-b11a-d445a259ac61,1bde58e3-ef19-4daa-9df7-084ba5d3e933,515cb2da-3361-48b8-99fd-bd894828efa5,eb77b590-b255-4ea1-b11a-d445a259ac73,1,0.0
3,306d305e-3359-4884-8d38-89c04e8adec2,eb77b590-b255-4ea1-b11a-d445a259ac61,1bde58e3-ef19-4daa-9df7-084ba5d3e943,515cb2da-3361-48b8-99fd-bd894828efa5,eb77b590-b255-4ea1-b11a-d445a259ac73,0,0.0
4,306d305e-3359-4884-8d38-89c04e8adec3,eb77b590-b255-4ea1-b11a-d445a259ac61,1bde58e3-ef19-4daa-9df7-084ba5d3e934,515cb2da-3361-48b8-99fd-bd894828efd1,eb77b590-b255-4ea1-b11a-d445a259ac74,1,0.0
5,306d305e-3359-4884-8d38-89c04e8adec3,eb77b590-b255-4ea1-b11a-d445a259ac61,1bde58e3-ef19-4daa-9df7-084ba5d3e944,515cb2da-3361-48b8-99fd-bd894828efd1,eb77b590-b255-4ea1-b11a-d445a259ac74,0,0.0
6,306d305e-3359-4884-8d38-89c04e8adec4,eb77b590-b255-4ea1-b11a-d445a259ac61,1bde58e3-ef19-4daa-9df7-084ba5d3e935,515cb2da-3361-48b8-99fd-bd894828efa2,eb77b590-b255-4ea1-b11a-d445a259ac75,1,40000.0
7,306d305e-3359-4884-8d38-89c04e8adec4,eb77b590-b255-4ea1-b11a-d445a259ac61,1bde58e3-ef19-4daa-9df7-084ba5d3e945,515cb2da-3361-48b8-99fd-bd894828efa2,eb77b590-b255-4ea1-b11a-d445a259ac75,0,40000.0
8,306d305e-3359-4884-8d38-89c04e8adec6,eb77b590-b255-4ea1-b11a-d445a259ac61,1bde58e3-ef19-4daa-9df7-084ba5d3e937,515cb2da-3361-48b8-99fd-bd894828efd4,eb77b590-b255-4ea1-b11a-d445a259ac77,1,25000.0
9,306d305e-3359-4884-8d38-89c04e8adec6,eb77b590-b255-4ea1-b11a-d445a259ac61,1bde58e3-ef19-4daa-9df7-084ba5d3e947,515cb2da-3361-48b8-99fd-bd894828efd4,eb77b590-b255-4ea1-b11a-d445a259ac77,0,25000.0


### 6. Load to Big Query

#### 6.1 Konfigurasi ke Big Query

In [26]:
# Load environment variables from the .env file
load_dotenv()

# Google Cloud settings, read from environment variables
project_id = os.getenv('GOOGLE_CLOUD_PROJECT_ID')
dataset_id = os.getenv('GOOGLE_CLOUD_DATASET_ID')
google_application_credentials = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')

# os.getenv() returns None for a variable that is not set. Stop here with a
# readable message rather than building a table id such as "None.None.<table>"
# or failing later inside the credentials loader.
gcp_settings = {
    'GOOGLE_CLOUD_PROJECT_ID': project_id,
    'GOOGLE_CLOUD_DATASET_ID': dataset_id,
    'GOOGLE_APPLICATION_CREDENTIALS': google_application_credentials,
}
missing_gcp_settings = [name for name, value in gcp_settings.items() if not value]
if missing_gcp_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_gcp_settings)
    )

# GOOGLE_APPLICATION_CREDENTIALS was read from os.environ just above, so it is
# already exported for any library that looks it up; no re-assignment needed.

# Build explicit service-account credentials from the key file path
credentials = service_account.Credentials.from_service_account_file(google_application_credentials)

#### 6.2 Load dataframe to big query

In [27]:
# Function to load a DataFrame into BigQuery
def load_to_bq(df, table_name):
    """Upload ``df`` to BigQuery as ``<project_id>.<dataset_id>.<table_name>``.

    Uses the module-level ``project_id``, ``dataset_id`` and ``credentials``.
    The call passes ``if_exists='replace'``, so an existing table with the
    same name is replaced by the uploaded data.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to upload.
    table_name : str
        Destination table name inside the configured dataset.
    """
    table_id = f"{project_id}.{dataset_id}.{table_name}"
    to_gbq(df, table_id, project_id=project_id, if_exists='replace', credentials=credentials)

# BigQuery table name -> DataFrame to upload.
# A dedicated name is used on purpose: rebinding `dataframes` here would
# overwrite the dict of raw extracted tables, and re-running any earlier cell
# that reads e.g. dataframes['destinations'] would then fail with a KeyError.
bq_tables = {
    'dim_facilities': df_dim_facilities,
    'dim_address': df_dim_address,
    'dim_destinations': df_dim_destinations,
    'dim_categories': df_dim_categories,
    'dim_medias': df_dim_medias,
    'destination_fact': df_destination_fact,
}

# Upload every table; the loop variable is not called `df` so it does not
# rebind a generic name that other cells may use
for table_name, table_df in bq_tables.items():
    load_to_bq(table_df, table_name)

print("All dataframes loaded to BigQuery successfully.")

100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 8577.31it/s]

All dataframes loaded to BigQuery successfully.



