### 1. Install Library

In [1]:
# %pip install pandas-gbq google-cloud-bigquery
# %pip install pandas-gbq google-auth google-auth-oauthlib

### 2. Import Library

In [2]:
import pandas as pd
import mysql.connector
from google.oauth2 import service_account
from google.cloud import bigquery
import pandas_gbq
from pandas_gbq import to_gbq
import os
from dotenv import load_dotenv
from datetime import datetime, timedelta

### 3. Database Configuration

In [3]:
# Load konfigurasi dari .env
load_dotenv()

# MySQL configurations
rds_host = os.getenv('RDS_HOST')
rds_dbname = os.getenv('RDS_DBNAME')
rds_user = os.getenv('RDS_USER')
rds_password = os.getenv('RDS_PASSWORD')

# Google Cloud configurations
project_id = os.getenv('GOOGLE_CLOUD_PROJECT_ID')
dataset_id = os.getenv('GOOGLE_CLOUD_DATASET_ID')
google_application_credentials = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = google_application_credentials

# Koneksi ke MySQL di Amazon RDS
conn_rds = mysql.connector.connect(
    host=rds_host,
    database=rds_dbname,
    user=rds_user,
    password=rds_password
)

### 4. Extract

#### table_to_df function below is used to extract data from a table and convert it into a dataframe

In [4]:
def table_to_df(table_name):
   query = f"SELECT * FROM {table_name}"
   df = pd.read_sql_query(query, conn_rds)
   return df

#### 4.1 Converting data from tables to dataframes

In [5]:
destinations_df = table_to_df('destinations')
routes_df = table_to_df('routes')
route_details_df = table_to_df('route_details')
users_df = table_to_df('users')

  df = pd.read_sql_query(query, conn_rds)


#### 4.2 destinations dataframe

In [6]:
destinations_df

Unnamed: 0,id,category_id,name,description,open_time,close_time,entry_price,longitude,latitude,visit_count,created_at,updated_at,deleted_at
0,00319825-e81b-4a93-bf9a-eef836ccf6ce,eb77b590-b255-4ea1-b11a-d445a259ac62,Keraton Kasunanan Surakarta,Keraton Kasunanan Surakarta adalah istana resm...,9:00,14:00,20000.0,110.827903,-7.577736,293,2024-06-11 08:28:33,2024-06-20 09:53:01,2024-06-20 09:53:01.786
1,005e32da-f5fb-405c-9772-f6424c011fac,eb77b590-b255-4ea1-b11a-d445a259ac62,Desa Wisata Kasongan,Desa yang terkenal dengan kerajinan gerabah da...,8:00,17:00,0.0,110.337978,-7.845291,243,2024-06-11 08:28:27,2024-06-20 09:55:01,2024-06-20 09:55:01.453
2,01b8da3a-3987-4976-9ae7-46d6b8eacee5,eb77b590-b255-4ea1-b11a-d445a259ac62,Saung Angklung Udjo,Saung Angklung Udjo adalah pusat Seni dan Buda...,8:00,17:00,120000.0,107.654684,-6.897701,203,2024-06-11 08:28:33,2024-06-20 09:55:14,2024-06-20 09:55:14.647
3,01ccaac9-97fe-42e1-8034-bc0c02471692,eb77b590-b255-4ea1-b11a-d445a259ac62,Museum Etnografi Sendawar,Museum Etnografi Sendawar merupakan museum per...,9:00,18:00,5000.0,115.701909,-0.232651,134,2024-06-11 08:28:20,2024-06-20 10:00:54,2024-06-20 10:00:54.207
4,02834099-7ce7-44b9-8b27-e4db38f062bd,eb77b590-b255-4ea1-b11a-d445a259ac61,Danau Limboto,"Danau ini terletak di Kecamatan Limboto, tepat...",0:00,23:59,0.0,123.007543,0.577025,219,2024-06-11 08:28:21,2024-06-20 10:02:09,2024-06-20 10:02:09.429
...,...,...,...,...,...,...,...,...,...,...,...,...,...
365,fc6ab48d-e78f-45dd-8dec-cf67481ad9e9,eb77b590-b255-4ea1-b11a-d445a259ac61,Taman Margasatwa Ragunan,Taman Margasatwa Ragunan jadi pilihan favorit ...,7:00,16:00,4000.0,106.822578,-6.304916,191,2024-06-24 06:38:36,2024-06-24 06:38:36,NaT
366,fc780065-0848-467b-9625-9c24ecd2e9c2,eb77b590-b255-4ea1-b11a-d445a259ac61,Pantai Maaf,Pantai Maaf menawarkan perairan yang damai den...,6:00,18:00,10000.0,135.496749,-3.360245,163,2024-06-11 08:28:32,2024-06-11 08:28:32,NaT
367,fde8748a-2ec0-4211-89e5-12f3d63db81b,eb77b590-b255-4ea1-b11a-d445a259ac61,Piaynemo Raja Ampat,Piaynemo adalah salah satu ikon wisata di Raja...,10:00,17:00,150000.0,130.270813,-0.564206,123,2024-06-11 08:28:32,2024-06-11 08:28:32,NaT
368,fe1ebfe5-365f-4893-aa36-91e380bb2011,eb77b590-b255-4ea1-b11a-d445a259ac61,Wisata Pantai Gedo,Wisata Pantai Gedo adalah destinasi pantai ind...,6:00,18:00,15000.0,135.546550,-3.309413,300,2024-06-11 08:28:32,2024-06-23 06:37:28,NaT


#### 4.3 routes dataframe

In [7]:
routes_df

Unnamed: 0,id,user_id,city_id,name,start_location,start_longitude,start_latitude,price,created_at,updated_at,deleted_at
0,02f6854d-1b57-4079-a2e3-394e9f037bb4,ac91e0a6-0c10-4d88-b253-294b6eb661cf,3204,Liburan ke Bandung,Gedung Sate,107.620761,-6.916096,200000.0,2024-06-19 07:35:14,2024-06-21 08:56:09,2024-06-21 08:56:09.855
1,0458f0ec-4bbe-4950-99ad-afd5f48762d3,a48e02bb-6a68-4586-85e4-b038d795da5b,3204,bandung day 1,"1600 Amphitheatre Pkwy Building 43, Mountain ...",-122.084,37.421998,200000.0,2024-06-23 09:15:22,2024-06-23 09:15:22,NaT
2,0c9959ab-dd26-4f23-94d1-16d739bc9b94,00bddbb8-8e7d-46ab-acfc-aab981c1ada1,3402,Berwisata Ke Bantul,"HXRR+3V5, Parerejo Kecamatan Gading Rejo Kabup...",104.991381,-5.410041,170000.0,2024-06-23 11:09:05,2024-06-24 06:46:03,2024-06-24 06:46:03.217
3,0fa0de42-f7e5-4a8b-b6ee-476a244c8958,00bddbb8-8e7d-46ab-acfc-aab981c1ada1,9103,Jalan,"HXRR+3V5, Parerejo Kecamatan Gading Rejo Kabup...",104.991384,-5.410041,100000.0,2024-06-22 07:22:39,2024-06-22 12:15:45,2024-06-22 12:15:45.922
4,116dd3f5-1287-4b5f-a3d8-856e903529d4,00bddbb8-8e7d-46ab-acfc-aab981c1ada1,3511,P,"MX8P+Q65, Rejosari Pringsewu Pringsewu Regency...",104.985617,-5.333118,60000.0,2024-06-24 07:09:24,2024-06-24 07:16:02,2024-06-24 07:16:02.668
5,180e1894-3b5b-40a5-a958-cefb9ca33664,0f8f6c04-7a7d-4ca4-b058-63cd4ed09015,8271,jelajah ternate,"Jl. Sriwijaya No.264A, Mataram Timur Kecamatan...",116.111153,-8.59415,65000.0,2024-06-25 10:27:40,2024-06-25 10:27:40,NaT
6,1cef156e-2c92-462e-9597-ada9dc3e3109,306de58a-e5bb-4ecb-978a-50ac1903b085,3327,rute,"Jl. Polowijen 1 No.4, Polowijen Kecamatan Blim...",112.648574,-7.928092,90000.0,2024-06-23 10:45:45,2024-06-23 10:45:45,NaT
7,2644a29e-81a1-4ac1-95c8-711b19c8a09b,106273d9-26a0-45f2-973b-894862edd06e,3508,Pronojiwo Tour,"Jl. Sidotopo Wetan No.200, Pegirian Kecamatan ...",112.758205,-7.228514,45000.0,2024-06-24 12:53:14,2024-06-24 12:53:14,NaT
8,2eef2911-7f83-4a0a-90fa-8dbdb36987b2,00bddbb8-8e7d-46ab-acfc-aab981c1ada1,1672,Liburan Akhir Pekan,"HXRR+3V5, Parerejo Kecamatan Gading Rejo Kabup...",104.991384,-5.410042,60000.0,2024-06-23 06:50:01,2024-06-24 06:45:48,2024-06-24 06:45:48.431
9,2f6ee063-6fc9-44a2-962e-bb94ece1627e,ac91e0a6-0c10-4d88-b253-294b6eb661cf,1104,Jalan Jalan Keluarga akhir bulan,"Jl. Melinjau No.10a, Jati Karya Kecamatan Binj...",98.512382,3.644328,500000.0,2024-06-23 07:20:42,2024-06-23 07:20:42,NaT


#### 4.4 route_details dataframe

In [8]:
route_details_df

Unnamed: 0,id,destination_id,route_id,longitude,latitude,duration,order,visit_start,visit_end,created_at,updated_at,deleted_at
0,04fe05f6-7498-40fb-a1e6-8ef51be2a10c,e9044677-bc76-422e-b9be-6fb3f43c35fc,180e1894-3b5b-40a5-a958-cefb9ca33664,127.307562,0.836181,6300,3,0 days 13:00:00,0 days 14:00:00,2024-06-25 10:27:40,2024-06-25 10:27:40,NaT
1,0771cb1e-238d-4bf3-be0c-1fa3ffb0d143,f6ff5df8-673d-4318-b6dc-737ab3af4f6d,7c8ada92-890c-4558-ac7a-f89053d9ee2c,140.518725,-2.613298,8640,3,0 days 16:45:00,0 days 17:45:00,2024-06-22 07:22:46,2024-06-22 16:37:50,2024-06-22 16:37:50.688
2,07b19224-517f-4071-be96-c4136f92ccee,8cd2ea7d-0c4c-41f5-b9cf-bb16cd64ea25,ac367500-7ade-484e-8c2d-0d4b4ddcee9d,140.451761,-2.579639,10800,1,0 days 10:00:00,0 days 12:00:00,2024-06-23 11:03:59,2024-06-24 06:45:57,2024-06-24 06:45:57.366
3,083731a3-5252-4835-9680-e184b3758c8b,ee06a47e-f264-40e9-b9c8-1a3a78492d84,f6a0ec4c-7056-4109-9577-52be8072f5db,107.621455,-6.900718,5400,2,0 days 11:55:00,0 days 12:55:00,2024-06-23 06:36:52,2024-06-23 13:14:39,2024-06-23 13:14:39.427
4,0dfb44fa-c5b5-4af1-8f7e-f10d09198f82,45c50f14-480f-446e-bf5c-e27b4aabfe0c,8a10debc-3a23-45e1-b60e-a64025344a21,96.996653,4.592379,33185,1,0 days 10:15:00,0 days 17:15:00,2024-06-22 07:34:21,2024-06-22 21:42:58,2024-06-22 21:42:58.751
...,...,...,...,...,...,...,...,...,...,...,...,...
85,f30e8f0d-6019-4dc2-ad7d-a6fa88c50785,5fb0bedc-f1fa-4197-814f-f97155092473,0458f0ec-4bbe-4950-99ad-afd5f48762d3,107.402431,-7.166188,1800,1,0 days 09:00:00,0 days 10:30:00,2024-06-23 09:15:22,2024-06-23 09:15:22,NaT
86,f4449f3e-995d-458b-9ed1-a94e338cc1ac,4ce34993-985e-4e27-a31e-4753e6b8ef23,2eef2911-7f83-4a0a-90fa-8dbdb36987b2,103.159275,-4.022511,2700,2,0 days 11:00:00,0 days 11:45:00,2024-06-23 06:50:01,2024-06-24 06:45:48,2024-06-24 06:45:48.433
87,f5009d18-3e42-4832-9f62-be7d5fdb25e8,b3bbb0ed-c092-4cc8-af76-59f883e51738,30dca53e-f902-49ca-812a-38a586b386d3,116.461640,-8.410276,5400,1,0 days 07:00:00,0 days 14:00:00,2024-06-23 13:17:14,2024-06-24 06:46:29,2024-06-24 06:46:29.838
88,f9640b63-fa9e-459d-8d72-032b05ff7a3b,44dc96a4-5f1f-4dfb-a55f-311ab26053d8,ce2db343-fd75-4334-b6ad-658073ed2cf5,107.676829,-6.842148,6600,3,0 days 14:30:00,0 days 15:45:00,2024-06-24 02:46:04,2024-06-24 02:46:04,NaT


#### 4.5 users dataframe

In [9]:
users_df

Unnamed: 0,id,email,password,username,fullname,bio,phone_number,profile_image_url,gender,city,email_verified_at,created_at,updated_at,deleted_at,province,refresh_token
0,00bddbb8-8e7d-46ab-acfc-aab981c1ada1,irsyadyazidsyafiq@gmail.com,$2a$10$waUWV4/teNb.qGj8a/1PLeQ/7XxCFvUo6A8K0jU...,yazid.syafiq,Yazid Syafiq,Manusia Biasa Aja,081393984849,https://res.cloudinary.com/alta-minpro/image/u...,Pria,Pringsewu,2024-06-24 04:42:46.262,2024-05-18 03:32:14.211,2024-06-24 14:36:54.713,,Lampung,eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJJZCI6I...
1,03d7240b-3e9b-4105-ac93-a926b752bb81,shuu@example.com,$2a$10$4w825sLhnK7X3NuN4SOntemsOtaXvbGEZAvNV1f...,paimon,Kaedehara Kazuha,,081234567890,img.jpg,Wanita,Jaksel,NaT,2024-06-16 06:33:53.904,2024-06-24 03:16:38.804,,,
2,05d6e970-851b-4a8d-8869-587ebe5273cd,ulfiizza@gmail.com,$2a$10$tG82uWRFwAdiXW1xOHsmD.Xc/MxYPUmGcW3Zd6s...,ulfi,ulfi izza,,089123456123,https://res.cloudinary.com/alta-minpro/image/u...,Wanita,malang,NaT,2024-02-16 14:12:42.942,2024-06-24 03:22:39.515,,jawa timur,
3,083cb941-9a91-4ea4-af6e-5b25384ff00b,aventur@gmail.com,$2a$10$EH.mW4sj0PAF8kwc92Ft7eCyYWVNxmiDfSPQ2xe...,aventurine,aventurine,,081818181818,https://res.cloudinary.com/alta-minpro/image/u...,Pria,jakarta selatan,NaT,2024-01-09 15:51:32.982,2024-06-23 11:20:54.103,,jakarta,
4,08b0725a-c0c1-4f6d-a7cb-60e136d88c72,zain@gmail.com,$2a$10$B16UCmm8KU0hUralVTPaV.4nvjahndVr9I88T6b...,zain,zain m,,089231485105,https://res.cloudinary.com/alta-minpro/image/u...,Pria,Malang,NaT,2024-01-15 17:26:27.698,2024-06-16 13:34:19.510,,Jawa Timur,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101,fa95b90d-41a8-44fe-aea8-e3b1553532c8,puterisalsaa8@gmail.com,$2a$10$fkklkqERHa.hDRZxlPHhp.dzWPhwSchFLKzC4y0...,inisalsa,Salsa Maulidina,,08115122238,,,,2024-06-10 12:12:21.233,2024-02-06 22:12:37.382,2024-06-10 12:12:23.104,,,eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJJZCI6I...
102,fc21e23a-ca8c-4cf9-8c5d-f1a56b431022,xhyeni@gmail.com,$2a$10$QpNu5wnAwZQKjvZc3QijZ.mjhw7XSx5FSC.UmjE...,xhyeni,xhyeni ananda,,086182836712,https://res.cloudinary.com/alta-minpro/image/u...,Wanita,Malang,NaT,2024-01-28 06:02:07.294,2024-06-19 06:14:44.495,,Jawa Timur,
103,fc9e86d9-e3ba-42eb-a46c-16ba07dfeb74,tajoli5069@javnoi.com,$2a$10$D5rUZTz5z2SkdRJMeogJLODp2Xp9kHSLTVtNTrC...,testverfiy25,Test Verify,,08123456789,,,,2024-05-28 07:16:51.573,2024-01-26 19:32:43.861,2024-05-28 07:16:51.576,,,
104,fd13566c-bd12-4088-8537-2a52dbdca23b,Nausicaa5@example.com,$2a$10$mR0l7hjnGtU6PF9V1LHTFerCZXqK.w7vBy.M.h0...,Nausicaa5,John Doe,,081234567891,,,,NaT,2024-02-17 15:37:16.964,2024-06-12 07:00:16.507,,,


#### 4.6 Closing connection

In [10]:
conn_rds.close()

### 5. Transformation

#### 5.1 Selecting subsets of each dataframe

In [11]:
# Kamus pemetaan bulan dalam bahasa Indonesia
month_mapping = {
    1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr',
    5: 'Mei', 6: 'Jun', 7: 'Jul', 8: 'Agu',
    9: 'Sep', 10: 'Okt', 11: 'Nov', 12: 'Des'
}

# Ekstraksi tahun, bulan, dan hari
users_df['tahun'] = users_df['created_at'].dt.year
users_df['bulan'] = users_df['created_at'].dt.month.map(month_mapping)
users_df['tanggal'] = users_df['created_at'].dt.day
users_df['tanggallengkap'] = pd.to_datetime(users_df['created_at'].dt.date)

dim_destinations = destinations_df[['id', 'name', 'description', 'open_time', 'close_time', 'entry_price', 'longitude', 'latitude', 'visit_count']]
dim_routes = routes_df[['id', 'name', 'start_longitude', 'start_latitude', 'price']]
dim_route_details = route_details_df[['id', 'longitude', 'latitude', 'duration', 'order', 'visit_start', 'visit_end']]
dim_users = users_df[['id', 'email', 'username', 'fullname', 'phone_number', 'gender', 'city', 'province', 'tahun', 'bulan', 'tanggal', 'tanggallengkap']]

#### 5.2 destinations dimension

In [12]:
dim_destinations

Unnamed: 0,id,name,description,open_time,close_time,entry_price,longitude,latitude,visit_count
0,00319825-e81b-4a93-bf9a-eef836ccf6ce,Keraton Kasunanan Surakarta,Keraton Kasunanan Surakarta adalah istana resm...,9:00,14:00,20000.0,110.827903,-7.577736,293
1,005e32da-f5fb-405c-9772-f6424c011fac,Desa Wisata Kasongan,Desa yang terkenal dengan kerajinan gerabah da...,8:00,17:00,0.0,110.337978,-7.845291,243
2,01b8da3a-3987-4976-9ae7-46d6b8eacee5,Saung Angklung Udjo,Saung Angklung Udjo adalah pusat Seni dan Buda...,8:00,17:00,120000.0,107.654684,-6.897701,203
3,01ccaac9-97fe-42e1-8034-bc0c02471692,Museum Etnografi Sendawar,Museum Etnografi Sendawar merupakan museum per...,9:00,18:00,5000.0,115.701909,-0.232651,134
4,02834099-7ce7-44b9-8b27-e4db38f062bd,Danau Limboto,"Danau ini terletak di Kecamatan Limboto, tepat...",0:00,23:59,0.0,123.007543,0.577025,219
...,...,...,...,...,...,...,...,...,...
365,fc6ab48d-e78f-45dd-8dec-cf67481ad9e9,Taman Margasatwa Ragunan,Taman Margasatwa Ragunan jadi pilihan favorit ...,7:00,16:00,4000.0,106.822578,-6.304916,191
366,fc780065-0848-467b-9625-9c24ecd2e9c2,Pantai Maaf,Pantai Maaf menawarkan perairan yang damai den...,6:00,18:00,10000.0,135.496749,-3.360245,163
367,fde8748a-2ec0-4211-89e5-12f3d63db81b,Piaynemo Raja Ampat,Piaynemo adalah salah satu ikon wisata di Raja...,10:00,17:00,150000.0,130.270813,-0.564206,123
368,fe1ebfe5-365f-4893-aa36-91e380bb2011,Wisata Pantai Gedo,Wisata Pantai Gedo adalah destinasi pantai ind...,6:00,18:00,15000.0,135.546550,-3.309413,300


#### 5.3 routes dimension

In [13]:
dim_routes

Unnamed: 0,id,name,start_longitude,start_latitude,price
0,02f6854d-1b57-4079-a2e3-394e9f037bb4,Liburan ke Bandung,107.620761,-6.916096,200000.0
1,0458f0ec-4bbe-4950-99ad-afd5f48762d3,bandung day 1,-122.084,37.421998,200000.0
2,0c9959ab-dd26-4f23-94d1-16d739bc9b94,Berwisata Ke Bantul,104.991381,-5.410041,170000.0
3,0fa0de42-f7e5-4a8b-b6ee-476a244c8958,Jalan,104.991384,-5.410041,100000.0
4,116dd3f5-1287-4b5f-a3d8-856e903529d4,P,104.985617,-5.333118,60000.0
5,180e1894-3b5b-40a5-a958-cefb9ca33664,jelajah ternate,116.111153,-8.59415,65000.0
6,1cef156e-2c92-462e-9597-ada9dc3e3109,rute,112.648574,-7.928092,90000.0
7,2644a29e-81a1-4ac1-95c8-711b19c8a09b,Pronojiwo Tour,112.758205,-7.228514,45000.0
8,2eef2911-7f83-4a0a-90fa-8dbdb36987b2,Liburan Akhir Pekan,104.991384,-5.410042,60000.0
9,2f6ee063-6fc9-44a2-962e-bb94ece1627e,Jalan Jalan Keluarga akhir bulan,98.512382,3.644328,500000.0


#### 5.4 route_details dimension

In [14]:
dim_route_details['visit_start'] = dim_route_details['visit_start'].astype(str)
dim_route_details['visit_end'] = dim_route_details['visit_end'].astype(str)
dim_route_details

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dim_route_details['visit_start'] = dim_route_details['visit_start'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dim_route_details['visit_end'] = dim_route_details['visit_end'].astype(str)


Unnamed: 0,id,longitude,latitude,duration,order,visit_start,visit_end
0,04fe05f6-7498-40fb-a1e6-8ef51be2a10c,127.307562,0.836181,6300,3,0 days 13:00:00,0 days 14:00:00
1,0771cb1e-238d-4bf3-be0c-1fa3ffb0d143,140.518725,-2.613298,8640,3,0 days 16:45:00,0 days 17:45:00
2,07b19224-517f-4071-be96-c4136f92ccee,140.451761,-2.579639,10800,1,0 days 10:00:00,0 days 12:00:00
3,083731a3-5252-4835-9680-e184b3758c8b,107.621455,-6.900718,5400,2,0 days 11:55:00,0 days 12:55:00
4,0dfb44fa-c5b5-4af1-8f7e-f10d09198f82,96.996653,4.592379,33185,1,0 days 10:15:00,0 days 17:15:00
...,...,...,...,...,...,...,...
85,f30e8f0d-6019-4dc2-ad7d-a6fa88c50785,107.402431,-7.166188,1800,1,0 days 09:00:00,0 days 10:30:00
86,f4449f3e-995d-458b-9ed1-a94e338cc1ac,103.159275,-4.022511,2700,2,0 days 11:00:00,0 days 11:45:00
87,f5009d18-3e42-4832-9f62-be7d5fdb25e8,116.461640,-8.410276,5400,1,0 days 07:00:00,0 days 14:00:00
88,f9640b63-fa9e-459d-8d72-032b05ff7a3b,107.676829,-6.842148,6600,3,0 days 14:30:00,0 days 15:45:00


#### 5.5 users dimension

In [15]:
dim_users

Unnamed: 0,id,email,username,fullname,phone_number,gender,city,province,tahun,bulan,tanggal,tanggallengkap
0,00bddbb8-8e7d-46ab-acfc-aab981c1ada1,irsyadyazidsyafiq@gmail.com,yazid.syafiq,Yazid Syafiq,081393984849,Pria,Pringsewu,Lampung,2024,Mei,18,2024-05-18
1,03d7240b-3e9b-4105-ac93-a926b752bb81,shuu@example.com,paimon,Kaedehara Kazuha,081234567890,Wanita,Jaksel,,2024,Jun,16,2024-06-16
2,05d6e970-851b-4a8d-8869-587ebe5273cd,ulfiizza@gmail.com,ulfi,ulfi izza,089123456123,Wanita,malang,jawa timur,2024,Feb,16,2024-02-16
3,083cb941-9a91-4ea4-af6e-5b25384ff00b,aventur@gmail.com,aventurine,aventurine,081818181818,Pria,jakarta selatan,jakarta,2024,Jan,9,2024-01-09
4,08b0725a-c0c1-4f6d-a7cb-60e136d88c72,zain@gmail.com,zain,zain m,089231485105,Pria,Malang,Jawa Timur,2024,Jan,15,2024-01-15
...,...,...,...,...,...,...,...,...,...,...,...,...
101,fa95b90d-41a8-44fe-aea8-e3b1553532c8,puterisalsaa8@gmail.com,inisalsa,Salsa Maulidina,08115122238,,,,2024,Feb,6,2024-02-06
102,fc21e23a-ca8c-4cf9-8c5d-f1a56b431022,xhyeni@gmail.com,xhyeni,xhyeni ananda,086182836712,Wanita,Malang,Jawa Timur,2024,Jan,28,2024-01-28
103,fc9e86d9-e3ba-42eb-a46c-16ba07dfeb74,tajoli5069@javnoi.com,testverfiy25,Test Verify,08123456789,,,,2024,Jan,26,2024-01-26
104,fd13566c-bd12-4088-8537-2a52dbdca23b,Nausicaa5@example.com,Nausicaa5,John Doe,081234567891,,,,2024,Feb,17,2024-02-17


#### 5.6 routes_fact

In [16]:
# merging dataframes
merged_df = pd.merge(routes_df, route_details_df, left_on='id', right_on='route_id', suffixes=('_routes', '_route_details'))
merged_df = pd.merge(merged_df, users_df, left_on='user_id', right_on='id', suffixes=('_routes', '_users'))
merged_df = pd.merge(merged_df, destinations_df, left_on='destination_id', right_on='id', suffixes=('_routes_users', '_destinations'))

# Pilih subset kolom
routes_fact = merged_df[['id_routes', 'user_id', 'id_route_details', 'id_destinations']]

# renaming some of columns name
routes_fact = routes_fact.rename(columns={"id_routes": "route_id", 
                                          "id_route_details": "route_details_id", 
                                          "id_destinations": "destinations_id"})

# adding user_count and route_count columns
routes_fact['user_count'] = 1
routes_fact['route_count'] = 1

# Menghitung total destinations untuk setiap route_id
routes_fact['total_destination'] = routes_fact.groupby('route_id')['destinations_id'].transform('count')

# display the dataframe
routes_fact

Unnamed: 0,route_id,user_id,route_details_id,destinations_id,user_count,route_count,total_destination
0,02f6854d-1b57-4079-a2e3-394e9f037bb4,ac91e0a6-0c10-4d88-b253-294b6eb661cf,457c1a86-15aa-47da-9b57-dcd4814074a9,01b8da3a-3987-4976-9ae7-46d6b8eacee5,1,1,3
1,f34b3c72-3829-4ee1-993a-63cf37637b11,ac91e0a6-0c10-4d88-b253-294b6eb661cf,c8096bca-7ffd-4f3f-9e8a-97aa735f5efd,01b8da3a-3987-4976-9ae7-46d6b8eacee5,1,1,3
2,02f6854d-1b57-4079-a2e3-394e9f037bb4,ac91e0a6-0c10-4d88-b253-294b6eb661cf,55fab257-2df8-4ec7-87b7-1f883d8819f4,5fb0bedc-f1fa-4197-814f-f97155092473,1,1,3
3,f34b3c72-3829-4ee1-993a-63cf37637b11,ac91e0a6-0c10-4d88-b253-294b6eb661cf,453e97ca-fc14-4fef-81e4-4482738530d0,5fb0bedc-f1fa-4197-814f-f97155092473,1,1,3
4,0458f0ec-4bbe-4950-99ad-afd5f48762d3,a48e02bb-6a68-4586-85e4-b038d795da5b,f30e8f0d-6019-4dc2-ad7d-a6fa88c50785,5fb0bedc-f1fa-4197-814f-f97155092473,1,1,3
...,...,...,...,...,...,...,...
85,7c9badfb-b98a-4512-99ac-901ea23bf9de,94137aaf-70a2-4246-b227-fc3507d1f98b,4355697e-20cd-453b-8823-2069b328cc48,4388e724-d0b8-46e0-80bd-1362210b1cc1,1,1,2
86,7c9badfb-b98a-4512-99ac-901ea23bf9de,94137aaf-70a2-4246-b227-fc3507d1f98b,b0adb48f-f922-4830-9794-218d92b6a505,c752381e-bf5e-48c7-b8f2-27a07959bee8,1,1,2
87,45478b42-f990-4f6f-ab93-8007f2478779,ed295b2a-0a9f-40a0-8cae-461531ad949e,e96b4de5-eaf7-4ed0-90af-f41b051795cd,cafb4a28-6c52-4157-baaf-7080e3b642c6,1,1,1
88,ce2db343-fd75-4334-b6ad-658073ed2cf5,34e333f7-1465-4dc5-a159-3035da9ee459,d4959467-c893-44cf-8d0e-0978fcb1dce6,73697664-ba03-4de1-ad47-13fa4f95abcd,1,1,3


### 6. Load

#### load _to_gbq function below is used to load extract data from a dataframe into a table in google big query

In [17]:
def load_to_gbq(credentials, project_id, dataset_id, table_names, dataframes):
   for df, table_name in zip(dataframes, table_names):
      table_full_id = f'{project_id}.{dataset_id}.{table_name}'
      to_gbq(df, table_full_id, project_id=project_id, if_exists='replace', credentials=credentials)
      print(f'Table {table_name} loaded successfully!')

#### 6.1 Load to Big Query

In [18]:
credentials = service_account.Credentials.from_service_account_file(google_application_credentials)

dfs = [dim_destinations, dim_routes, dim_route_details, dim_users, routes_fact]
tables = ['dim_destinations', 'dim_routes', 'dim_route_details', 'dim_users', 'routes_fact']

load_to_gbq(credentials, project_id, dataset_id, tables, dfs)

100%|██████████| 1/1 [00:00<?, ?it/s]


Table dim_destinations loaded successfully!


100%|██████████| 1/1 [00:00<?, ?it/s]


Table dim_routes loaded successfully!


100%|██████████| 1/1 [00:00<?, ?it/s]


Table dim_route_details loaded successfully!


100%|██████████| 1/1 [00:00<?, ?it/s]


Table dim_users loaded successfully!


100%|██████████| 1/1 [00:00<?, ?it/s]

Table routes_fact loaded successfully!



