### 1. Install Library

In [68]:
# %pip install pandas-gbq google-cloud-bigquery
# %pip install pandas-gbq google-auth google-auth-oauthlib

### 2. Import Library

In [69]:
import pandas as pd
import numpy as np

import mysql.connector

from google.oauth2 import service_account
from google.cloud import bigquery
import pandas_gbq
from pandas_gbq import to_gbq

import os
from dotenv import load_dotenv
from datetime import datetime, timedelta

### 3. Database Configuration

In [70]:
# Load konfigurasi dari .env
load_dotenv()

# MySQL configurations
rds_host = os.getenv('RDS_HOST')
rds_dbname = os.getenv('RDS_DBNAME')
rds_user = os.getenv('RDS_USER')
rds_password = os.getenv('RDS_PASSWORD')

# Google Cloud configurations
project_id = os.getenv('GOOGLE_CLOUD_PROJECT_ID')
dataset_id = os.getenv('GOOGLE_CLOUD_DATASET_ID')
google_application_credentials = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = google_application_credentials

# Koneksi ke MySQL di Amazon RDS
conn_rds = mysql.connector.connect(
    host=rds_host,
    database=rds_dbname,
    user=rds_user,
    password=rds_password
)

### 4. Extract

#### table_to_df function below is used to extract data from a table and convert it into a dataframe

In [71]:
def table_to_df(table_name):
   query = f"SELECT * FROM {table_name}"
   df = pd.read_sql_query(query, conn_rds)
   return df

#### 4.1 Converting data from tables to dataframes

In [72]:
destinations_df = table_to_df('destinations')
routes_df = table_to_df('routes')
route_details_df = table_to_df('route_details')
users_df = table_to_df('users')

  df = pd.read_sql_query(query, conn_rds)


#### 4.2 destinations dataframe

In [73]:
destinations_df

Unnamed: 0,id,category_id,name,description,open_time,close_time,entry_price,longitude,latitude,visit_count,created_at,updated_at,deleted_at
0,00319825-e81b-4a93-bf9a-eef836ccf6ce,eb77b590-b255-4ea1-b11a-d445a259ac62,Keraton Kasunanan Surakarta,Keraton Kasunanan Surakarta adalah istana resm...,9:00,14:00,20000.0,110.827903,-7.577736,293,2024-06-11 08:28:33,2024-06-19 06:49:44,NaT
1,005e32da-f5fb-405c-9772-f6424c011fac,eb77b590-b255-4ea1-b11a-d445a259ac62,Desa Wisata Kasongan,Desa yang terkenal dengan kerajinan gerabah da...,8:00,17:00,0.0,110.337978,-7.845291,243,2024-06-11 08:28:27,2024-06-13 13:00:10,NaT
2,01b8da3a-3987-4976-9ae7-46d6b8eacee5,eb77b590-b255-4ea1-b11a-d445a259ac62,Saung Angklung Udjo,Saung Angklung Udjo adalah pusat Seni dan Buda...,8:00,17:00,120000.0,107.654684,-6.897701,203,2024-06-11 08:28:33,2024-06-17 19:02:31,NaT
3,01ccaac9-97fe-42e1-8034-bc0c02471692,eb77b590-b255-4ea1-b11a-d445a259ac62,Museum Etnografi Sendawar,Museum Etnografi Sendawar merupakan museum per...,9:00,18:00,5000.0,115.701909,-0.232651,134,2024-06-11 08:28:20,2024-06-13 16:58:21,NaT
4,02834099-7ce7-44b9-8b27-e4db38f062bd,eb77b590-b255-4ea1-b11a-d445a259ac61,Danau Limboto,"Danau ini terletak di Kecamatan Limboto, tepat...",0:00,23:59,0.0,123.007543,0.577025,219,2024-06-11 08:28:21,2024-06-15 14:26:21,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...
323,fbee819e-2b7b-44fe-8d5a-74dd345cea0e,eb77b590-b255-4ea1-b11a-d445a259ac61,Danau Tahai,Di kawasan objek wisata Danau Tahai ini terdap...,0:00,23:59,0.0,113.783202,-2.027312,136,2024-06-11 08:28:19,2024-06-11 08:28:19,NaT
324,fc780065-0848-467b-9625-9c24ecd2e9c2,eb77b590-b255-4ea1-b11a-d445a259ac61,Pantai Maaf,Pantai Maaf menawarkan perairan yang damai den...,6:00,18:00,10000.0,135.496749,-3.360245,163,2024-06-11 08:28:32,2024-06-11 08:28:32,NaT
325,fde8748a-2ec0-4211-89e5-12f3d63db81b,eb77b590-b255-4ea1-b11a-d445a259ac61,Piaynemo Raja Ampat,Piaynemo adalah salah satu ikon wisata di Raja...,10:00,17:00,150000.0,130.270813,-0.564206,123,2024-06-11 08:28:32,2024-06-11 08:28:32,NaT
326,fe1ebfe5-365f-4893-aa36-91e380bb2011,eb77b590-b255-4ea1-b11a-d445a259ac61,Wisata Pantai Gedo,Wisata Pantai Gedo adalah destinasi pantai ind...,6:00,18:00,15000.0,135.546550,-3.309413,299,2024-06-11 08:28:32,2024-06-11 08:28:32,NaT


#### 4.3 routes dataframe

In [74]:
routes_df

Unnamed: 0,id,user_id,city_id,name,start_location,start_longitude,start_latitude,price,created_at,updated_at,deleted_at
0,002501e6-fa15-42da-a61f-57b17efb8ee8,a8bddfb7-075a-4e3e-bfb0-c4a5f9d270d2,7605,Route ke 35,Hotel 35,119.923853,-9.605230,335000.0,2024-06-19 08:27:56,2024-06-19 08:27:58,NaT
1,025d2240-3026-4a14-b26c-1e8384165e41,c22f8c1e-c29f-450a-a0b7-002137142f3a,6371,Route ke 10,Hotel 10,114.691269,-6.602853,116000.0,2024-06-19 08:27:56,2024-06-19 08:27:58,NaT
2,026ab3bf-6b19-470b-9093-8820e3bc8384,81789c1c-7eea-4e7c-b15b-5114cc77dd1f,3174,Route ke 99,Hotel 99,113.069697,7.631735,386000.0,2024-06-19 08:27:56,2024-06-19 08:27:58,NaT
3,02f6854d-1b57-4079-a2e3-394e9f037bb4,ac91e0a6-0c10-4d88-b253-294b6eb661cf,3204,Liburan ke Bandung,Gedung Sate,107.620761,-6.916096,200000.0,2024-06-19 07:35:14,2024-06-19 07:35:14,NaT
4,0308d6d2-d28b-42d0-ad28-e64fd96bb2fd,6d1deb42-eedb-40da-b366-0102cc60461a,6201,Route ke 12,Hotel 12,12.010000,12.010000,100000.0,2024-06-14 04:23:55,2024-06-17 06:09:07,2024-06-17 06:09:07.879
...,...,...,...,...,...,...,...,...,...,...,...
252,f8fad44d-e5b1-4a3b-939e-1955bf9f7c67,6d1deb42-eedb-40da-b366-0102cc60461a,6201,Route ke 14,Hotel 14,12.010000,12.010000,100000.0,2024-06-14 04:23:55,2024-06-18 12:35:04,2024-06-18 12:35:04.091
253,f9ba7bf0-c6cd-4f4c-b18b-b4b08465234a,79b411cd-a0c4-4f63-892c-ee8643707551,6372,Route ke 28,Hotel 28,112.975730,-4.493524,364000.0,2024-06-19 05:40:14,2024-06-19 05:40:16,NaT
254,f9cb8326-e5e2-4db8-b14b-3bf270e39741,d3147562-84b4-4655-9c01-bc9ee1e5f21a,5201,Route ke 82,Hotel 82,111.150113,-5.183633,240000.0,2024-06-19 05:40:14,2024-06-19 05:40:16,NaT
255,fa4765c0-a4d4-450e-b6f4-e41bb6743189,a88e4b39-0280-4956-8f9d-f37e684e6f76,9207,Route ke 111,Hotel 111,117.008405,-3.332134,304000.0,2024-06-19 08:27:57,2024-06-19 08:27:58,NaT


#### 4.4 route_details dataframe

In [75]:
route_details_df

Unnamed: 0,id,destination_id,route_id,longitude,latitude,duration,order,visit_start,visit_end,created_at,updated_at,deleted_at
0,00774a2d-c7ac-442a-b0b3-afab936306b6,47be7168-4070-47fd-b233-2da1d0f26edb,f7389d27-55c6-405f-9f9c-a00a96cba5b9,111.633128,-2.675879,60,1,0 days 08:00:00,0 days 09:00:00,2024-06-19 08:27:57,2024-06-19 08:27:59,NaT
1,00bc0620-fec9-44f1-87b0-ba97e719bdd7,90965c23-b4ec-467c-9958-d5bf1c00bb14,a83fb4dc-7427-49e1-bbb0-97f987d2e74a,106.160397,-6.042848,120,2,0 days 08:00:00,0 days 09:00:00,2024-06-19 08:27:58,2024-06-19 08:28:00,NaT
2,00f932c8-a563-42b5-8579-7a08659daa36,ee06a47e-f264-40e9-b9c8-1a3a78492d84,e930a94e-cf71-4d94-bb58-c3a0d672fe27,107.621455,-6.900718,180,3,0 days 08:00:00,0 days 09:00:00,2024-06-19 05:40:15,2024-06-19 05:40:17,NaT
3,01992452-7884-4059-b530-d6b0fdb3fd13,75135c5f-6b11-4d7a-be03-d8583be6c2bb,a83fb4dc-7427-49e1-bbb0-97f987d2e74a,117.178030,-5.901893,180,3,0 days 08:00:00,0 days 09:00:00,2024-06-19 08:27:58,2024-06-19 08:28:00,NaT
4,019fb0be-4664-49cb-99e1-813548ebf86b,5f5a24f8-0797-4781-8f47-d3799d6e6d73,e8008647-a77f-4c4b-a90a-48afe24ecdf6,134.098755,-0.894371,60,1,0 days 08:00:00,0 days 09:00:00,2024-06-19 05:40:14,2024-06-19 05:40:16,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...
736,fe65d059-a13a-4cc7-8b57-846291fe1369,47be7168-4070-47fd-b233-2da1d0f26edb,9582f727-f581-4403-9c03-2a876813f497,111.633128,-2.675879,180,3,0 days 08:00:00,0 days 09:00:00,2024-06-19 08:27:57,2024-06-19 08:27:59,NaT
737,fecdfcd5-a98e-462c-8a3f-9664cffa6ba6,f1a8138a-580b-4c96-a719-861278ec3cf0,f705c980-6af0-4551-8510-e6cec15a12bb,111.674969,0.080795,60,1,0 days 08:00:00,0 days 09:00:00,2024-06-19 05:40:15,2024-06-19 05:40:17,NaT
738,ff0c3b85-e951-4ece-8329-a2b14065aa57,d9feeaf9-fdae-4904-9557-4754d409fbf8,7abe0dfd-a38c-43be-89c7-09fcdc6595c5,119.404290,-5.191177,180,3,0 days 08:00:00,0 days 09:00:00,2024-06-19 05:40:16,2024-06-19 05:40:17,NaT
739,ff83c993-6981-433c-abed-28db2fd8b26d,005e32da-f5fb-405c-9772-f6424c011fac,5917d6a0-f4a5-413b-9c2e-124b8a5c639b,110.337978,-7.845291,60,1,0 days 08:00:00,0 days 09:00:00,2024-06-19 05:40:15,2024-06-19 05:40:17,NaT


#### 4.5 users dataframe

In [76]:
users_df

Unnamed: 0,id,email,password,username,fullname,bio,phone_number,profile_image_url,gender,city,email_verified_at,created_at,updated_at,deleted_at,province,refresh_token
0,00862788-5ded-4065-8275-2569748f64aa,xifihi49190@huleos.com,$2a$10$X4GUlWtLjn/8M2cT7mOm/eD2nicI91W3mqm3mEI...,testverify10,John Doe,,081234567891,,,,NaT,2024-05-27 15:26:01.704,2024-05-27 15:26:01.704,2024-06-14 09:16:42.952,,
1,00bddbb8-8e7d-46ab-acfc-aab981c1ada1,irsyadyazidsyafiq@gmail.com,$2a$10$isVU6rM3rerOE4KzGvq5..faf7DwTLqTxiHXgGc...,yazid.syafiq,Yazid Syafiq Irsyad,,081393984849,https://res.cloudinary.com/alta-minpro/image/u...,Pria,Malang,2024-05-31 11:59:19.353,2024-05-31 11:58:43.404,2024-06-17 13:00:38.103,NaT,Jawa Timur,eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJJZCI6I...
2,03d7240b-3e9b-4105-ac93-a926b752bb81,shuuuuuuu@example.com,$2a$10$NvsLJLtnk.IW7gdbkOwK8Ob7uuAADw4o53ctlZW...,paimon,nama_lengkap,bio,081234567890,img.jpg,pria,Jaksel,NaT,2024-05-28 06:43:29.804,2024-06-05 14:44:35.107,NaT,Jakarta,
3,0483c5ae-046a-403a-a656-db920635396b,johndoe2345@example.com,$2a$10$Dxg27rgrHy1bZPiD0jNI8uInxr/duxnf3QEH8IS...,johndoe2443,John Doe,,081234567891,,,,NaT,2024-05-25 15:12:44.106,2024-05-25 15:12:44.106,2024-06-05 14:46:44.405,,
4,05d6e970-851b-4a8d-8869-587ebe5273cd,ulfiizza@gmail.com,$2a$10$2eFAkCeXNgTy69xF8gaIC.cdEOaiCB7q.olRjeK...,ulfi,ulfi izza,,089123456789,https://res.cloudinary.com/alta-minpro/image/u...,Wanita,malang,NaT,2024-06-13 03:35:00.083,2024-06-16 14:37:19.404,NaT,jawa timur,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,fa95b90d-41a8-44fe-aea8-e3b1553532c8,puterisalsaa8@gmail.com,$2a$10$fkklkqERHa.hDRZxlPHhp.dzWPhwSchFLKzC4y0...,inisalsa,Salsa Maulidina,,08115122238,,,,2024-06-10 12:12:21.233,2024-06-10 12:11:56.116,2024-06-10 12:12:23.104,NaT,,eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJJZCI6I...
147,fc21e23a-ca8c-4cf9-8c5d-f1a56b431022,xhyeni@gmail.com,$2a$10$QpNu5wnAwZQKjvZc3QijZ.mjhw7XSx5FSC.UmjE...,xhyeni,xhyeni ananda,,086182836712,https://res.cloudinary.com/alta-minpro/image/u...,Wanita,Malang,NaT,2024-06-19 06:14:44.495,2024-06-19 06:14:44.495,NaT,Jawa Timur,
148,fc9e86d9-e3ba-42eb-a46c-16ba07dfeb74,tajoli5069@javnoi.com,$2a$10$D5rUZTz5z2SkdRJMeogJLODp2Xp9kHSLTVtNTrC...,testverfiy25,Test Verify,,08123456789,,,,2024-05-28 07:16:51.573,2024-05-28 07:16:28.803,2024-05-28 07:16:51.576,NaT,,
149,fd13566c-bd12-4088-8537-2a52dbdca23b,Nausicaa5@example.com,$2a$10$mR0l7hjnGtU6PF9V1LHTFerCZXqK.w7vBy.M.h0...,Nausicaa5,John Doe,,081234567891,,,,NaT,2024-06-12 07:00:16.507,2024-06-12 07:00:16.507,NaT,,


#### 4.6 Closing connection

In [77]:
conn_rds.close()

### 5. Transformation

#### 5.1 Selecting subsets of each dataframe

In [78]:
# Kamus pemetaan bulan dalam bahasa Indonesia
month_mapping = {
    1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr',
    5: 'Mei', 6: 'Jun', 7: 'Jul', 8: 'Agu',
    9: 'Sep', 10: 'Okt', 11: 'Nov', 12: 'Des'
}

# Ekstraksi tahun, bulan, dan hari
users_df['tahun'] = users_df['created_at'].dt.year
users_df['bulan'] = users_df['created_at'].dt.month.map(month_mapping)
users_df['tanggal'] = users_df['created_at'].dt.day
users_df['tanggallengkap'] = pd.to_datetime(users_df['created_at'].dt.date)

dim_destinations = destinations_df[['id', 'name', 'description', 'open_time', 'close_time', 'entry_price', 'longitude', 'latitude', 'visit_count']]
dim_routes = routes_df[['id', 'name', 'start_longitude', 'start_latitude', 'price']]
dim_route_details = route_details_df[['id', 'longitude', 'latitude', 'duration', 'order', 'visit_start', 'visit_end']]
dim_users = users_df[['id', 'email', 'username', 'fullname', 'phone_number', 'gender', 'city', 'province', 'tahun', 'bulan', 'tanggal', 'tanggallengkap']]

#### 5.2 destinations dimension

In [79]:
dim_destinations

Unnamed: 0,id,name,description,open_time,close_time,entry_price,longitude,latitude,visit_count
0,00319825-e81b-4a93-bf9a-eef836ccf6ce,Keraton Kasunanan Surakarta,Keraton Kasunanan Surakarta adalah istana resm...,9:00,14:00,20000.0,110.827903,-7.577736,293
1,005e32da-f5fb-405c-9772-f6424c011fac,Desa Wisata Kasongan,Desa yang terkenal dengan kerajinan gerabah da...,8:00,17:00,0.0,110.337978,-7.845291,243
2,01b8da3a-3987-4976-9ae7-46d6b8eacee5,Saung Angklung Udjo,Saung Angklung Udjo adalah pusat Seni dan Buda...,8:00,17:00,120000.0,107.654684,-6.897701,203
3,01ccaac9-97fe-42e1-8034-bc0c02471692,Museum Etnografi Sendawar,Museum Etnografi Sendawar merupakan museum per...,9:00,18:00,5000.0,115.701909,-0.232651,134
4,02834099-7ce7-44b9-8b27-e4db38f062bd,Danau Limboto,"Danau ini terletak di Kecamatan Limboto, tepat...",0:00,23:59,0.0,123.007543,0.577025,219
...,...,...,...,...,...,...,...,...,...
323,fbee819e-2b7b-44fe-8d5a-74dd345cea0e,Danau Tahai,Di kawasan objek wisata Danau Tahai ini terdap...,0:00,23:59,0.0,113.783202,-2.027312,136
324,fc780065-0848-467b-9625-9c24ecd2e9c2,Pantai Maaf,Pantai Maaf menawarkan perairan yang damai den...,6:00,18:00,10000.0,135.496749,-3.360245,163
325,fde8748a-2ec0-4211-89e5-12f3d63db81b,Piaynemo Raja Ampat,Piaynemo adalah salah satu ikon wisata di Raja...,10:00,17:00,150000.0,130.270813,-0.564206,123
326,fe1ebfe5-365f-4893-aa36-91e380bb2011,Wisata Pantai Gedo,Wisata Pantai Gedo adalah destinasi pantai ind...,6:00,18:00,15000.0,135.546550,-3.309413,299


#### 5.3 routes dimension

In [80]:
dim_routes

Unnamed: 0,id,name,start_longitude,start_latitude,price
0,002501e6-fa15-42da-a61f-57b17efb8ee8,Route ke 35,119.923853,-9.605230,335000.0
1,025d2240-3026-4a14-b26c-1e8384165e41,Route ke 10,114.691269,-6.602853,116000.0
2,026ab3bf-6b19-470b-9093-8820e3bc8384,Route ke 99,113.069697,7.631735,386000.0
3,02f6854d-1b57-4079-a2e3-394e9f037bb4,Liburan ke Bandung,107.620761,-6.916096,200000.0
4,0308d6d2-d28b-42d0-ad28-e64fd96bb2fd,Route ke 12,12.010000,12.010000,100000.0
...,...,...,...,...,...
252,f8fad44d-e5b1-4a3b-939e-1955bf9f7c67,Route ke 14,12.010000,12.010000,100000.0
253,f9ba7bf0-c6cd-4f4c-b18b-b4b08465234a,Route ke 28,112.975730,-4.493524,364000.0
254,f9cb8326-e5e2-4db8-b14b-3bf270e39741,Route ke 82,111.150113,-5.183633,240000.0
255,fa4765c0-a4d4-450e-b6f4-e41bb6743189,Route ke 111,117.008405,-3.332134,304000.0


#### 5.4 route_details dimension

In [81]:
dim_route_details['visit_start'] = dim_route_details['visit_start'].astype(str)
dim_route_details['visit_end'] = dim_route_details['visit_end'].astype(str)
dim_route_details

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dim_route_details['visit_start'] = dim_route_details['visit_start'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dim_route_details['visit_end'] = dim_route_details['visit_end'].astype(str)


Unnamed: 0,id,longitude,latitude,duration,order,visit_start,visit_end
0,00774a2d-c7ac-442a-b0b3-afab936306b6,111.633128,-2.675879,60,1,0 days 08:00:00,0 days 09:00:00
1,00bc0620-fec9-44f1-87b0-ba97e719bdd7,106.160397,-6.042848,120,2,0 days 08:00:00,0 days 09:00:00
2,00f932c8-a563-42b5-8579-7a08659daa36,107.621455,-6.900718,180,3,0 days 08:00:00,0 days 09:00:00
3,01992452-7884-4059-b530-d6b0fdb3fd13,117.178030,-5.901893,180,3,0 days 08:00:00,0 days 09:00:00
4,019fb0be-4664-49cb-99e1-813548ebf86b,134.098755,-0.894371,60,1,0 days 08:00:00,0 days 09:00:00
...,...,...,...,...,...,...,...
736,fe65d059-a13a-4cc7-8b57-846291fe1369,111.633128,-2.675879,180,3,0 days 08:00:00,0 days 09:00:00
737,fecdfcd5-a98e-462c-8a3f-9664cffa6ba6,111.674969,0.080795,60,1,0 days 08:00:00,0 days 09:00:00
738,ff0c3b85-e951-4ece-8329-a2b14065aa57,119.404290,-5.191177,180,3,0 days 08:00:00,0 days 09:00:00
739,ff83c993-6981-433c-abed-28db2fd8b26d,110.337978,-7.845291,60,1,0 days 08:00:00,0 days 09:00:00


#### 5.5 users dimension

In [82]:
dim_users

Unnamed: 0,id,email,username,fullname,phone_number,gender,city,province,tahun,bulan,tanggal,tanggallengkap
0,00862788-5ded-4065-8275-2569748f64aa,xifihi49190@huleos.com,testverify10,John Doe,081234567891,,,,2024,Mei,27,2024-05-27
1,00bddbb8-8e7d-46ab-acfc-aab981c1ada1,irsyadyazidsyafiq@gmail.com,yazid.syafiq,Yazid Syafiq Irsyad,081393984849,Pria,Malang,Jawa Timur,2024,Mei,31,2024-05-31
2,03d7240b-3e9b-4105-ac93-a926b752bb81,shuuuuuuu@example.com,paimon,nama_lengkap,081234567890,pria,Jaksel,Jakarta,2024,Mei,28,2024-05-28
3,0483c5ae-046a-403a-a656-db920635396b,johndoe2345@example.com,johndoe2443,John Doe,081234567891,,,,2024,Mei,25,2024-05-25
4,05d6e970-851b-4a8d-8869-587ebe5273cd,ulfiizza@gmail.com,ulfi,ulfi izza,089123456789,Wanita,malang,jawa timur,2024,Jun,13,2024-06-13
...,...,...,...,...,...,...,...,...,...,...,...,...
146,fa95b90d-41a8-44fe-aea8-e3b1553532c8,puterisalsaa8@gmail.com,inisalsa,Salsa Maulidina,08115122238,,,,2024,Jun,10,2024-06-10
147,fc21e23a-ca8c-4cf9-8c5d-f1a56b431022,xhyeni@gmail.com,xhyeni,xhyeni ananda,086182836712,Wanita,Malang,Jawa Timur,2024,Jun,19,2024-06-19
148,fc9e86d9-e3ba-42eb-a46c-16ba07dfeb74,tajoli5069@javnoi.com,testverfiy25,Test Verify,08123456789,,,,2024,Mei,28,2024-05-28
149,fd13566c-bd12-4088-8537-2a52dbdca23b,Nausicaa5@example.com,Nausicaa5,John Doe,081234567891,,,,2024,Jun,12,2024-06-12


#### 5.6 routes_fact

In [83]:
# merging dataframes
merged_df = pd.merge(routes_df, route_details_df, left_on='id', right_on='route_id', suffixes=('_routes', '_route_details'))
merged_df = pd.merge(merged_df, users_df, left_on='user_id', right_on='id', suffixes=('_routes', '_users'))
merged_df = pd.merge(merged_df, destinations_df, left_on='destination_id', right_on='id', suffixes=('_routes_users', '_destinations'))

# Pilih subset kolom
routes_fact = merged_df[['id_routes', 'user_id', 'id_route_details', 'id_destinations']]

# renaming some of columns name
routes_fact = routes_fact.rename(columns={"id_routes": "route_id", 
                                          "id_route_details": "route_details_id", 
                                          "id_destinations": "destinations_id"})

# adding user_count and route_count columns
routes_fact['user_count'] = 1
routes_fact['route_count'] = 1


# display the dataframe
routes_fact

Unnamed: 0,route_id,user_id,route_details_id,destinations_id,user_count,route_count
0,002501e6-fa15-42da-a61f-57b17efb8ee8,a8bddfb7-075a-4e3e-bfb0-c4a5f9d270d2,51489bf7-47e8-49e8-b0a1-6ec4ebb61ef8,8ea205c8-2f85-4f34-a1af-5a22001f9b5b,1,1
1,99a4414a-25e0-4f93-a647-a603e9736545,596f22e4-b0c0-4c0b-8703-cb460eb22ce8,3072178c-39b8-47d8-95e8-5b59131e6feb,8ea205c8-2f85-4f34-a1af-5a22001f9b5b,1,1
2,002501e6-fa15-42da-a61f-57b17efb8ee8,a8bddfb7-075a-4e3e-bfb0-c4a5f9d270d2,bb83ede0-e155-4137-8aee-f7b7cf0f0363,affe6e66-db67-408c-8914-497c7338ff72,1,1
3,561ddc3b-298e-4aa3-b1cf-f4153968c517,f7df5114-238c-4dae-b977-73e80e50a159,54a5e027-6bf0-49b7-a58c-972c94ce985f,affe6e66-db67-408c-8914-497c7338ff72,1,1
4,335aebc9-3335-4967-a805-396a510570c0,7a5a6f3b-9134-43ce-b0ed-18b93050379a,06b7d9b5-e0df-4c62-905a-304036b7892c,affe6e66-db67-408c-8914-497c7338ff72,1,1
...,...,...,...,...,...,...
736,d9a64f16-d5f7-47e1-8449-233ccfb9fe8f,98ad0963-e402-402a-b119-f5b551242559,61f5cd62-ceaa-4225-9230-0b1b9147436d,94c2f51e-9b60-4237-acc2-6dd30c8769fb,1,1
737,d9a64f16-d5f7-47e1-8449-233ccfb9fe8f,98ad0963-e402-402a-b119-f5b551242559,c9edaed2-de8c-4f7b-97d5-53300d160388,4f4ba465-d0ed-4950-a6c6-51770fba7b9a,1,1
738,f8a85da6-4f1f-4514-90c5-60eca62e34b2,d3147562-84b4-4655-9c01-bc9ee1e5f21a,4c645b15-1e25-48ad-a0f2-621d9a0588d1,227fc1b1-0aa4-4c7a-9ad7-38bb1210c47c,1,1
739,f8a85da6-4f1f-4514-90c5-60eca62e34b2,d3147562-84b4-4655-9c01-bc9ee1e5f21a,ade557bb-0883-4931-9b43-6eab00662c35,18515a8f-09e4-4522-8307-81d7fdeb8214,1,1


### 6. Load

#### load _to_gbq function below is used to load extract data from a dataframe into a table in google big query

In [84]:
def load_to_gbq(credentials, project_id, dataset_id, table_names, dataframes):
   for df, table_name in zip(dataframes, table_names):
      table_full_id = f'{project_id}.{dataset_id}.{table_name}'
      to_gbq(df, table_full_id, project_id=project_id, if_exists='replace', credentials=credentials)
      print(f'Table {table_name} loaded successfully!')

#### 6.1 Load to Big Query

In [85]:
credentials = service_account.Credentials.from_service_account_file(google_application_credentials)

dfs = [dim_destinations, dim_routes, dim_route_details, dim_users, routes_fact]
tables = ['dim_destinations', 'dim_routes', 'dim_route_details', 'dim_users', 'routes_fact']

load_to_gbq(credentials, project_id, dataset_id, tables, dfs)

100%|██████████| 1/1 [00:00<00:00, 912.00it/s]


Table dim_destinations loaded successfully!


100%|██████████| 1/1 [00:00<00:00, 982.73it/s]


Table dim_routes loaded successfully!


100%|██████████| 1/1 [00:00<00:00, 988.29it/s]


Table dim_route_details loaded successfully!


100%|██████████| 1/1 [00:00<?, ?it/s]


Table dim_users loaded successfully!


100%|██████████| 1/1 [00:00<?, ?it/s]

Table routes_fact loaded successfully!



