### 1. Install Library

In [1]:
# %pip install pandas-gbq google-cloud-bigquery
# %pip install pandas-gbq google-auth google-auth-oauthlib

### 2. Import Library

In [2]:
import pandas as pd
import mysql.connector
from google.oauth2 import service_account
from google.cloud import bigquery
import pandas_gbq
from pandas_gbq import to_gbq
import os
from dotenv import load_dotenv
from datetime import datetime, timedelta

### 3. Database Configuration

In [3]:
# Load konfigurasi dari .env
load_dotenv()

# MySQL configurations
rds_host = os.getenv('RDS_HOST')
rds_dbname = os.getenv('RDS_DBNAME')
rds_user = os.getenv('RDS_USER')
rds_password = os.getenv('RDS_PASSWORD')

# Google Cloud configurations
project_id = os.getenv('GOOGLE_CLOUD_PROJECT_ID')
dataset_id = os.getenv('GOOGLE_CLOUD_DATASET_ID')
google_application_credentials = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = google_application_credentials

# Koneksi ke MySQL di Amazon RDS
conn_rds = mysql.connector.connect(
    host=rds_host,
    database=rds_dbname,
    user=rds_user,
    password=rds_password
)

### 4. Extract

#### table_to_df function below is used to extract data from a table and convert it into a dataframe

In [4]:
def table_to_df(table_name):
   query = f"SELECT * FROM {table_name}"
   df = pd.read_sql_query(query, conn_rds)
   return df

#### 4.1 Converting data from tables to dataframes

In [5]:
destinations_df = table_to_df('destinations')
routes_df = table_to_df('routes')
route_details_df = table_to_df('route_details')
users_df = table_to_df('users')

  df = pd.read_sql_query(query, conn_rds)


#### 4.2 destinations dataframe

In [6]:
destinations_df

Unnamed: 0,id,category_id,name,description,open_time,close_time,entry_price,longitude,latitude,visit_count,created_at,updated_at,deleted_at
0,00319825-e81b-4a93-bf9a-eef836ccf6ce,eb77b590-b255-4ea1-b11a-d445a259ac62,Keraton Kasunanan Surakarta,Keraton Kasunanan Surakarta adalah istana resm...,9:00,14:00,20000.0,110.827903,-7.577736,293,2024-06-11 08:28:33,2024-06-20 09:53:01,2024-06-20 09:53:01.786
1,005e32da-f5fb-405c-9772-f6424c011fac,eb77b590-b255-4ea1-b11a-d445a259ac62,Desa Wisata Kasongan,Desa yang terkenal dengan kerajinan gerabah da...,8:00,17:00,0.0,110.337978,-7.845291,243,2024-06-11 08:28:27,2024-06-20 09:55:01,2024-06-20 09:55:01.453
2,01b8da3a-3987-4976-9ae7-46d6b8eacee5,eb77b590-b255-4ea1-b11a-d445a259ac62,Saung Angklung Udjo,Saung Angklung Udjo adalah pusat Seni dan Buda...,8:00,17:00,120000.0,107.654684,-6.897701,203,2024-06-11 08:28:33,2024-06-20 09:55:14,2024-06-20 09:55:14.647
3,01ccaac9-97fe-42e1-8034-bc0c02471692,eb77b590-b255-4ea1-b11a-d445a259ac62,Museum Etnografi Sendawar,Museum Etnografi Sendawar merupakan museum per...,9:00,18:00,5000.0,115.701909,-0.232651,134,2024-06-11 08:28:20,2024-06-20 10:00:54,2024-06-20 10:00:54.207
4,02834099-7ce7-44b9-8b27-e4db38f062bd,eb77b590-b255-4ea1-b11a-d445a259ac61,Danau Limboto,"Danau ini terletak di Kecamatan Limboto, tepat...",0:00,23:59,0.0,123.007543,0.577025,219,2024-06-11 08:28:21,2024-06-20 10:02:09,2024-06-20 10:02:09.429
...,...,...,...,...,...,...,...,...,...,...,...,...,...
428,fbee819e-2b7b-44fe-8d5a-74dd345cea0e,eb77b590-b255-4ea1-b11a-d445a259ac61,Danau Tahai,Di kawasan objek wisata Danau Tahai ini terdap...,0:00,23:59,0.0,113.783202,-2.027312,136,2024-06-11 08:28:19,2024-06-11 08:28:19,NaT
429,fc780065-0848-467b-9625-9c24ecd2e9c2,eb77b590-b255-4ea1-b11a-d445a259ac61,Pantai Maaf,Pantai Maaf menawarkan perairan yang damai den...,6:00,18:00,10000.0,135.496749,-3.360245,163,2024-06-11 08:28:32,2024-06-11 08:28:32,NaT
430,fde8748a-2ec0-4211-89e5-12f3d63db81b,eb77b590-b255-4ea1-b11a-d445a259ac61,Piaynemo Raja Ampat,Piaynemo adalah salah satu ikon wisata di Raja...,10:00,17:00,150000.0,130.270813,-0.564206,123,2024-06-11 08:28:32,2024-06-11 08:28:32,NaT
431,fe1ebfe5-365f-4893-aa36-91e380bb2011,eb77b590-b255-4ea1-b11a-d445a259ac61,Wisata Pantai Gedo,Wisata Pantai Gedo adalah destinasi pantai ind...,6:00,18:00,15000.0,135.546550,-3.309413,300,2024-06-11 08:28:32,2024-06-23 06:37:28,NaT


#### 4.3 routes dataframe

In [7]:
routes_df

Unnamed: 0,id,user_id,city_id,name,start_location,start_longitude,start_latitude,price,created_at,updated_at,deleted_at
0,0170287c-00ad-4c97-a873-7ef6e13be7fb,16c77b92-1cb6-476e-8b58-dbffbb0ec8b9,1306,ruut,"Jl. Polowijen 1 No.4, Polowijen Kecamatan Blim...",112.648475,-7.928018,3500000.0,2024-06-21 15:44:00,2024-06-23 23:53:48,2024-06-23 23:53:48.933
1,02dcc9f2-1970-4a36-9031-b434a618e1a7,16c77b92-1cb6-476e-8b58-dbffbb0ec8b9,3204,asdasd,"Jl. Polowijen 1 No.4, Polowijen Kecamatan Blim...",112.648475,-7.928018,30000.0,2024-06-21 16:25:42,2024-06-24 01:16:26,2024-06-24 01:16:26.815
2,02f6854d-1b57-4079-a2e3-394e9f037bb4,ac91e0a6-0c10-4d88-b253-294b6eb661cf,3204,Liburan ke Bandung,Gedung Sate,107.620761,-6.916096,200000.0,2024-06-19 07:35:14,2024-06-21 08:56:09,2024-06-21 08:56:09.855
3,0458f0ec-4bbe-4950-99ad-afd5f48762d3,a48e02bb-6a68-4586-85e4-b038d795da5b,3204,bandung day 1,"1600 Amphitheatre Pkwy Building 43, Mountain ...",-122.084000,37.421998,200000.0,2024-06-23 09:15:22,2024-06-23 09:15:22,NaT
4,08aa6f3c-acb9-424d-8a74-7ab2605c9965,16c77b92-1cb6-476e-8b58-dbffbb0ec8b9,1104,asd,"Jl. Polowijen 1 No.4, Polowijen Kecamatan Blim...",112.648475,-7.928018,500000.0,2024-06-21 16:28:23,2024-06-23 08:53:19,2024-06-23 08:53:19.107
...,...,...,...,...,...,...,...,...,...,...,...
118,f7eb9190-deb0-4d46-9e3e-6e68a886857c,16c77b92-1cb6-476e-8b58-dbffbb0ec8b9,9304,ruyut,"2JMM+932, Samaan Klojen Malang City, East Java...",21.000000,-7.966620,50000.0,2024-06-21 13:21:52,2024-06-21 13:21:52,NaT
119,fca75cfb-7cee-46bf-b505-68ecd0a52a11,16c77b92-1cb6-476e-8b58-dbffbb0ec8b9,7503,ruutte,"2JMM+932, Samaan Klojen Malang City, East Java...",21.000000,-7.966620,50000.0,2024-06-21 12:22:30,2024-06-21 12:22:30,NaT
120,fcd77473-4635-478f-9aff-62a1e98ef274,16c77b92-1cb6-476e-8b58-dbffbb0ec8b9,7503,ruutte,"2JMM+932, Samaan Klojen Malang City, East Java...",21.000000,-7.966620,50000.0,2024-06-21 12:21:34,2024-06-21 12:21:34,NaT
121,ff93ad6e-0e2a-484b-87eb-ad57fb75a6d7,16c77b92-1cb6-476e-8b58-dbffbb0ec8b9,9304,ruut,"2JMM+932, Samaan Klojen Malang City, East Java...",21.000000,-7.966620,50000.0,2024-06-21 12:26:46,2024-06-21 12:26:46,NaT


#### 4.4 route_details dataframe

In [8]:
route_details_df

Unnamed: 0,id,destination_id,route_id,longitude,latitude,duration,order,visit_start,visit_end,created_at,updated_at,deleted_at
0,01ba3f24-769f-4d01-a8a2-4bb88355fba5,10d45453-c83f-4d23-bcbe-5373ddcb8807,a2ffb695-4828-4685-865d-4f63a194a272,107.576041,-6.798706,64800,1,0 days 09:00:00,0 days 10:30:00,2024-06-21 16:31:30,2024-06-21 16:31:30,NaT
1,01d3401f-0e80-4181-b31f-2399ba20d5ff,9ccd422e-50ab-4c4d-b713-598ca0311b77,72d5d30f-2f44-4ade-b384-1cfc1f209b45,100.217877,-0.353873,97920,1,0 days 11:30:00,0 days 13:00:00,2024-06-21 16:14:46,2024-06-21 16:14:46,NaT
2,0376514c-37af-49da-b9b5-1f934bcc0827,5299d103-851a-4cef-b66f-dfc85a9a6ed0,940eff30-b415-491f-8408-8bdaf95e0adb,107.207205,-7.018241,10800,2,0 days 11:30:00,0 days 12:30:00,2024-06-21 16:30:39,2024-06-21 16:30:39,NaT
3,03dab2c8-7290-41e2-a07e-916824d12ca8,45c50f14-480f-446e-bf5c-e27b4aabfe0c,b3227e41-daff-48e9-a232-e9dc3fc2e328,96.996653,4.592379,21600,1,0 days 11:30:00,0 days 15:00:00,2024-06-21 16:17:07,2024-06-21 16:17:07,NaT
4,04b5cb11-69e2-4175-9fb4-3fc45e71c3b8,aab8d2dc-d968-4233-92c4-6dc19ae143c6,14f17375-f45f-46cd-9025-b813a9ef6075,106.889957,-6.301783,18000,1,0 days 16:30:00,0 days 18:00:00,2024-06-21 16:03:41,2024-06-21 16:03:41,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...
182,f4449f3e-995d-458b-9ed1-a94e338cc1ac,4ce34993-985e-4e27-a31e-4753e6b8ef23,2eef2911-7f83-4a0a-90fa-8dbdb36987b2,103.159275,-4.022511,2700,2,0 days 11:00:00,0 days 11:45:00,2024-06-23 06:50:01,2024-06-23 06:50:01,NaT
183,f5009d18-3e42-4832-9f62-be7d5fdb25e8,b3bbb0ed-c092-4cc8-af76-59f883e51738,30dca53e-f902-49ca-812a-38a586b386d3,116.461640,-8.410276,5400,1,0 days 07:00:00,0 days 14:00:00,2024-06-23 13:17:14,2024-06-23 13:17:14,NaT
184,f6054c5d-d8a7-4800-96f3-e5c07fcf5af2,eb8122b0-9f90-46f1-9bd2-5366fe0a40c7,cc53f754-4d5c-4bdd-b9d9-9cf44ba8b187,114.573132,-3.293217,1200,2,0 days 14:00:00,0 days 15:30:00,2024-06-23 08:08:46,2024-06-23 08:08:46,NaT
185,fc33e4ac-09ee-4192-bf1b-1fffc89fa09c,6a724317-15aa-4890-ab1f-7758106ad6d2,cd5bd9b5-1072-40ee-9b57-afb22d8b894e,100.192011,-0.305623,25200,1,0 days 10:00:00,0 days 11:00:00,2024-06-21 13:28:50,2024-06-21 13:28:50,NaT


#### 4.5 users dataframe

In [9]:
users_df

Unnamed: 0,id,email,password,username,fullname,bio,phone_number,profile_image_url,gender,city,email_verified_at,created_at,updated_at,deleted_at,province,refresh_token
0,00862788-5ded-4065-8275-2569748f64aa,xifihi49190@huleos.com,$2a$10$ToBK0ZSXzQHhU9KEod/gk.jk.Fr5VlxUkdz.dLl...,testverify10,John Doe,,081234567891,,,,2024-06-22 09:00:44.000,2024-02-28 19:19:30.231,2024-05-27 15:26:01.704,2024-06-23 08:48:48.470,,
1,00bddbb8-8e7d-46ab-acfc-aab981c1ada1,irsyadyazidsyafiq@gmail.com,$2a$10$kcd7TdvXiS9QNHvFcLlsEe.pqUerZd3cEM7IKZc...,yazid.syafiq,Yazid Syafiq,,081393984849,https://res.cloudinary.com/alta-minpro/image/u...,Pria,Pringsewu,2024-05-31 11:59:19.353,2024-05-18 03:32:14.211,2024-06-24 01:31:26.004,NaT,Lampung,eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJJZCI6I...
2,03d7240b-3e9b-4105-ac93-a926b752bb81,shuu@example.com,$2a$10$IMijbn2w2PxgbgBW5399f.mXxUhLcHL9Nuv29lT...,paimon,nama_lengkap,,081234567890,img.jpg,Wanita,Jaksel,NaT,2024-06-16 06:33:53.904,2024-06-23 11:22:47.505,NaT,Jakarta,
3,0483c5ae-046a-403a-a656-db920635396b,johndoe2345@example.com,$2a$10$Dxg27rgrHy1bZPiD0jNI8uInxr/duxnf3QEH8IS...,johndoe2443,John Doe,,081234567891,,,,NaT,2024-03-15 21:04:00.423,2024-05-25 15:12:44.106,2024-06-05 14:46:44.405,,
4,05d6e970-851b-4a8d-8869-587ebe5273cd,ulfiizza@gmail.com,$2a$10$2eFAkCeXNgTy69xF8gaIC.cdEOaiCB7q.olRjeK...,ulfi,ulfi izza,,089123456789,https://res.cloudinary.com/alta-minpro/image/u...,Wanita,malang,NaT,2024-02-16 14:12:42.942,2024-06-16 14:37:19.404,NaT,jawa timur,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,fa95b90d-41a8-44fe-aea8-e3b1553532c8,puterisalsaa8@gmail.com,$2a$10$fkklkqERHa.hDRZxlPHhp.dzWPhwSchFLKzC4y0...,inisalsa,Salsa Maulidina,,08115122238,,,,2024-06-10 12:12:21.233,2024-02-06 22:12:37.382,2024-06-10 12:12:23.104,NaT,,eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJJZCI6I...
176,fc21e23a-ca8c-4cf9-8c5d-f1a56b431022,xhyeni@gmail.com,$2a$10$QpNu5wnAwZQKjvZc3QijZ.mjhw7XSx5FSC.UmjE...,xhyeni,xhyeni ananda,,086182836712,https://res.cloudinary.com/alta-minpro/image/u...,Wanita,Malang,NaT,2024-01-28 06:02:07.294,2024-06-19 06:14:44.495,NaT,Jawa Timur,
177,fc9e86d9-e3ba-42eb-a46c-16ba07dfeb74,tajoli5069@javnoi.com,$2a$10$D5rUZTz5z2SkdRJMeogJLODp2Xp9kHSLTVtNTrC...,testverfiy25,Test Verify,,08123456789,,,,2024-05-28 07:16:51.573,2024-01-26 19:32:43.861,2024-05-28 07:16:51.576,NaT,,
178,fd13566c-bd12-4088-8537-2a52dbdca23b,Nausicaa5@example.com,$2a$10$mR0l7hjnGtU6PF9V1LHTFerCZXqK.w7vBy.M.h0...,Nausicaa5,John Doe,,081234567891,,,,NaT,2024-02-17 15:37:16.964,2024-06-12 07:00:16.507,NaT,,


#### 4.6 Closing connection

In [10]:
conn_rds.close()

### 5. Transformation

#### 5.1 Selecting subsets of each dataframe

In [11]:
# Kamus pemetaan bulan dalam bahasa Indonesia
month_mapping = {
    1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr',
    5: 'Mei', 6: 'Jun', 7: 'Jul', 8: 'Agu',
    9: 'Sep', 10: 'Okt', 11: 'Nov', 12: 'Des'
}

# Ekstraksi tahun, bulan, dan hari
users_df['tahun'] = users_df['created_at'].dt.year
users_df['bulan'] = users_df['created_at'].dt.month.map(month_mapping)
users_df['tanggal'] = users_df['created_at'].dt.day
users_df['tanggallengkap'] = pd.to_datetime(users_df['created_at'].dt.date)

dim_destinations = destinations_df[['id', 'name', 'description', 'open_time', 'close_time', 'entry_price', 'longitude', 'latitude', 'visit_count']]
dim_routes = routes_df[['id', 'name', 'start_longitude', 'start_latitude', 'price']]
dim_route_details = route_details_df[['id', 'longitude', 'latitude', 'duration', 'order', 'visit_start', 'visit_end']]
dim_users = users_df[['id', 'email', 'username', 'fullname', 'phone_number', 'gender', 'city', 'province', 'tahun', 'bulan', 'tanggal', 'tanggallengkap']]

#### 5.2 destinations dimension

In [12]:
dim_destinations

Unnamed: 0,id,name,description,open_time,close_time,entry_price,longitude,latitude,visit_count
0,00319825-e81b-4a93-bf9a-eef836ccf6ce,Keraton Kasunanan Surakarta,Keraton Kasunanan Surakarta adalah istana resm...,9:00,14:00,20000.0,110.827903,-7.577736,293
1,005e32da-f5fb-405c-9772-f6424c011fac,Desa Wisata Kasongan,Desa yang terkenal dengan kerajinan gerabah da...,8:00,17:00,0.0,110.337978,-7.845291,243
2,01b8da3a-3987-4976-9ae7-46d6b8eacee5,Saung Angklung Udjo,Saung Angklung Udjo adalah pusat Seni dan Buda...,8:00,17:00,120000.0,107.654684,-6.897701,203
3,01ccaac9-97fe-42e1-8034-bc0c02471692,Museum Etnografi Sendawar,Museum Etnografi Sendawar merupakan museum per...,9:00,18:00,5000.0,115.701909,-0.232651,134
4,02834099-7ce7-44b9-8b27-e4db38f062bd,Danau Limboto,"Danau ini terletak di Kecamatan Limboto, tepat...",0:00,23:59,0.0,123.007543,0.577025,219
...,...,...,...,...,...,...,...,...,...
428,fbee819e-2b7b-44fe-8d5a-74dd345cea0e,Danau Tahai,Di kawasan objek wisata Danau Tahai ini terdap...,0:00,23:59,0.0,113.783202,-2.027312,136
429,fc780065-0848-467b-9625-9c24ecd2e9c2,Pantai Maaf,Pantai Maaf menawarkan perairan yang damai den...,6:00,18:00,10000.0,135.496749,-3.360245,163
430,fde8748a-2ec0-4211-89e5-12f3d63db81b,Piaynemo Raja Ampat,Piaynemo adalah salah satu ikon wisata di Raja...,10:00,17:00,150000.0,130.270813,-0.564206,123
431,fe1ebfe5-365f-4893-aa36-91e380bb2011,Wisata Pantai Gedo,Wisata Pantai Gedo adalah destinasi pantai ind...,6:00,18:00,15000.0,135.546550,-3.309413,300


#### 5.3 routes dimension

In [13]:
dim_routes

Unnamed: 0,id,name,start_longitude,start_latitude,price
0,0170287c-00ad-4c97-a873-7ef6e13be7fb,ruut,112.648475,-7.928018,3500000.0
1,02dcc9f2-1970-4a36-9031-b434a618e1a7,asdasd,112.648475,-7.928018,30000.0
2,02f6854d-1b57-4079-a2e3-394e9f037bb4,Liburan ke Bandung,107.620761,-6.916096,200000.0
3,0458f0ec-4bbe-4950-99ad-afd5f48762d3,bandung day 1,-122.084000,37.421998,200000.0
4,08aa6f3c-acb9-424d-8a74-7ab2605c9965,asd,112.648475,-7.928018,500000.0
...,...,...,...,...,...
118,f7eb9190-deb0-4d46-9e3e-6e68a886857c,ruyut,21.000000,-7.966620,50000.0
119,fca75cfb-7cee-46bf-b505-68ecd0a52a11,ruutte,21.000000,-7.966620,50000.0
120,fcd77473-4635-478f-9aff-62a1e98ef274,ruutte,21.000000,-7.966620,50000.0
121,ff93ad6e-0e2a-484b-87eb-ad57fb75a6d7,ruut,21.000000,-7.966620,50000.0


#### 5.4 route_details dimension

In [14]:
dim_route_details['visit_start'] = dim_route_details['visit_start'].astype(str)
dim_route_details['visit_end'] = dim_route_details['visit_end'].astype(str)
dim_route_details

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dim_route_details['visit_start'] = dim_route_details['visit_start'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dim_route_details['visit_end'] = dim_route_details['visit_end'].astype(str)


Unnamed: 0,id,longitude,latitude,duration,order,visit_start,visit_end
0,01ba3f24-769f-4d01-a8a2-4bb88355fba5,107.576041,-6.798706,64800,1,0 days 09:00:00,0 days 10:30:00
1,01d3401f-0e80-4181-b31f-2399ba20d5ff,100.217877,-0.353873,97920,1,0 days 11:30:00,0 days 13:00:00
2,0376514c-37af-49da-b9b5-1f934bcc0827,107.207205,-7.018241,10800,2,0 days 11:30:00,0 days 12:30:00
3,03dab2c8-7290-41e2-a07e-916824d12ca8,96.996653,4.592379,21600,1,0 days 11:30:00,0 days 15:00:00
4,04b5cb11-69e2-4175-9fb4-3fc45e71c3b8,106.889957,-6.301783,18000,1,0 days 16:30:00,0 days 18:00:00
...,...,...,...,...,...,...,...
182,f4449f3e-995d-458b-9ed1-a94e338cc1ac,103.159275,-4.022511,2700,2,0 days 11:00:00,0 days 11:45:00
183,f5009d18-3e42-4832-9f62-be7d5fdb25e8,116.461640,-8.410276,5400,1,0 days 07:00:00,0 days 14:00:00
184,f6054c5d-d8a7-4800-96f3-e5c07fcf5af2,114.573132,-3.293217,1200,2,0 days 14:00:00,0 days 15:30:00
185,fc33e4ac-09ee-4192-bf1b-1fffc89fa09c,100.192011,-0.305623,25200,1,0 days 10:00:00,0 days 11:00:00


#### 5.5 users dimension

In [15]:
dim_users

Unnamed: 0,id,email,username,fullname,phone_number,gender,city,province,tahun,bulan,tanggal,tanggallengkap
0,00862788-5ded-4065-8275-2569748f64aa,xifihi49190@huleos.com,testverify10,John Doe,081234567891,,,,2024,Feb,28,2024-02-28
1,00bddbb8-8e7d-46ab-acfc-aab981c1ada1,irsyadyazidsyafiq@gmail.com,yazid.syafiq,Yazid Syafiq,081393984849,Pria,Pringsewu,Lampung,2024,Mei,18,2024-05-18
2,03d7240b-3e9b-4105-ac93-a926b752bb81,shuu@example.com,paimon,nama_lengkap,081234567890,Wanita,Jaksel,Jakarta,2024,Jun,16,2024-06-16
3,0483c5ae-046a-403a-a656-db920635396b,johndoe2345@example.com,johndoe2443,John Doe,081234567891,,,,2024,Mar,15,2024-03-15
4,05d6e970-851b-4a8d-8869-587ebe5273cd,ulfiizza@gmail.com,ulfi,ulfi izza,089123456789,Wanita,malang,jawa timur,2024,Feb,16,2024-02-16
...,...,...,...,...,...,...,...,...,...,...,...,...
175,fa95b90d-41a8-44fe-aea8-e3b1553532c8,puterisalsaa8@gmail.com,inisalsa,Salsa Maulidina,08115122238,,,,2024,Feb,6,2024-02-06
176,fc21e23a-ca8c-4cf9-8c5d-f1a56b431022,xhyeni@gmail.com,xhyeni,xhyeni ananda,086182836712,Wanita,Malang,Jawa Timur,2024,Jan,28,2024-01-28
177,fc9e86d9-e3ba-42eb-a46c-16ba07dfeb74,tajoli5069@javnoi.com,testverfiy25,Test Verify,08123456789,,,,2024,Jan,26,2024-01-26
178,fd13566c-bd12-4088-8537-2a52dbdca23b,Nausicaa5@example.com,Nausicaa5,John Doe,081234567891,,,,2024,Feb,17,2024-02-17


#### 5.6 routes_fact

In [16]:
# merging dataframes
merged_df = pd.merge(routes_df, route_details_df, left_on='id', right_on='route_id', suffixes=('_routes', '_route_details'))
merged_df = pd.merge(merged_df, users_df, left_on='user_id', right_on='id', suffixes=('_routes', '_users'))
merged_df = pd.merge(merged_df, destinations_df, left_on='destination_id', right_on='id', suffixes=('_routes_users', '_destinations'))

# Pilih subset kolom
routes_fact = merged_df[['id_routes', 'user_id', 'id_route_details', 'id_destinations']]

# renaming some of columns name
routes_fact = routes_fact.rename(columns={"id_routes": "route_id", 
                                          "id_route_details": "route_details_id", 
                                          "id_destinations": "destinations_id"})

# adding user_count and route_count columns
routes_fact['user_count'] = 1
routes_fact['route_count'] = 1

# Menghitung total destinations untuk setiap route_id
routes_fact['total_destination'] = routes_fact.groupby('route_id')['destinations_id'].transform('count')

# display the dataframe
routes_fact

Unnamed: 0,route_id,user_id,route_details_id,destinations_id,user_count,route_count,total_destination
0,0170287c-00ad-4c97-a873-7ef6e13be7fb,16c77b92-1cb6-476e-8b58-dbffbb0ec8b9,29736670-002b-4f6a-8964-ecdb11bd688b,6a724317-15aa-4890-ab1f-7758106ad6d2,1,1,1
1,11ff1820-7fa5-4be9-94ae-c46759afa1dc,16c77b92-1cb6-476e-8b58-dbffbb0ec8b9,71c5f7f0-0d75-49ed-974a-cb79425f61b1,6a724317-15aa-4890-ab1f-7758106ad6d2,1,1,2
2,134eba7b-2875-4728-b8fa-658b0ec3c3e4,16c77b92-1cb6-476e-8b58-dbffbb0ec8b9,94c8dcc2-0069-4a1f-b447-56c6a1d8e665,6a724317-15aa-4890-ab1f-7758106ad6d2,1,1,1
3,1566f86e-80a4-4cd4-b65c-936881e08966,16c77b92-1cb6-476e-8b58-dbffbb0ec8b9,eac8b16c-38e6-44e2-b523-2c9d7429fe26,6a724317-15aa-4890-ab1f-7758106ad6d2,1,1,2
4,30c1f899-2b4e-484a-8434-ac435f51f961,16c77b92-1cb6-476e-8b58-dbffbb0ec8b9,7bb7fd1a-a609-456d-ad59-ec9c6c09b88d,6a724317-15aa-4890-ab1f-7758106ad6d2,1,1,1
...,...,...,...,...,...,...,...
182,a246937b-4c7c-46d9-851d-cae90f527b38,30df1cf7-25c2-4d7c-9f25-74634c63f177,b2c11642-01b2-4d7e-9dce-9bd669c56fef,d6a99094-4263-4982-9fd3-e424ce1021cf,1,1,3
183,bceb830e-f896-41b5-8539-d0097f784f04,30df1cf7-25c2-4d7c-9f25-74634c63f177,0e2b9226-70a6-45e3-86ee-f3352ab4e559,859934c8-6dbb-4cb0-9222-5de58d353ad6,1,1,2
184,bceb830e-f896-41b5-8539-d0097f784f04,30df1cf7-25c2-4d7c-9f25-74634c63f177,32a45a19-ca30-4fdd-987d-755f3453ac9a,fde8748a-2ec0-4211-89e5-12f3d63db81b,1,1,2
185,cc53f754-4d5c-4bdd-b9d9-9cf44ba8b187,36dbd1bf-3aba-4c54-bb44-382fb58e9097,4cc95711-2aee-488b-ab93-2541d0ef077a,bc078485-80b6-4aa9-8ebc-8dea650ba673,1,1,2


### 6. Load

#### load _to_gbq function below is used to load extract data from a dataframe into a table in google big query

In [17]:
def load_to_gbq(credentials, project_id, dataset_id, table_names, dataframes):
   for df, table_name in zip(dataframes, table_names):
      table_full_id = f'{project_id}.{dataset_id}.{table_name}'
      to_gbq(df, table_full_id, project_id=project_id, if_exists='replace', credentials=credentials)
      print(f'Table {table_name} loaded successfully!')

#### 6.1 Load to Big Query

In [18]:
credentials = service_account.Credentials.from_service_account_file(google_application_credentials)

dfs = [dim_destinations, dim_routes, dim_route_details, dim_users, routes_fact]
tables = ['dim_destinations', 'dim_routes', 'dim_route_details', 'dim_users', 'routes_fact']

load_to_gbq(credentials, project_id, dataset_id, tables, dfs)

100%|██████████| 1/1 [00:00<?, ?it/s]


Table dim_destinations loaded successfully!


100%|██████████| 1/1 [00:00<?, ?it/s]


Table dim_routes loaded successfully!


100%|██████████| 1/1 [00:00<?, ?it/s]


Table dim_route_details loaded successfully!


100%|██████████| 1/1 [00:00<?, ?it/s]


Table dim_users loaded successfully!


100%|██████████| 1/1 [00:00<?, ?it/s]

Table routes_fact loaded successfully!



