# 1. Import Library yang dibutuhkan

In [695]:
from sqlalchemy import create_engine, inspect
from dotenv import load_dotenv
import os
import pandas as pd
from datetime import datetime
import re
import html
from google.cloud import bigquery
from google.oauth2 import service_account

## 1.1 Connect to Database Backend

In [696]:
load_dotenv()

user = os.getenv('DB_USER')
password = os.getenv('DB_PASSWORD')
host = os.getenv('DB_HOST')
port = os.getenv('DB_PORT')
database = os.getenv('DB_NAME')

def get_connection():
    return create_engine(
        f"mysql+pymysql://{user}:{password}@{host}:{port}/{database}"
    )

def get_all_tables(engine):
    inspector = inspect(engine)
    return inspector.get_table_names()

def table_to_dataframe(engine, table_name):
    with engine.connect() as connection:
        query = f"SELECT * FROM {table_name}"
        result = connection.execute(query)
        df = pd.DataFrame(result.fetchall(), columns=result.keys())
        return df

engine = get_connection()
print(f"Koneksi ke {host} untuk user {user} sukses dibuat!.")

Koneksi ke 104.198.27.207 untuk user dataengineer sukses dibuat!.


# 2. Extract

## 2.1 Convert Tables to Dataframe

In [697]:
tables = get_all_tables(engine)
print(f"Tables in the database: {tables}")

Tables in the database: ['admins', 'customize_watering_reminders', 'fertilizers', 'notifications', 'plant_categories', 'plant_characteristics', 'plant_faqs', 'plant_images', 'plant_instruction_categories', 'plant_instructions', 'plant_progresses', 'plant_reminders', 'plants', 'user_plant_histories', 'user_plants', 'users', 'watering_histories']


### 2.1.1 All Dataframe

In [698]:
output_dir = "../data_source_csv"

for table in tables:
    df_variable_name = f"df_{table}"
    globals()[df_variable_name] = table_to_dataframe(engine, table)
    print(f"Menampilkan Dataframe dari tabel: {table}")
    display(globals()[df_variable_name])
    
    csv_filename = os.path.join(output_dir, f"{table}.csv")
    globals()[df_variable_name].to_csv(csv_filename, index=False)
    print(f"Menyimpan Dataframe dari tabel {table} ke {csv_filename} \n")

Menampilkan Dataframe dari tabel: admins


Unnamed: 0,id,name,email,password,url_image,created_at,updated_at
0,1,Octaviano Ryan Eka Putra Hartanto,octavianoryan123@gmail.com,$2a$08$wAw9SfEDq6EsDZ7NqYpNnOXE6aV1GB3YUA3zWpT...,,2024-05-28 09:13:45.846,2024-05-28 09:13:45.846
1,2,Anggita Prameswari Darmawan,aprameswarid@gmail.com,$2a$08$MDwcRNb2SadCbSiUORr6H.QISr6j3ygPcGGhJTp...,,2024-06-03 03:25:41.667,2024-06-03 03:25:41.667
2,3,Anggita Darmawan,anggitadarmawan3@gmail.com,$2a$08$lFrhHW1q9rG/QmrTPeLy9OqQFZtwj/k72HfJ2oq...,,2024-06-05 01:27:30.592,2024-06-05 01:27:30.592
3,4,adminFe,adminfe123@gmail.com,$2a$08$OY4lskcKiJO7zldb.n5t2u7KHYtVBKt9zzR96Zq...,,2024-06-15 03:50:19.301,2024-06-15 03:50:19.301


Menyimpan Dataframe dari tabel admins ke ../data_source_csv/admins.csv 

Menampilkan Dataframe dari tabel: customize_watering_reminders


Unnamed: 0,id,my_plant_id,time,recurring,type,created_at,updated_at
0,8,137,17:00,1,daily,2024-06-17,2024-06-17
1,9,153,17:00,1,daily,2024-06-17,2024-06-17


Menyimpan Dataframe dari tabel customize_watering_reminders ke ../data_source_csv/customize_watering_reminders.csv 

Menampilkan Dataframe dari tabel: fertilizers


Unnamed: 0,id,name,compostition,create_at


Menyimpan Dataframe dari tabel fertilizers ke ../data_source_csv/fertilizers.csv 

Menampilkan Dataframe dari tabel: notifications


Unnamed: 0,id,title,body,user_id,is_read,created_at,updated_at,plant_id
0,32,Saatnya meniram tanaman anda,Saatnya meniram tanaman anda,3,1,2024-06-17 00:00:00.000,2024-06-17 14:46:42.965,46
1,33,Customize Watering Reminder,"Hiii Octaviano Ryan Eka Putra Hartanto, It's t...",3,1,2024-06-17 15:00:00.880,2024-06-18 09:55:41.479,46
2,34,Customize Watering Reminder,"Hiii Octaviano Ryan Eka Putra Hartanto, It's t...",3,0,2024-06-18 08:00:03.641,2024-06-18 09:55:54.125,62
3,35,Customize Watering Reminder,"Hiii Dafa Aldian, It's time to water your plan...",14,0,2024-06-18 09:00:04.040,2024-06-18 09:00:04.040,61
4,36,Customize Watering Reminder,"Hiii Octaviano Ryan Eka Putra Hartanto, It's t...",3,0,2024-06-18 10:00:03.341,2024-06-18 10:00:03.341,62
5,37,Customize Watering Reminder,"Hiii Dafa Aldian, It's time to water your plan...",14,0,2024-06-18 10:00:05.550,2024-06-18 10:00:05.550,61
6,38,Customize Watering Reminder,"Hiii Octaviano Ryan Eka Putra Hartanto, It's t...",3,0,2024-06-18 10:00:06.696,2024-06-18 10:00:06.696,62
7,39,Customize Watering Reminder,"Hiii Octaviano Ryan Eka Putra Hartanto, It's t...",3,0,2024-06-18 10:00:08.609,2024-06-18 10:00:08.609,62
8,40,Customize Watering Reminder,"Hiii Dafa Aldian, It's time to water your plan...",14,0,2024-06-18 10:00:09.795,2024-06-18 10:00:09.795,61
9,41,Customize Watering Reminder,"Hiii Octaviano Ryan Eka Putra Hartanto, It's t...",3,0,2024-06-18 10:00:10.096,2024-06-18 10:00:10.096,62


Menyimpan Dataframe dari tabel notifications ke ../data_source_csv/notifications.csv 

Menampilkan Dataframe dari tabel: plant_categories


Unnamed: 0,id,name,image_url,created_at,updated_at
0,1,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-05-30 00:45:23.789,2024-06-14 02:51:56.957
1,2,Succulents,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2023-05-28 10:00:00.000,2024-06-14 02:53:30.136
2,4,Angiosperms,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2023-05-28 10:00:00.000,2024-06-14 02:53:50.500
3,5,Herbs,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2023-05-28 10:00:00.000,2024-06-14 02:54:18.975
4,7,Shrubs,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2023-05-28 10:00:00.000,2024-06-14 02:54:50.460
5,8,Ferns,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-07 07:07:57.671,2024-06-14 02:55:21.187
6,9,Trees,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-07 07:18:57.328,2024-06-14 02:56:14.506
7,10,Climbers,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-14 02:57:03.527,2024-06-14 02:57:03.527
8,11,Conifers,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-14 02:57:20.947,2024-06-14 02:57:20.947
9,12,Annual,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-14 02:57:33.736,2024-06-14 02:57:33.736


Menyimpan Dataframe dari tabel plant_categories ke ../data_source_csv/plant_categories.csv 

Menampilkan Dataframe dari tabel: plant_characteristics


Unnamed: 0,id,plant_id,height,height_unit,wide,wide_unit,leaf_color
0,61,61,3,Meter,300,Centimeter,Red
1,62,62,2,Meter,1,Meter,Green
2,63,63,600,Centimeter,900,Centimeter,Green
3,64,64,400,Centimeter,300,Centimeter,Green
4,65,65,200,Centimeter,300,Centimeter,Green
5,66,66,1200,Centimeter,600,Centimeter,Green
6,67,67,2,Meter,1,Meter,Green
7,68,68,500,Centimeter,300,Centimeter,Green
8,69,69,150,Centimeter,75,Centimeter,Green
9,75,75,500,Centimeter,400,Centimeter,Green


Menyimpan Dataframe dari tabel plant_characteristics ke ../data_source_csv/plant_characteristics.csv 

Menampilkan Dataframe dari tabel: plant_faqs


Unnamed: 0,id,plant_id,question,answer,created_at,updated_at
0,76,61,<p><em>Mau tanya apa</em></p>,<p><em><u>Ya gatauu?</u></em></p>,2024-06-17 08:00:05.709,2024-06-17 08:00:05.709
1,77,62,<p>Recommendation for fertilizer types and amo...,<p>Use a balanced 10-10-10 fertilizer at plant...,2024-06-18 02:19:10.655,2024-06-18 02:19:10.655
2,78,62,<p>Recommendation for safe pest control to add...,<p>Use neem oil or insecticidal soap to manage...,2024-06-18 02:19:10.655,2024-06-18 02:19:10.655
3,79,62,<p>How to improve tomato flavor?</p>,"<p>Provide ample sunlight, avoid over-fertiliz...",2024-06-18 02:19:10.655,2024-06-18 02:19:10.655
4,80,62,<p>How to prevent blossom end rot in tomatoes?...,<p>Blossom end rot is caused by calcium defici...,2024-06-18 02:19:10.655,2024-06-18 02:19:10.655
5,81,63,<p>Recommendation for fertilizer types and amo...,<p>Use a slow-release fertilizer with a higher...,2024-06-18 02:38:23.623,2024-06-18 02:38:23.623
6,82,63,<p>Recommendation for safe pest control to add...,<p>Introduce beneficial insects like ladybugs ...,2024-06-18 02:38:23.623,2024-06-18 02:38:23.623
7,83,63,<p>Can lavender be grown indoors?</p>,<p>Lavender can be grown indoors in containers...,2024-06-18 02:38:23.623,2024-06-18 02:38:23.623
8,84,63,<p>How often should lavender be pruned?</p>,<p>Lavender should be lightly pruned after flo...,2024-06-18 02:38:23.623,2024-06-18 02:38:23.623
9,85,64,<p>Recommendation for fertilizer types and amo...,<p>Use a balanced liquid fertilizer like 10-10...,2024-06-18 02:53:10.666,2024-06-18 02:53:10.666


Menyimpan Dataframe dari tabel plant_faqs ke ../data_source_csv/plant_faqs.csv 

Menampilkan Dataframe dari tabel: plant_images


Unnamed: 0,id,plant_id,file_name,is_primary,created_at,updated_at
0,71,61,https://res.cloudinary.com/dxrz0cg5z/image/upl...,1,2024-06-17 08:00:05.709,2024-06-17 08:00:05.709
1,72,61,https://res.cloudinary.com/dxrz0cg5z/image/upl...,0,2024-06-17 08:00:05.709,2024-06-17 08:00:05.709
2,73,62,https://res.cloudinary.com/dxrz0cg5z/image/upl...,1,2024-06-18 02:19:10.655,2024-06-18 02:19:10.655
3,74,62,https://res.cloudinary.com/dxrz0cg5z/image/upl...,0,2024-06-18 02:19:10.655,2024-06-18 02:19:10.655
4,75,63,https://res.cloudinary.com/dxrz0cg5z/image/upl...,1,2024-06-18 02:38:23.623,2024-06-18 02:38:23.623
5,76,63,https://res.cloudinary.com/dxrz0cg5z/image/upl...,0,2024-06-18 02:38:23.623,2024-06-18 02:38:23.623
6,77,64,https://res.cloudinary.com/dxrz0cg5z/image/upl...,1,2024-06-18 02:53:10.666,2024-06-18 02:53:10.666
7,78,64,https://res.cloudinary.com/dxrz0cg5z/image/upl...,0,2024-06-18 02:53:10.666,2024-06-18 02:53:10.666
8,79,65,https://res.cloudinary.com/dxrz0cg5z/image/upl...,1,2024-06-18 04:11:31.777,2024-06-18 04:11:31.777
9,80,65,https://res.cloudinary.com/dxrz0cg5z/image/upl...,0,2024-06-18 04:11:31.777,2024-06-18 04:11:31.777


Menyimpan Dataframe dari tabel plant_images ke ../data_source_csv/plant_images.csv 

Menampilkan Dataframe dari tabel: plant_instruction_categories


Unnamed: 0,id,name,description,image_url,created_at,updated_at
0,1,Soil Preparation,"In this section, you will learn how to properl...",https://res.cloudinary.com/dxrz0cg5z/image/upl...,2023-05-28 10:00:00,2024-06-14 02:16:24.448
1,2,Planting Seeds,"At this stage, you will learn how to properly ...",https://res.cloudinary.com/dxrz0cg5z/image/upl...,2023-05-28 10:00:00,2024-06-14 02:17:35.928
2,3,Plant Care,"In this section, you will learn how to properl...",https://res.cloudinary.com/dxrz0cg5z/image/upl...,2023-05-28 10:00:00,2024-06-14 02:18:46.131
3,4,Harvest,"At this stage, you will learn how to harvest p...",https://res.cloudinary.com/dxrz0cg5z/image/upl...,2023-05-28 10:00:00,2024-06-14 02:19:50.778


Menyimpan Dataframe dari tabel plant_instruction_categories ke ../data_source_csv/plant_instruction_categories.csv 

Menampilkan Dataframe dari tabel: plant_instructions


Unnamed: 0,id,plant_id,step_number,step_title,step_description,step_image_url,additional_tips,created_at,updated_at,instruction_category_id
0,92,61,1,Apaya,<p><u>enaknya apa</u></p>,https://res.cloudinary.com/dxrz0cg5z/image/upl...,,2024-06-17 08:00:05.709,2024-06-17 08:00:05.709,3
1,93,62,1,Preparing the soil,<p>Prepare well-draining soil rich in organic ...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,,2024-06-18 02:19:10.655,2024-06-18 02:19:10.655,1
2,94,62,2,Sowing tomato seeds,<p>Sow tomato seeds indoors 6-8 weeks before t...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,,2024-06-18 02:19:10.655,2024-06-18 02:19:10.655,2
3,95,62,3,Tomato Plant Care,<ul><li>Water regularly to keep the soil moist...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,,2024-06-18 02:19:10.655,2024-06-18 02:19:10.655,3
4,96,62,4,Harvesting Tomatoes,<p>Harvest tomatoes when they are fully colore...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,,2024-06-18 02:19:10.655,2024-06-18 02:19:10.655,4
...,...,...,...,...,...,...,...,...,...,...
56,168,80,4,Harvesting Aloe Vera,<p>Harvest mature leaves as needed by cutting ...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,,2024-06-18 09:17:12.628,2024-06-18 09:17:12.628,4
57,169,81,1,gk tau,<p>biarkan tumbuh secara alami</p>,,,2024-06-18 10:23:26.436,2024-06-18 10:23:26.436,2
58,173,85,1,Cara Merawat,<p>Biarkan terkena matahari sepanjang hari dan...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,,2024-06-18 12:00:40.759,2024-06-18 12:00:40.759,3
59,174,86,1,Preparing the Soil,<p>Use fertile soil with good drainage. Incorp...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,,2024-06-18 12:06:51.921,2024-06-18 12:06:51.921,1


Menyimpan Dataframe dari tabel plant_instructions ke ../data_source_csv/plant_instructions.csv 

Menampilkan Dataframe dari tabel: plant_progresses


Unnamed: 0,id,plant_id,user_id,image_url,created_at,updated_at
0,23,61,23,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 06:24:07.209,2024-06-18 06:24:07.209
1,24,61,23,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 06:26:09.824,2024-06-18 06:26:09.824
2,25,61,23,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 06:26:34.694,2024-06-18 06:26:34.694
3,26,61,23,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 06:28:34.932,2024-06-18 06:28:34.932
4,27,75,14,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 08:55:18.746,2024-06-18 08:55:18.746
5,28,75,14,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 09:01:57.549,2024-06-18 09:01:57.549
6,29,75,14,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 09:04:18.157,2024-06-18 09:04:18.157
7,30,75,14,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 09:08:23.158,2024-06-18 09:08:23.158
8,31,75,14,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 09:10:08.294,2024-06-18 09:10:08.294
9,32,62,14,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 09:10:29.009,2024-06-18 09:10:29.009


Menyimpan Dataframe dari tabel plant_progresses ke ../data_source_csv/plant_progresses.csv 

Menampilkan Dataframe dari tabel: plant_reminders


Unnamed: 0,id,plant_id,watering_frequency,each,watering_amount,unit,watering_time,weather_condition,condition_description,created_at,updated_at
0,61,61,5,Week,300,Liter (l),16:30,"Bright Sun,Rainy,Stormy Rain","Sun,Rainy,Stormy",2024-06-17 08:00:05.718,2024-06-17 08:00:05.718
1,62,62,3,Week,2,Liter (l),09:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning or late afternoon.,...",2024-06-18 02:19:10.663,2024-06-18 02:19:10.663
2,63,63,1,Week,1,Liter (l),08:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water early in the morning.,Maintain regular w...",2024-06-18 02:38:23.630,2024-06-18 02:38:23.630
3,64,64,4,Week,500,Milliliter (ml),08:30,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning.,Maintain regular w...",2024-06-18 02:53:10.675,2024-06-18 02:53:10.675
4,65,65,2,Week,1,Liter (l),07:30,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning.,Maintain regular w...",2024-06-18 04:11:31.785,2024-06-18 04:11:31.785
5,66,66,1,Week,500,Milliliter (ml),07:30,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water early in the morning.,Maintain regular w...",2024-06-18 04:42:09.530,2024-06-18 04:42:09.530
6,67,67,2,Week,1,Liter (l),08:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning or late afternoon.,...",2024-06-18 04:58:57.711,2024-06-18 04:58:57.711
7,68,68,2,Week,500,Milliliter (ml),08:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning or late afternoon.,...",2024-06-18 05:14:53.023,2024-06-18 05:14:53.023
8,69,69,2,Week,2,Liter (l),08:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water early in the morning.,Maintain regular w...",2024-06-18 05:33:37.933,2024-06-18 05:33:37.933
9,75,75,3,Week,1,Liter (l),07:30,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning.,Maintain a regular...",2024-06-18 06:04:39.383,2024-06-18 06:04:39.383


Menyimpan Dataframe dari tabel plant_reminders ke ../data_source_csv/plant_reminders.csv 

Menampilkan Dataframe dari tabel: plants


Unnamed: 0,id,name,description,is_toxic,harvest_duration,sunlight,planting_time,plant_category_id,climate_condition,plant_characteristic_id,created_at,updated_at,additional_tips
0,61,Rafflesia-Rafflesiaceae,<p>Bunga <strong>Langka</strong></p>,1,6,Partsun,Autumn,11,Wet,0,2024-06-17 08:00:05.709,2024-06-17 08:00:05.709,<p><strong><em><u>Tipsnya ya gituu</u></em></s...
1,62,Tomato-Solanaceae,<p>Tomatoes are a warm-season vegetable that g...,0,3,Fullsun,Spring,1,Dry,0,2024-06-18 02:19:10.655,2024-06-18 02:19:10.655,<ul><li>Support plants with stakes or cages to...
2,63,Lavender - Lamiaceae,<p>Lavender is a fragrant herb known for its a...,0,6,Fullsun,Summer,7,Dry,0,2024-06-18 02:38:23.623,2024-06-18 02:38:23.623,<ul><li>Mulch around plants to retain moisture...
3,64,Basil - Lamiaceae,<p>Basil is a popular herb used in many cuisin...,0,1,Partsun,Spring,5,Wet,0,2024-06-18 02:53:10.666,2024-06-18 02:53:10.666,<ul><li>Regularly pinch off flower buds to pro...
4,65,Strawberry - Rosaceae,<p>Strawberries are perennial plants known for...,0,3,Shade,Spring,1,Dry,0,2024-06-18 04:11:31.777,2024-06-18 04:11:31.777,<ul><li>Provide good air circulation to preven...
5,66,Rosemary - Lamiaceae,<p>Rosemary is an aromatic herb with needle-li...,0,1,Fullsun,Spring,5,Dry,0,2024-06-18 04:42:09.521,2024-06-18 04:42:09.521,<ul><li>Protect from frost in colder climates ...
6,67,Sunflower - Asteraceae,<p>Sunflowers are tall flowering plants with l...,0,3,Fullsun,Spring,12,Dry,0,2024-06-18 04:58:57.702,2024-06-18 04:58:57.702,<ul><li>Deadhead spent flowers to encourage co...
7,68,Peppermint - Lamiaceae,<p>Peppermint is a hybrid mint known for its s...,0,3,Partsun,Spring,5,Wet,0,2024-06-18 05:14:53.014,2024-06-18 05:14:53.014,<ul><li>Divide peppermint plants every 2-3 yea...
8,69,Rose - Rosaceae,<p>Roses are ornamental flowering plants known...,0,6,Fullsun,Spring,7,Wet,0,2024-06-18 05:33:37.917,2024-06-18 05:33:37.917,<ul><li>Mulch around roses to retain moisture ...
9,75,Cucumber - Cucurbitaceae,<p>Cucumbers are climbing plants known for the...,0,3,Fullsun,Spring,1,Wet,0,2024-06-18 06:04:39.375,2024-06-18 06:04:39.375,"<ul><li>Provide support for climbing plants, s..."


Menyimpan Dataframe dari tabel plants ke ../data_source_csv/plants.csv 

Menampilkan Dataframe dari tabel: user_plant_histories


Unnamed: 0,id,user_id,plant_id,plant_name,plant_category,plant_image_url,created_at,updated_at
0,2,3,46,Rose update,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-15 23:57:01.054,2024-06-15 23:57:01.054
1,3,14,46,Rose update,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 04:29:58.931,2024-06-16 04:29:58.931
2,4,14,55,asdasd-dsfdsdf,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 04:36:07.031,2024-06-16 04:36:07.031
3,5,14,55,asdasd-dsfdsdf,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 04:56:05.872,2024-06-16 04:56:05.872
4,6,14,46,Rose update,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 04:56:47.220,2024-06-16 04:56:47.220
5,7,14,55,asdasd-dsfdsdf,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 07:33:02.184,2024-06-16 07:33:02.184
6,8,14,59,asdaasd-sdasd,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 08:10:30.869,2024-06-16 08:10:30.869
7,9,14,46,Rose update,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 08:33:58.143,2024-06-16 08:33:58.143
8,10,14,55,asdasd-dsfdsdf,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 08:36:24.144,2024-06-16 08:36:24.144
9,11,14,46,Rose update,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 08:38:36.573,2024-06-16 08:38:36.573


Menyimpan Dataframe dari tabel user_plant_histories ke ../data_source_csv/user_plant_histories.csv 

Menampilkan Dataframe dari tabel: user_plants


Unnamed: 0,id,user_id,plant_id,created_at,updated_at,last_watered_at,customize_name
0,137,3,62,2024-06-17 00:00:00.000,2024-06-17 00:00:00.000,,
1,149,17,62,2024-06-18 04:30:35.996,2024-06-18 04:30:35.996,,Tomato-Solanaceae
2,150,14,75,2024-06-18 06:23:25.398,2024-06-18 06:23:25.398,,Cucumber - Cucurbitaceae
3,151,23,61,2024-06-18 06:23:52.937,2024-06-18 06:23:52.937,,Rafflesia-Rafflesiaceae
4,152,14,62,2024-06-18 07:15:09.863,2024-06-18 07:15:09.863,,Tomato-Solanaceae
5,153,14,61,2024-06-18 07:15:20.779,2024-06-18 07:15:20.779,,Rafflesia-Rafflesiaceae
6,154,14,63,2024-06-18 07:15:25.812,2024-06-18 07:15:25.812,,Lavender - Lamiaceae
7,155,14,64,2024-06-18 07:15:31.449,2024-06-18 07:15:31.449,,Basil - Lamiaceae
8,156,14,65,2024-06-18 07:15:44.372,2024-06-18 07:15:44.372,,Strawberry - Rosaceae
9,157,28,61,2024-06-18 09:40:27.099,2024-06-18 09:40:27.099,,Rafflesia-Rafflesiaceae


Menyimpan Dataframe dari tabel user_plants ke ../data_source_csv/user_plants.csv 

Menampilkan Dataframe dari tabel: users


Unnamed: 0,id,name,email,password,is_active,otp,url_image,created_at,updated_at,fcm_token
0,3,Octaviano Ryan Eka Putra Hartanto,octavianoryan030@gmail.com,$2a$08$R9/7B15r6CgnfHhTEvW3o.qDQtB61Vmw2WxL8OQ...,1,986482,,2024-05-28 09:08:58.892,2024-05-28 09:08:58.892,fGlstT-gQ0O4svve3G3iBZ:APA91bFyg33gkyND-oBOpYL...
1,5,Setiabudi,mamansetiabudi12061982@gmail.com,$2a$08$ur/09YUT0MI2s5CvTcKNWOz29g73ZbaIaa7mdti...,1,364170,,2024-06-04 06:02:55.224,2024-06-04 06:02:55.224,
2,6,Aletha Safa,yellowblue@gmail.com,$2a$08$kRWcnKu27ojUmST7WqB17.OZYKI1En2MYHFVtMg...,0,486315,,2024-06-05 05:50:35.189,2024-06-05 05:50:35.189,
3,7,Valdimir Putin,kangkingkung79@gmail.com,$2a$08$8TlhkfNRNocMnI1RAs1gXuQXZomO1ILgP8vQVbn...,1,714585,,2024-06-05 12:25:47.200,2024-06-05 12:25:47.200,092013
4,9,Aletha Safa,yellow3@gmail.com,$2a$08$0MeHpb3QgFnL51BfR7YZEu8MyBgs050e2K3plFL...,0,870521,,2024-06-06 14:49:11.846,2024-06-06 14:49:11.846,092013
5,10,Aletha Safa,blue04@gmail.com,$2a$08$Y6QSW6XzuXdouaX7krSLEuYXimEGEhURjVBaAzp...,0,841146,,2024-06-06 15:41:47.729,2024-06-06 15:41:47.729,092013
6,11,Aletha Safa,blue05@gmail.com,$2a$08$p2RCU3u8YgiHG8oiuPFcze2DYhAMXSYppX88bfW...,0,229096,,2024-06-06 15:43:39.360,2024-06-06 15:43:39.360,092013
7,12,Aletha Safa,blue06@gmail.com,$2a$08$tqSwo4MtjhOFNYgmta8x3.2Esa4fnkEDFgK7SiH...,0,853420,,2024-06-07 03:59:48.264,2024-06-07 03:59:48.264,092013
8,14,Dafa Aldian,dafaaldian155@gmail.com,$2a$08$igkmrN4.3Uwe8TjnX8uYhuKfhhKRc7QyQsGk0PN...,1,290925,,2024-06-09 15:31:59.073,2024-06-09 15:31:59.073,fowtobnNSrGVOH_a3KOzMa:APA91bH9lp6Rbws3oI6Hngz...
9,15,Annisa,annisa@gmail.com,$2a$08$43fz/1GQXWmR7oEyDAW/6.3NIXE9jFD/dTyW12o...,0,122927,,2024-06-09 16:26:37.421,2024-06-09 16:26:37.421,dI2YkfWPS0eEaTsSNfgDYZ:APA91bFYX3YZVIDzbad7_yR...


Menyimpan Dataframe dari tabel users ke ../data_source_csv/users.csv 

Menampilkan Dataframe dari tabel: watering_histories


Unnamed: 0,id,plant_id,user_id,created_at,updated_at
0,7,62,3,2024-06-18 10:28:04.543,2024-06-18 10:28:04.543


Menyimpan Dataframe dari tabel watering_histories ke ../data_source_csv/watering_histories.csv 



# 3. Transform

## 3.1 Cleaning Data

In [699]:
def remove_html_tags(text):
    """Fungsi untuk menghapus tag HTML dan entitas HTML dari teks."""
    # Menghapus tag HTML
    clean = re.compile('<.*?>')
    text = re.sub(clean, '', text)
    # Menghapus entitas HTML
    text = html.unescape(text)
    return text

def cleanse_dataframe(df):
    print("Memeriksa missing values...")
    
    # Mengisi missing values berdasarkan tipe data kolom
    for col in df.columns:
        if df[col].isnull().any():
            if df[col].dtype == 'int64' or df[col].dtype == 'float64':
                print(f"Mengisi missing values di kolom '{col}' dengan 0...")
                df[col].fillna(0, inplace=True)
            elif df[col].dtype == 'object' or df[col].dtype.name == 'category':
                print(f"Mengisi missing values di kolom '{col}' dengan '-'...")
                df[col].fillna('-', inplace=True)
            elif pd.api.types.is_datetime64_any_dtype(df[col]):
                print(f"Mengisi missing values di kolom '{col}' dengan tanggal hari ini...")
                df[col].fillna(pd.Timestamp('today'), inplace=True)
    
    print("Membersihkan tag HTML dari kolom teks...")
    # Membersihkan tag HTML dan entitas HTML dari kolom teks (object atau category)
    for col in df.columns:
        if df[col].dtype == 'object' or df[col].dtype.name == 'category':
            print(f"Membersihkan tag HTML di kolom '{col}'...")
            df[col] = df[col].apply(lambda x: remove_html_tags(x) if pd.notnull(x) else x)
    
    print("Memeriksa duplikasi...")
    duplicate_rows = df.duplicated().sum()
    print(f"Jumlah baris duplikat: {duplicate_rows}")
    
    if duplicate_rows > 0:
        print("Menghapus baris duplikat...")
        df.drop_duplicates(inplace=True)

    return df

In [700]:
def cleanse_dataframe_fact(df):
    print("Memeriksa missing values...")
    
    # Mengisi missing values berdasarkan tipe data kolom
    for col in df.columns:
        if df[col].isnull().any():
            if df[col].dtype == 'int64' or df[col].dtype == 'float64':
                print(f"Mengisi missing values di kolom '{col}' dengan -1...")
                df[col].fillna(-1, inplace=True)
    
    return df

In [701]:
for table in tables:
    df_variable_name = f"df_{table}"
    
    # Cleansing DataFrame
    globals()[df_variable_name] = cleanse_dataframe(globals()[df_variable_name])

Memeriksa missing values...
Membersihkan tag HTML dari kolom teks...
Membersihkan tag HTML di kolom 'name'...
Membersihkan tag HTML di kolom 'email'...
Membersihkan tag HTML di kolom 'password'...
Membersihkan tag HTML di kolom 'url_image'...
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memeriksa missing values...
Membersihkan tag HTML dari kolom teks...
Membersihkan tag HTML di kolom 'time'...
Membersihkan tag HTML di kolom 'type'...
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memeriksa missing values...
Membersihkan tag HTML dari kolom teks...
Membersihkan tag HTML di kolom 'id'...
Memb

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['created_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['updated_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. Th

## 3.2 Informasi Dataframe

In [702]:
def info_dataframe(df):
    print("Menampilkan Informasi Field di tiap Dataframe")
    info_dataframe = df.info()
    print(info_dataframe)
    
    return df

In [703]:
for table in tables:
    df_variable_name = f"df_{table}"
    
    # Informasi DataFrame
    globals()[df_variable_name] = info_dataframe(globals()[df_variable_name])

Menampilkan Informasi Field di tiap Dataframe


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   id          4 non-null      int64         
 1   name        4 non-null      category      
 2   email       4 non-null      category      
 3   password    4 non-null      category      
 4   url_image   4 non-null      category      
 5   created_at  4 non-null      datetime64[ns]
 6   updated_at  4 non-null      datetime64[ns]
dtypes: category(4), datetime64[ns](2), int64(1)
memory usage: 968.0 bytes
None
Menampilkan Informasi Field di tiap Dataframe
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   id           2 non-null      int64         
 1   my_plant_id  2 non-null      int64         
 2   time         2 non-null      category    

## 3.3 Change Type Data

In [704]:
def change_type_data(df):
    # Memastikan tipe data yang benar
    print(f"Memastikan tipe data yang benar...")
    for column in df.columns:
        if df[column].dtype == 'object':
            df[column] = df[column].astype('category')
        elif df[column].dtype == 'float64':
            df[column] = df[column].astype('int64')
    
    # Pastikan kolom tanggal diubah menjadi datetime dengan format '%Y-%m-%d %H:%M'
    date_columns = ['created_at', 'updated_at', 'last_watered_at']
    for column in date_columns:
        if column in df.columns:
            print(f"Mengonversi kolom '{column}' menjadi datetime dengan format '%Y-%m-%d %H:%M'...")
            df[column] = pd.to_datetime(df[column], errors='coerce', format='%Y-%m-%d %H:%M')
    
    # Isi nilai NULL di kolom created_at jika ada
    if 'created_at' in df.columns:
        df['created_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
    if 'updated_at' in df.columns:
        df['updated_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
    
    return df

In [705]:
dataframes = []  # List untuk menyimpan nama dataframe

for table in tables:
    df_variable_name = f"df_{table}"
    
    # Cleansing DataFrame
    globals()[df_variable_name] = change_type_data(globals()[df_variable_name])
    
    # Tambahkan nama dataframe ke dalam list
    dataframes.append(df_variable_name)

# Output untuk verifikasi
print(dataframes)

Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['created_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['updated_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. Th

### 3.3.1 Menampilkan Informasi Dataframe Kembali

In [706]:
for table in dataframes:
    df_variable_name = f"{table}"
    
    # Informasi DataFrame
    globals()[df_variable_name] = info_dataframe(globals()[df_variable_name])

Menampilkan Informasi Field di tiap Dataframe
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   id          4 non-null      int64         
 1   name        4 non-null      category      
 2   email       4 non-null      category      
 3   password    4 non-null      category      
 4   url_image   4 non-null      category      
 5   created_at  4 non-null      datetime64[ns]
 6   updated_at  4 non-null      datetime64[ns]
dtypes: category(4), datetime64[ns](2), int64(1)
memory usage: 968.0 bytes
None
Menampilkan Informasi Field di tiap Dataframe
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   id           2 non-null      int64         
 1   my_plant_id  2 non-null      int64         


### 3.3.2 Sample Dataframe

In [707]:
df_watering_histories

Unnamed: 0,id,plant_id,user_id,created_at,updated_at
0,7,62,3,2024-06-18 10:28:04.543,2024-06-18 10:28:04.543


## 3.4 Table Fakta &  Dimensional

### 3.4.1 Save to CSV Table Dimensional

In [708]:
dataframes

['df_admins',
 'df_customize_watering_reminders',
 'df_fertilizers',
 'df_notifications',
 'df_plant_categories',
 'df_plant_characteristics',
 'df_plant_faqs',
 'df_plant_images',
 'df_plant_instruction_categories',
 'df_plant_instructions',
 'df_plant_progresses',
 'df_plant_reminders',
 'df_plants',
 'df_user_plant_histories',
 'df_user_plants',
 'df_users',
 'df_watering_histories']

In [709]:
output_dir = "../data_source_dimensional"

for table in tables:
    df_variable_name = f"df_{table}"
    
    csv_filename = os.path.join(output_dir, f"dim_{table}.csv")
    globals()[df_variable_name].to_csv(csv_filename, index=False)
    print(f"Menyimpan Dataframe Dimensional dari dataframe df_{table} ke {csv_filename} \n")

Menyimpan Dataframe Dimensional dari dataframe df_admins ke ../data_source_dimensional/dim_admins.csv 

Menyimpan Dataframe Dimensional dari dataframe df_customize_watering_reminders ke ../data_source_dimensional/dim_customize_watering_reminders.csv 

Menyimpan Dataframe Dimensional dari dataframe df_fertilizers ke ../data_source_dimensional/dim_fertilizers.csv 

Menyimpan Dataframe Dimensional dari dataframe df_notifications ke ../data_source_dimensional/dim_notifications.csv 

Menyimpan Dataframe Dimensional dari dataframe df_plant_categories ke ../data_source_dimensional/dim_plant_categories.csv 



Menyimpan Dataframe Dimensional dari dataframe df_plant_characteristics ke ../data_source_dimensional/dim_plant_characteristics.csv 

Menyimpan Dataframe Dimensional dari dataframe df_plant_faqs ke ../data_source_dimensional/dim_plant_faqs.csv 

Menyimpan Dataframe Dimensional dari dataframe df_plant_images ke ../data_source_dimensional/dim_plant_images.csv 

Menyimpan Dataframe Dimensional dari dataframe df_plant_instruction_categories ke ../data_source_dimensional/dim_plant_instruction_categories.csv 

Menyimpan Dataframe Dimensional dari dataframe df_plant_instructions ke ../data_source_dimensional/dim_plant_instructions.csv 

Menyimpan Dataframe Dimensional dari dataframe df_plant_progresses ke ../data_source_dimensional/dim_plant_progresses.csv 

Menyimpan Dataframe Dimensional dari dataframe df_plant_reminders ke ../data_source_dimensional/dim_plant_reminders.csv 

Menyimpan Dataframe Dimensional dari dataframe df_plants ke ../data_source_dimensional/dim_plants.csv 

Menyimpan Da

### 3.4.2 Dataframe Dimensional

In [710]:
# Path ke direktori yang berisi file CSV
input_dir = '../data_source_dimensional'

# List untuk menyimpan nama dataframe yang dibuat
dataframe_dimensional = []

# Loop untuk membaca semua file CSV dalam direktori
for filename in os.listdir(input_dir):
    if filename.endswith('.csv'):
        # Mengambil nama tabel dari nama file
        table_name = os.path.splitext(filename)[0]  # Menghilangkan ekstensi .csv
        
        # Membuat nama variabel dataframe
        df_variable_name = f"df_{table_name}"
        
        # Membaca file CSV menjadi dataframe
        df_path = os.path.join(input_dir, filename)
        globals()[df_variable_name] = pd.read_csv(df_path)
        
        # Tambahkan nama dataframe ke dalam list
        dataframe_dimensional.append(df_variable_name)

# Output untuk verifikasi
print(dataframe_dimensional)

['df_dim_notifications', 'df_dim_user_plant_histories', 'df_dim_plant_categories', 'df_dim_fertilizers', 'df_dim_plant_instructions', 'df_dim_customize_watering_reminders', 'df_dim_watering_histories', 'df_dim_admins', 'df_dim_plant_faqs', 'df_dim_user_plants', 'df_dim_plant_characteristics', 'df_dim_plants', 'df_dim_users', 'df_dim_plant_instruction_categories', 'df_dim_plant_progresses', 'df_dim_plant_reminders', 'df_dim_plant_images']


In [711]:
dataframe_dimensional

['df_dim_notifications',
 'df_dim_user_plant_histories',
 'df_dim_plant_categories',
 'df_dim_fertilizers',
 'df_dim_plant_instructions',
 'df_dim_customize_watering_reminders',
 'df_dim_watering_histories',
 'df_dim_admins',
 'df_dim_plant_faqs',
 'df_dim_user_plants',
 'df_dim_plant_characteristics',
 'df_dim_plants',
 'df_dim_users',
 'df_dim_plant_instruction_categories',
 'df_dim_plant_progresses',
 'df_dim_plant_reminders',
 'df_dim_plant_images']

In [712]:
for dataframe in dataframe_dimensional:
    df_variable_name = f"{dataframe}"
    
    # Cleansing DataFrame
    globals()[df_variable_name] = cleanse_dataframe(globals()[df_variable_name])

Memeriksa missing values...
Membersihkan tag HTML dari kolom teks...
Membersihkan tag HTML di kolom 'title'...
Membersihkan tag HTML di kolom 'body'...
Membersihkan tag HTML di kolom 'created_at'...
Membersihkan tag HTML di kolom 'updated_at'...
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memeriksa missing values...
Membersihkan tag HTML dari kolom teks...
Membersihkan tag HTML di kolom 'plant_name'...
Membersihkan tag HTML di kolom 'plant_category'...
Membersihkan tag HTML di kolom 'plant_image_url'...
Membersihkan tag HTML di kolom 'created_at'...
Membersihkan tag HTML di kolom 'updated_at'...
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['created_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['updated_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. Th

Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memeriksa missing values...
Mengisi missing values di kolom 'last_watered_at' dengan 0...
Membersihkan tag HTML dari kolom teks...
Membersihkan tag HTML di kolom 'created_at'...
Membersihkan tag HTML di kolom 'updated_at'...
Membersihkan tag HTML di kolom 'customize_name'...
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'last_watered_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memeriksa missing values...
Membersihkan tag HTML dari kolom teks...
Membersihkan tag HTML di kolom 'height_unit'...
Membersihkan tag HTML di kolom 'wide_unit'...
Mem

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['created_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['updated_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. Th

In [713]:
df_dim_plants

Unnamed: 0,id,name,description,is_toxic,harvest_duration,sunlight,planting_time,plant_category_id,climate_condition,plant_characteristic_id,created_at,updated_at,additional_tips
0,61,Rafflesia-Rafflesiaceae,Bunga Langka,1,6,Partsun,Autumn,11,Wet,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Tipsnya ya gituu
1,62,Tomato-Solanaceae,Tomatoes are a warm-season vegetable that grow...,0,3,Fullsun,Spring,1,Dry,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Support plants with stakes or cages to prevent...
2,63,Lavender - Lamiaceae,Lavender is a fragrant herb known for its arom...,0,6,Fullsun,Summer,7,Dry,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Mulch around plants to retain moisture and con...
3,64,Basil - Lamiaceae,Basil is a popular herb used in many cuisines ...,0,1,Partsun,Spring,5,Wet,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Regularly pinch off flower buds to prolong the...
4,65,Strawberry - Rosaceae,Strawberries are perennial plants known for th...,0,3,Shade,Spring,1,Dry,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Provide good air circulation to prevent fungal...
5,66,Rosemary - Lamiaceae,Rosemary is an aromatic herb with needle-like ...,0,1,Fullsun,Spring,5,Dry,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Protect from frost in colder climates or bring...
6,67,Sunflower - Asteraceae,Sunflowers are tall flowering plants with larg...,0,3,Fullsun,Spring,12,Dry,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Deadhead spent flowers to encourage continuous...
7,68,Peppermint - Lamiaceae,Peppermint is a hybrid mint known for its stro...,0,3,Partsun,Spring,5,Wet,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Divide peppermint plants every 2-3 years to pr...
8,69,Rose - Rosaceae,Roses are ornamental flowering plants known fo...,0,6,Fullsun,Spring,7,Wet,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Mulch around roses to retain moisture and supp...
9,75,Cucumber - Cucurbitaceae,Cucumbers are climbing plants known for their ...,0,3,Fullsun,Spring,1,Wet,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,"Provide support for climbing plants, such as w..."


### 3.4.3 Merged to Fact Table User Activities

#### 3.4.3.1 Merged dim_my_plants

In [714]:
df_dim_users.rename(columns={'id': 'user_id'}, inplace=True)
df_dim_users.rename(columns={'name': 'user_name'}, inplace=True)
df_dim_users

Unnamed: 0,user_id,user_name,email,password,is_active,otp,url_image,created_at,updated_at,fcm_token
0,3,Octaviano Ryan Eka Putra Hartanto,octavianoryan030@gmail.com,$2a$08$R9/7B15r6CgnfHhTEvW3o.qDQtB61Vmw2WxL8OQ...,1,986482,0,2024-06-18 19:18:43.491234,2024-06-18 19:18:43.492194,fGlstT-gQ0O4svve3G3iBZ:APA91bFyg33gkyND-oBOpYL...
1,5,Setiabudi,mamansetiabudi12061982@gmail.com,$2a$08$ur/09YUT0MI2s5CvTcKNWOz29g73ZbaIaa7mdti...,1,364170,0,2024-06-18 19:18:43.491234,2024-06-18 19:18:43.492194,-
2,6,Aletha Safa,yellowblue@gmail.com,$2a$08$kRWcnKu27ojUmST7WqB17.OZYKI1En2MYHFVtMg...,0,486315,0,2024-06-18 19:18:43.491234,2024-06-18 19:18:43.492194,-
3,7,Valdimir Putin,kangkingkung79@gmail.com,$2a$08$8TlhkfNRNocMnI1RAs1gXuQXZomO1ILgP8vQVbn...,1,714585,0,2024-06-18 19:18:43.491234,2024-06-18 19:18:43.492194,092013
4,9,Aletha Safa,yellow3@gmail.com,$2a$08$0MeHpb3QgFnL51BfR7YZEu8MyBgs050e2K3plFL...,0,870521,0,2024-06-18 19:18:43.491234,2024-06-18 19:18:43.492194,092013
5,10,Aletha Safa,blue04@gmail.com,$2a$08$Y6QSW6XzuXdouaX7krSLEuYXimEGEhURjVBaAzp...,0,841146,0,2024-06-18 19:18:43.491234,2024-06-18 19:18:43.492194,092013
6,11,Aletha Safa,blue05@gmail.com,$2a$08$p2RCU3u8YgiHG8oiuPFcze2DYhAMXSYppX88bfW...,0,229096,0,2024-06-18 19:18:43.491234,2024-06-18 19:18:43.492194,092013
7,12,Aletha Safa,blue06@gmail.com,$2a$08$tqSwo4MtjhOFNYgmta8x3.2Esa4fnkEDFgK7SiH...,0,853420,0,2024-06-18 19:18:43.491234,2024-06-18 19:18:43.492194,092013
8,14,Dafa Aldian,dafaaldian155@gmail.com,$2a$08$igkmrN4.3Uwe8TjnX8uYhuKfhhKRc7QyQsGk0PN...,1,290925,0,2024-06-18 19:18:43.491234,2024-06-18 19:18:43.492194,fowtobnNSrGVOH_a3KOzMa:APA91bH9lp6Rbws3oI6Hngz...
9,15,Annisa,annisa@gmail.com,$2a$08$43fz/1GQXWmR7oEyDAW/6.3NIXE9jFD/dTyW12o...,0,122927,0,2024-06-18 19:18:43.491234,2024-06-18 19:18:43.492194,dI2YkfWPS0eEaTsSNfgDYZ:APA91bFYX3YZVIDzbad7_yR...


In [715]:
df_dim_plants.rename(columns={'id': 'plant_id'}, inplace=True)
df_dim_plants.rename(columns={'name': 'plant_name'}, inplace=True)
df_dim_plants

Unnamed: 0,plant_id,plant_name,description,is_toxic,harvest_duration,sunlight,planting_time,plant_category_id,climate_condition,plant_characteristic_id,created_at,updated_at,additional_tips
0,61,Rafflesia-Rafflesiaceae,Bunga Langka,1,6,Partsun,Autumn,11,Wet,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Tipsnya ya gituu
1,62,Tomato-Solanaceae,Tomatoes are a warm-season vegetable that grow...,0,3,Fullsun,Spring,1,Dry,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Support plants with stakes or cages to prevent...
2,63,Lavender - Lamiaceae,Lavender is a fragrant herb known for its arom...,0,6,Fullsun,Summer,7,Dry,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Mulch around plants to retain moisture and con...
3,64,Basil - Lamiaceae,Basil is a popular herb used in many cuisines ...,0,1,Partsun,Spring,5,Wet,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Regularly pinch off flower buds to prolong the...
4,65,Strawberry - Rosaceae,Strawberries are perennial plants known for th...,0,3,Shade,Spring,1,Dry,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Provide good air circulation to prevent fungal...
5,66,Rosemary - Lamiaceae,Rosemary is an aromatic herb with needle-like ...,0,1,Fullsun,Spring,5,Dry,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Protect from frost in colder climates or bring...
6,67,Sunflower - Asteraceae,Sunflowers are tall flowering plants with larg...,0,3,Fullsun,Spring,12,Dry,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Deadhead spent flowers to encourage continuous...
7,68,Peppermint - Lamiaceae,Peppermint is a hybrid mint known for its stro...,0,3,Partsun,Spring,5,Wet,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Divide peppermint plants every 2-3 years to pr...
8,69,Rose - Rosaceae,Roses are ornamental flowering plants known fo...,0,6,Fullsun,Spring,7,Wet,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Mulch around roses to retain moisture and supp...
9,75,Cucumber - Cucurbitaceae,Cucumbers are climbing plants known for their ...,0,3,Fullsun,Spring,1,Wet,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,"Provide support for climbing plants, such as w..."


In [716]:
df_dim_user_plants

Unnamed: 0,id,user_id,plant_id,created_at,updated_at,last_watered_at,customize_name
0,137,3,62,2024-06-18 19:18:43.425460,2024-06-18 19:18:43.429529,NaT,-
1,149,17,62,2024-06-18 19:18:43.425460,2024-06-18 19:18:43.429529,NaT,Tomato-Solanaceae
2,150,14,75,2024-06-18 19:18:43.425460,2024-06-18 19:18:43.429529,NaT,Cucumber - Cucurbitaceae
3,151,23,61,2024-06-18 19:18:43.425460,2024-06-18 19:18:43.429529,NaT,Rafflesia-Rafflesiaceae
4,152,14,62,2024-06-18 19:18:43.425460,2024-06-18 19:18:43.429529,NaT,Tomato-Solanaceae
5,153,14,61,2024-06-18 19:18:43.425460,2024-06-18 19:18:43.429529,NaT,Rafflesia-Rafflesiaceae
6,154,14,63,2024-06-18 19:18:43.425460,2024-06-18 19:18:43.429529,NaT,Lavender - Lamiaceae
7,155,14,64,2024-06-18 19:18:43.425460,2024-06-18 19:18:43.429529,NaT,Basil - Lamiaceae
8,156,14,65,2024-06-18 19:18:43.425460,2024-06-18 19:18:43.429529,NaT,Strawberry - Rosaceae
9,157,28,61,2024-06-18 19:18:43.425460,2024-06-18 19:18:43.429529,NaT,Rafflesia-Rafflesiaceae


In [717]:
# Melakukan merge dalam satu baris kode dengan suffixes untuk menghindari konflik kolom
df_dim_my_plants = df_user_plants.merge(df_dim_users, on='user_id', how='left', suffixes=('', '_user'))

# Memeriksa nama kolom setelah merge
print(df_dim_my_plants.columns)

Index(['id', 'user_id', 'plant_id', 'created_at', 'updated_at',
       'last_watered_at', 'customize_name', 'user_name', 'email', 'password',
       'is_active', 'otp', 'url_image', 'created_at_user', 'updated_at_user',
       'fcm_token'],
      dtype='object')


In [718]:
# Melakukan merge dalam satu baris kode dengan suffixes untuk menghindari konflik kolom
df_dim_my_plants = df_dim_my_plants.merge(df_dim_plants, on='plant_id', how='left', suffixes=('', '_plant'))

# Memeriksa nama kolom setelah merge
print(df_dim_my_plants.columns)

Index(['id', 'user_id', 'plant_id', 'created_at', 'updated_at',
       'last_watered_at', 'customize_name', 'user_name', 'email', 'password',
       'is_active', 'otp', 'url_image', 'created_at_user', 'updated_at_user',
       'fcm_token', 'plant_name', 'description', 'is_toxic',
       'harvest_duration', 'sunlight', 'planting_time', 'plant_category_id',
       'climate_condition', 'plant_characteristic_id', 'created_at_plant',
       'updated_at_plant', 'additional_tips'],
      dtype='object')


In [719]:
# Memilih kolom yang diinginkan
df_dim_my_plants = df_dim_my_plants[['id', 'user_name', 'plant_name', 'created_at', 'updated_at', 'last_watered_at']]
df_dim_my_plants

Unnamed: 0,id,user_name,plant_name,created_at,updated_at,last_watered_at
0,137,Octaviano Ryan Eka Putra Hartanto,Tomato-Solanaceae,2024-06-17 00:00:00.000,2024-06-17 00:00:00.000,NaT
1,149,annisa,Tomato-Solanaceae,2024-06-18 04:30:35.996,2024-06-18 04:30:35.996,NaT
2,150,Dafa Aldian,Cucumber - Cucurbitaceae,2024-06-18 06:23:25.398,2024-06-18 06:23:25.398,NaT
3,151,farhan,Rafflesia-Rafflesiaceae,2024-06-18 06:23:52.937,2024-06-18 06:23:52.937,NaT
4,152,Dafa Aldian,Tomato-Solanaceae,2024-06-18 07:15:09.863,2024-06-18 07:15:09.863,NaT
5,153,Dafa Aldian,Rafflesia-Rafflesiaceae,2024-06-18 07:15:20.779,2024-06-18 07:15:20.779,NaT
6,154,Dafa Aldian,Lavender - Lamiaceae,2024-06-18 07:15:25.812,2024-06-18 07:15:25.812,NaT
7,155,Dafa Aldian,Basil - Lamiaceae,2024-06-18 07:15:31.449,2024-06-18 07:15:31.449,NaT
8,156,Dafa Aldian,Strawberry - Rosaceae,2024-06-18 07:15:44.372,2024-06-18 07:15:44.372,NaT
9,157,dumy akun,Rafflesia-Rafflesiaceae,2024-06-18 09:40:27.099,2024-06-18 09:40:27.099,NaT


In [720]:
df_dim_my_plants.rename(columns={'id': 'my_plant_id'}, inplace=True)

In [721]:
df_dim_my_plants.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   my_plant_id      10 non-null     int64         
 1   user_name        10 non-null     category      
 2   plant_name       10 non-null     category      
 3   created_at       10 non-null     datetime64[ns]
 4   updated_at       10 non-null     datetime64[ns]
 5   last_watered_at  0 non-null      datetime64[ns]
dtypes: category(2), datetime64[ns](3), int64(1)
memory usage: 1.8 KB


##### a. Save to CSV

In [722]:
df_dim_my_plants.to_csv('../data_source_to_load/dim_my_plants.csv', index=False)

#### 3.4.3.2 Merged dim_planting_histories

In [723]:
# Melakukan merge dalam satu baris kode dengan suffixes untuk menghindari konflik kolom
df_dim_user_plant_histories = df_dim_user_plant_histories.merge(
    df_dim_users, on='user_id', how='left', suffixes=('', '_user'))

# Memeriksa nama kolom setelah merge
print(df_dim_user_plant_histories.columns)

Index(['id', 'user_id', 'plant_id', 'plant_name', 'plant_category',
       'plant_image_url', 'created_at', 'updated_at', 'user_name', 'email',
       'password', 'is_active', 'otp', 'url_image', 'created_at_user',
       'updated_at_user', 'fcm_token'],
      dtype='object')


In [724]:
# Memilih kolom yang diinginkan
df_dim_user_plant_histories = df_dim_user_plant_histories[['id', 'user_name', 'plant_name', 'plant_category', 'created_at', 'updated_at']]
df_dim_user_plant_histories.rename(columns={'id': 'planting_history_id'}, inplace=True)
df_dim_user_plant_histories

Unnamed: 0,planting_history_id,user_name,plant_name,plant_category,created_at,updated_at
0,2,Octaviano Ryan Eka Putra Hartanto,Rose update,Fruits,2024-06-18 19:18:43.249113,2024-06-18 19:18:43.249725
1,3,Dafa Aldian,Rose update,Fruits,2024-06-18 19:18:43.249113,2024-06-18 19:18:43.249725
2,4,Dafa Aldian,asdasd-dsfdsdf,Fruits,2024-06-18 19:18:43.249113,2024-06-18 19:18:43.249725
3,5,Dafa Aldian,asdasd-dsfdsdf,Fruits,2024-06-18 19:18:43.249113,2024-06-18 19:18:43.249725
4,6,Dafa Aldian,Rose update,Fruits,2024-06-18 19:18:43.249113,2024-06-18 19:18:43.249725
5,7,Dafa Aldian,asdasd-dsfdsdf,Fruits,2024-06-18 19:18:43.249113,2024-06-18 19:18:43.249725
6,8,Dafa Aldian,asdaasd-sdasd,Fruits,2024-06-18 19:18:43.249113,2024-06-18 19:18:43.249725
7,9,Dafa Aldian,Rose update,Fruits,2024-06-18 19:18:43.249113,2024-06-18 19:18:43.249725
8,10,Dafa Aldian,asdasd-dsfdsdf,Fruits,2024-06-18 19:18:43.249113,2024-06-18 19:18:43.249725
9,11,Dafa Aldian,Rose update,Fruits,2024-06-18 19:18:43.249113,2024-06-18 19:18:43.249725


##### a. Save to CSV

In [725]:
df_dim_user_plant_histories.to_csv('../data_source_to_load/dim_planting_histories.csv', index=False)

#### 3.4.3.3 Merged dim_watering_histories

In [726]:
# Melakukan merge dalam satu baris kode dengan suffixes untuk menghindari konflik kolom
df_dim_watering_histories = df_watering_histories.merge(df_dim_users, on='user_id', how='left', suffixes=('', '_user')).merge(
    df_dim_plants, on='plant_id', how='left', suffixes=('', '_plant'))

df_dim_watering_histories = df_dim_watering_histories[['id', 'user_name', 'plant_name', 'created_at', 'updated_at']]
df_dim_watering_histories.rename(columns={'id': 'watering_history_id'}, inplace=True)
df_dim_watering_histories

Unnamed: 0,watering_history_id,user_name,plant_name,created_at,updated_at
0,7,Octaviano Ryan Eka Putra Hartanto,Tomato-Solanaceae,2024-06-18 10:28:04.543,2024-06-18 10:28:04.543


##### a. Save to CSV

In [727]:
df_dim_watering_histories.to_csv('../data_source_to_load/dim_watering_histories.csv', index=False)

#### 3.4.3.4 dim_customize_watering_reminders

In [728]:
df_dim_customize_watering_reminders.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   id           2 non-null      int64         
 1   my_plant_id  2 non-null      int64         
 2   time         2 non-null      category      
 3   recurring    2 non-null      int64         
 4   type         2 non-null      category      
 5   created_at   2 non-null      datetime64[ns]
 6   updated_at   2 non-null      datetime64[ns]
dtypes: category(2), datetime64[ns](2), int64(3)
memory usage: 444.0 bytes


In [729]:
df_dim_customize_watering_reminders.rename(columns={'id': 'customize_watering_reminder_id'}, inplace=True)
df_dim_customize_watering_reminders

Unnamed: 0,customize_watering_reminder_id,my_plant_id,time,recurring,type,created_at,updated_at
0,8,137,17:00,1,daily,2024-06-18 19:18:43.330886,2024-06-18 19:18:43.331500
1,9,153,17:00,1,daily,2024-06-18 19:18:43.330886,2024-06-18 19:18:43.331500


##### a. Save to CSV

In [730]:
df_dim_customize_watering_reminders.to_csv('../data_source_to_load/dim_customize_watering_reminders.csv', index=False)

#### 3.4.3.5 Merged Final Fact Table User Activities

In [731]:
# Menggabungkan keseluruhan id dataframe kedalam Fact User Activites
df_fact_user_activities = pd.merge(df_dim_my_plants, df_dim_user_plant_histories,
                                   on=["user_name", "plant_name"], how='outer',
                                   suffixes=('_my_plants', '_planting'))

df_fact_user_activities = pd.merge(df_fact_user_activities, df_dim_watering_histories,
                                   on=["user_name", "plant_name"], how='outer',
                                   suffixes=('_fact', '_watering'))

df_fact_user_activities = df_fact_user_activities[['my_plant_id', 'planting_history_id', 'watering_history_id']]
df_fact_user_activities

Unnamed: 0,my_plant_id,planting_history_id,watering_history_id
0,155.0,27.0,
1,150.0,22.0,
2,154.0,26.0,
3,153.0,25.0,
4,,3.0,
5,,6.0,
6,,9.0,
7,,11.0,
8,,13.0,
9,,14.0,


In [732]:
df_fact_user_activities['watering_count'] = df_fact_user_activities['watering_history_id'].nunique()
df_fact_user_activities['planting_count'] = df_fact_user_activities['planting_history_id'].nunique()
df_fact_user_activities['user_plant_count'] = df_fact_user_activities['my_plant_id'].nunique()
df_fact_user_activities

Unnamed: 0,my_plant_id,planting_history_id,watering_history_id,watering_count,planting_count,user_plant_count
0,155.0,27.0,,1,29,10
1,150.0,22.0,,1,29,10
2,154.0,26.0,,1,29,10
3,153.0,25.0,,1,29,10
4,,3.0,,1,29,10
5,,6.0,,1,29,10
6,,9.0,,1,29,10
7,,11.0,,1,29,10
8,,13.0,,1,29,10
9,,14.0,,1,29,10


#### a. Change Type Data

In [733]:
cleanse_dataframe_fact(df_fact_user_activities)
change_type_data(df_fact_user_activities)
df_fact_user_activities

Memeriksa missing values...
Mengisi missing values di kolom 'my_plant_id' dengan -1...
Mengisi missing values di kolom 'planting_history_id' dengan -1...
Mengisi missing values di kolom 'watering_history_id' dengan -1...
Memastikan tipe data yang benar...


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(-1, inplace=True)


Unnamed: 0,my_plant_id,planting_history_id,watering_history_id,watering_count,planting_count,user_plant_count
0,155,27,-1,1,29,10
1,150,22,-1,1,29,10
2,154,26,-1,1,29,10
3,153,25,-1,1,29,10
4,-1,3,-1,1,29,10
5,-1,6,-1,1,29,10
6,-1,9,-1,1,29,10
7,-1,11,-1,1,29,10
8,-1,13,-1,1,29,10
9,-1,14,-1,1,29,10


##### b. Save to CSV

In [734]:
df_fact_user_activities.to_csv('../data_source_to_load/fact_user_activities.csv', index=False)

### 3.4.4 Merged to Fact Table Plants Data

#### 3.4.4.1 Merged dim_plants

In [735]:
df_dim_plants.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 13 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   plant_id                 19 non-null     int64         
 1   plant_name               19 non-null     category      
 2   description              19 non-null     category      
 3   is_toxic                 19 non-null     int64         
 4   harvest_duration         19 non-null     int64         
 5   sunlight                 19 non-null     category      
 6   planting_time            19 non-null     category      
 7   plant_category_id        19 non-null     int64         
 8   climate_condition        19 non-null     category      
 9   plant_characteristic_id  19 non-null     int64         
 10  created_at               19 non-null     datetime64[ns]
 11  updated_at               19 non-null     datetime64[ns]
 12  additional_tips          19 non-null  

In [736]:
df_dim_plants

Unnamed: 0,plant_id,plant_name,description,is_toxic,harvest_duration,sunlight,planting_time,plant_category_id,climate_condition,plant_characteristic_id,created_at,updated_at,additional_tips
0,61,Rafflesia-Rafflesiaceae,Bunga Langka,1,6,Partsun,Autumn,11,Wet,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Tipsnya ya gituu
1,62,Tomato-Solanaceae,Tomatoes are a warm-season vegetable that grow...,0,3,Fullsun,Spring,1,Dry,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Support plants with stakes or cages to prevent...
2,63,Lavender - Lamiaceae,Lavender is a fragrant herb known for its arom...,0,6,Fullsun,Summer,7,Dry,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Mulch around plants to retain moisture and con...
3,64,Basil - Lamiaceae,Basil is a popular herb used in many cuisines ...,0,1,Partsun,Spring,5,Wet,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Regularly pinch off flower buds to prolong the...
4,65,Strawberry - Rosaceae,Strawberries are perennial plants known for th...,0,3,Shade,Spring,1,Dry,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Provide good air circulation to prevent fungal...
5,66,Rosemary - Lamiaceae,Rosemary is an aromatic herb with needle-like ...,0,1,Fullsun,Spring,5,Dry,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Protect from frost in colder climates or bring...
6,67,Sunflower - Asteraceae,Sunflowers are tall flowering plants with larg...,0,3,Fullsun,Spring,12,Dry,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Deadhead spent flowers to encourage continuous...
7,68,Peppermint - Lamiaceae,Peppermint is a hybrid mint known for its stro...,0,3,Partsun,Spring,5,Wet,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Divide peppermint plants every 2-3 years to pr...
8,69,Rose - Rosaceae,Roses are ornamental flowering plants known fo...,0,6,Fullsun,Spring,7,Wet,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,Mulch around roses to retain moisture and supp...
9,75,Cucumber - Cucurbitaceae,Cucumbers are climbing plants known for their ...,0,3,Fullsun,Spring,1,Wet,0,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420,"Provide support for climbing plants, such as w..."


In [737]:
df_dim_plant_categories.rename(columns={'id': 'plant_category_id'}, inplace=True)
df_dim_plant_categories.rename(columns={'name': 'plant_category'}, inplace=True)
df_dim_plant_categories

Unnamed: 0,plant_category_id,plant_category,image_url,created_at,updated_at
0,1,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 19:18:43.265216,2024-06-18 19:18:43.265774
1,2,Succulents,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 19:18:43.265216,2024-06-18 19:18:43.265774
2,4,Angiosperms,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 19:18:43.265216,2024-06-18 19:18:43.265774
3,5,Herbs,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 19:18:43.265216,2024-06-18 19:18:43.265774
4,7,Shrubs,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 19:18:43.265216,2024-06-18 19:18:43.265774
5,8,Ferns,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 19:18:43.265216,2024-06-18 19:18:43.265774
6,9,Trees,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 19:18:43.265216,2024-06-18 19:18:43.265774
7,10,Climbers,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 19:18:43.265216,2024-06-18 19:18:43.265774
8,11,Conifers,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 19:18:43.265216,2024-06-18 19:18:43.265774
9,12,Annual,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 19:18:43.265216,2024-06-18 19:18:43.265774


In [738]:
# Melakukan merge dalam satu baris kode dengan suffixes untuk menghindari konflik kolom
df_dim_plants = df_dim_plants.merge(
    df_dim_plant_categories, on='plant_category_id', how='left', suffixes=('', '_category'))

# Memeriksa nama kolom setelah merge
print(df_dim_plants.columns)

Index(['plant_id', 'plant_name', 'description', 'is_toxic', 'harvest_duration',
       'sunlight', 'planting_time', 'plant_category_id', 'climate_condition',
       'plant_characteristic_id', 'created_at', 'updated_at',
       'additional_tips', 'plant_category', 'image_url', 'created_at_category',
       'updated_at_category'],
      dtype='object')


In [739]:
df_dim_plants = df_dim_plants[['plant_id', 'plant_name', 'description', 'is_toxic', 'harvest_duration',
                            'sunlight', 'planting_time', 'plant_category', 'climate_condition',
                            'additional_tips', 'created_at', 'updated_at']]
df_dim_plants

Unnamed: 0,plant_id,plant_name,description,is_toxic,harvest_duration,sunlight,planting_time,plant_category,climate_condition,additional_tips,created_at,updated_at
0,61,Rafflesia-Rafflesiaceae,Bunga Langka,1,6,Partsun,Autumn,Conifers,Wet,Tipsnya ya gituu,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420
1,62,Tomato-Solanaceae,Tomatoes are a warm-season vegetable that grow...,0,3,Fullsun,Spring,Fruits,Dry,Support plants with stakes or cages to prevent...,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420
2,63,Lavender - Lamiaceae,Lavender is a fragrant herb known for its arom...,0,6,Fullsun,Summer,Shrubs,Dry,Mulch around plants to retain moisture and con...,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420
3,64,Basil - Lamiaceae,Basil is a popular herb used in many cuisines ...,0,1,Partsun,Spring,Herbs,Wet,Regularly pinch off flower buds to prolong the...,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420
4,65,Strawberry - Rosaceae,Strawberries are perennial plants known for th...,0,3,Shade,Spring,Fruits,Dry,Provide good air circulation to prevent fungal...,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420
5,66,Rosemary - Lamiaceae,Rosemary is an aromatic herb with needle-like ...,0,1,Fullsun,Spring,Herbs,Dry,Protect from frost in colder climates or bring...,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420
6,67,Sunflower - Asteraceae,Sunflowers are tall flowering plants with larg...,0,3,Fullsun,Spring,Annual,Dry,Deadhead spent flowers to encourage continuous...,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420
7,68,Peppermint - Lamiaceae,Peppermint is a hybrid mint known for its stro...,0,3,Partsun,Spring,Herbs,Wet,Divide peppermint plants every 2-3 years to pr...,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420
8,69,Rose - Rosaceae,Roses are ornamental flowering plants known fo...,0,6,Fullsun,Spring,Shrubs,Wet,Mulch around roses to retain moisture and supp...,2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420
9,75,Cucumber - Cucurbitaceae,Cucumbers are climbing plants known for their ...,0,3,Fullsun,Spring,Fruits,Wet,"Provide support for climbing plants, such as w...",2024-06-18 19:18:43.466929,2024-06-18 19:18:43.468420


##### a. Save to CSV

In [740]:
df_dim_plants.to_csv('../data_source_to_load/dim_plants.csv', index=False)

#### 3.4.4.2 dim_watering_reminders

In [741]:
df_dim_plant_reminders = df_dim_plant_reminders[['id', 'plant_id', 'watering_frequency', 'each', 'watering_amount',
       'unit', 'watering_time', 'weather_condition', 'condition_description',
       'created_at', 'updated_at']]
df_dim_plant_reminders

Unnamed: 0,id,plant_id,watering_frequency,each,watering_amount,unit,watering_time,weather_condition,condition_description,created_at,updated_at
0,61,61,5,Week,300,Liter (l),16:30,"Bright Sun,Rainy,Stormy Rain","Sun,Rainy,Stormy",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
1,62,62,3,Week,2,Liter (l),09:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning or late afternoon.,...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
2,63,63,1,Week,1,Liter (l),08:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water early in the morning.,Maintain regular w...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
3,64,64,4,Week,500,Milliliter (ml),08:30,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning.,Maintain regular w...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
4,65,65,2,Week,1,Liter (l),07:30,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning.,Maintain regular w...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
5,66,66,1,Week,500,Milliliter (ml),07:30,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water early in the morning.,Maintain regular w...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
6,67,67,2,Week,1,Liter (l),08:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning or late afternoon.,...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
7,68,68,2,Week,500,Milliliter (ml),08:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning or late afternoon.,...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
8,69,69,2,Week,2,Liter (l),08:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water early in the morning.,Maintain regular w...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
9,75,75,3,Week,1,Liter (l),07:30,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning.,Maintain a regular...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123


In [742]:
df_dim_plant_reminders.rename(columns={'id': 'watering_reminders_id'}, inplace=True)
df_dim_watering_reminders = df_dim_plant_reminders
df_dim_watering_reminders

Unnamed: 0,watering_reminders_id,plant_id,watering_frequency,each,watering_amount,unit,watering_time,weather_condition,condition_description,created_at,updated_at
0,61,61,5,Week,300,Liter (l),16:30,"Bright Sun,Rainy,Stormy Rain","Sun,Rainy,Stormy",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
1,62,62,3,Week,2,Liter (l),09:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning or late afternoon.,...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
2,63,63,1,Week,1,Liter (l),08:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water early in the morning.,Maintain regular w...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
3,64,64,4,Week,500,Milliliter (ml),08:30,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning.,Maintain regular w...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
4,65,65,2,Week,1,Liter (l),07:30,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning.,Maintain regular w...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
5,66,66,1,Week,500,Milliliter (ml),07:30,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water early in the morning.,Maintain regular w...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
6,67,67,2,Week,1,Liter (l),08:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning or late afternoon.,...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
7,68,68,2,Week,500,Milliliter (ml),08:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning or late afternoon.,...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
8,69,69,2,Week,2,Liter (l),08:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water early in the morning.,Maintain regular w...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
9,75,75,3,Week,1,Liter (l),07:30,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning.,Maintain a regular...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123


##### a. Merged to Fact Table Plants Data

In [743]:
# Menggabungkan kedua dataframe
df_fact_plants_data = pd.merge(df_dim_watering_reminders, df_dim_plants, left_on="plant_id", right_on="plant_id", suffixes=('_watering_reminders', '_plant'))

# Menampilkan hasil gabungan
df_fact_plants_data.columns

Index(['watering_reminders_id', 'plant_id', 'watering_frequency', 'each',
       'watering_amount', 'unit', 'watering_time', 'weather_condition',
       'condition_description', 'created_at_watering_reminders',
       'updated_at_watering_reminders', 'plant_name', 'description',
       'is_toxic', 'harvest_duration', 'sunlight', 'planting_time',
       'plant_category', 'climate_condition', 'additional_tips',
       'created_at_plant', 'updated_at_plant'],
      dtype='object')

In [744]:
df_fact_plants_data = df_fact_plants_data[['plant_id', 'watering_reminders_id']]
df_fact_plants_data

Unnamed: 0,plant_id,watering_reminders_id
0,61,61
1,62,62
2,63,63
3,64,64
4,65,65
5,66,66
6,67,67
7,68,68
8,69,69
9,75,75


##### b. Save to CSV

In [745]:
df_dim_plant_reminders = df_dim_plant_reminders[['watering_reminders_id', 'watering_frequency', 'each', 'watering_amount',
       'unit', 'watering_time', 'weather_condition', 'condition_description',
       'created_at', 'updated_at']]
df_dim_plant_reminders

Unnamed: 0,watering_reminders_id,watering_frequency,each,watering_amount,unit,watering_time,weather_condition,condition_description,created_at,updated_at
0,61,5,Week,300,Liter (l),16:30,"Bright Sun,Rainy,Stormy Rain","Sun,Rainy,Stormy",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
1,62,3,Week,2,Liter (l),09:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning or late afternoon.,...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
2,63,1,Week,1,Liter (l),08:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water early in the morning.,Maintain regular w...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
3,64,4,Week,500,Milliliter (ml),08:30,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning.,Maintain regular w...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
4,65,2,Week,1,Liter (l),07:30,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning.,Maintain regular w...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
5,66,1,Week,500,Milliliter (ml),07:30,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water early in the morning.,Maintain regular w...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
6,67,2,Week,1,Liter (l),08:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning or late afternoon.,...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
7,68,2,Week,500,Milliliter (ml),08:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning or late afternoon.,...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
8,69,2,Week,2,Liter (l),08:00,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water early in the morning.,Maintain regular w...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123
9,75,3,Week,1,Liter (l),07:30,"Bright Sun,Part Cloudy Sun,Rainy,Stormy Rain","Water in the early morning.,Maintain a regular...",2024-06-18 19:18:43.567171,2024-06-18 19:18:43.568123


In [746]:
df_dim_plant_reminders.to_csv('../data_source_to_load/dim_watering_reminders.csv', index=False)

#### 3.4.4.3 dim_plant_faqs

In [747]:
df_dim_plant_faqs

Unnamed: 0,id,plant_id,question,answer,created_at,updated_at
0,76,61,Mau tanya apa,Ya gatauu?,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
1,77,62,Recommendation for fertilizer types and amount...,Use a balanced 10-10-10 fertilizer at planting...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
2,78,62,Recommendation for safe pest control to addres...,Use neem oil or insecticidal soap to manage co...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
3,79,62,How to improve tomato flavor?,"Provide ample sunlight, avoid over-fertilizati...",2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
4,80,62,How to prevent blossom end rot in tomatoes?,Blossom end rot is caused by calcium deficienc...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
5,81,63,Recommendation for fertilizer types and amount...,Use a slow-release fertilizer with a higher ra...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
6,82,63,Recommendation for safe pest control to addres...,Introduce beneficial insects like ladybugs to ...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
7,83,63,Can lavender be grown indoors?,Lavender can be grown indoors in containers wi...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
8,84,63,How often should lavender be pruned?,Lavender should be lightly pruned after flower...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
9,85,64,Recommendation for fertilizer types and amount...,Use a balanced liquid fertilizer like 10-10-10...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761


In [748]:
df_dim_plant_faqs.rename(columns={'id': 'plant_faqs_id'}, inplace=True)
df_dim_plant_faqs

Unnamed: 0,plant_faqs_id,plant_id,question,answer,created_at,updated_at
0,76,61,Mau tanya apa,Ya gatauu?,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
1,77,62,Recommendation for fertilizer types and amount...,Use a balanced 10-10-10 fertilizer at planting...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
2,78,62,Recommendation for safe pest control to addres...,Use neem oil or insecticidal soap to manage co...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
3,79,62,How to improve tomato flavor?,"Provide ample sunlight, avoid over-fertilizati...",2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
4,80,62,How to prevent blossom end rot in tomatoes?,Blossom end rot is caused by calcium deficienc...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
5,81,63,Recommendation for fertilizer types and amount...,Use a slow-release fertilizer with a higher ra...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
6,82,63,Recommendation for safe pest control to addres...,Introduce beneficial insects like ladybugs to ...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
7,83,63,Can lavender be grown indoors?,Lavender can be grown indoors in containers wi...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
8,84,63,How often should lavender be pruned?,Lavender should be lightly pruned after flower...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
9,85,64,Recommendation for fertilizer types and amount...,Use a balanced liquid fertilizer like 10-10-10...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761


##### a. Merged to Fact Table Plants Data

In [749]:
# Menggabungkan kedua dataframe
df_fact_plants_data = pd.merge(df_dim_plant_faqs, df_fact_plants_data, left_on="plant_id", right_on="plant_id", suffixes=('_faqs', '_fact'))

# Menampilkan hasil gabungan
df_fact_plants_data.columns

Index(['plant_faqs_id', 'plant_id', 'question', 'answer', 'created_at',
       'updated_at', 'watering_reminders_id'],
      dtype='object')

In [750]:
df_fact_plants_data = df_fact_plants_data[['plant_id', 'plant_faqs_id', 'watering_reminders_id']]
df_fact_plants_data

Unnamed: 0,plant_id,plant_faqs_id,watering_reminders_id
0,61,76,61
1,62,77,62
2,62,78,62
3,62,79,62
4,62,80,62
5,63,81,63
6,63,82,63
7,63,83,63
8,63,84,63
9,64,85,64


##### b. Save to CSV 

In [751]:
df_dim_plant_faqs = df_dim_plant_faqs[['plant_faqs_id', 'question', 'answer',
       'created_at', 'updated_at']]
df_dim_plant_faqs

Unnamed: 0,plant_faqs_id,question,answer,created_at,updated_at
0,76,Mau tanya apa,Ya gatauu?,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
1,77,Recommendation for fertilizer types and amount...,Use a balanced 10-10-10 fertilizer at planting...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
2,78,Recommendation for safe pest control to addres...,Use neem oil or insecticidal soap to manage co...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
3,79,How to improve tomato flavor?,"Provide ample sunlight, avoid over-fertilizati...",2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
4,80,How to prevent blossom end rot in tomatoes?,Blossom end rot is caused by calcium deficienc...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
5,81,Recommendation for fertilizer types and amount...,Use a slow-release fertilizer with a higher ra...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
6,82,Recommendation for safe pest control to addres...,Introduce beneficial insects like ladybugs to ...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
7,83,Can lavender be grown indoors?,Lavender can be grown indoors in containers wi...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
8,84,How often should lavender be pruned?,Lavender should be lightly pruned after flower...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761
9,85,Recommendation for fertilizer types and amount...,Use a balanced liquid fertilizer like 10-10-10...,2024-06-18 19:18:43.392587,2024-06-18 19:18:43.393761


In [752]:
df_dim_plant_faqs.to_csv('../data_source_to_load/dim_plant_faqs.csv', index=False)

#### 3.4.4.4 dim_plant_instructions

In [753]:
df_dim_plant_instructions

Unnamed: 0,id,plant_id,step_number,step_title,step_description,step_image_url,additional_tips,created_at,updated_at,instruction_category_id
0,92,61,1,Apaya,enaknya apa,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,3
1,93,62,1,Preparing the soil,Prepare well-draining soil rich in organic mat...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,1
2,94,62,2,Sowing tomato seeds,Sow tomato seeds indoors 6-8 weeks before the ...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,2
3,95,62,3,Tomato Plant Care,Water regularly to keep the soil moist but not...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,3
4,96,62,4,Harvesting Tomatoes,Harvest tomatoes when they are fully colored a...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,4
...,...,...,...,...,...,...,...,...,...,...
56,168,80,4,Harvesting Aloe Vera,Harvest mature leaves as needed by cutting the...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,4
57,169,81,1,gk tau,biarkan tumbuh secara alami,-,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,2
58,173,85,1,Cara Merawat,Biarkan terkena matahari sepanjang hari dan be...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,3
59,174,86,1,Preparing the Soil,Use fertile soil with good drainage. Incorpora...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,1


In [754]:
df_dim_plant_instructions.rename(columns={'id': 'plant_instruction_id'}, inplace=True)
df_dim_plant_instructions

Unnamed: 0,plant_instruction_id,plant_id,step_number,step_title,step_description,step_image_url,additional_tips,created_at,updated_at,instruction_category_id
0,92,61,1,Apaya,enaknya apa,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,3
1,93,62,1,Preparing the soil,Prepare well-draining soil rich in organic mat...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,1
2,94,62,2,Sowing tomato seeds,Sow tomato seeds indoors 6-8 weeks before the ...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,2
3,95,62,3,Tomato Plant Care,Water regularly to keep the soil moist but not...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,3
4,96,62,4,Harvesting Tomatoes,Harvest tomatoes when they are fully colored a...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,4
...,...,...,...,...,...,...,...,...,...,...
56,168,80,4,Harvesting Aloe Vera,Harvest mature leaves as needed by cutting the...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,4
57,169,81,1,gk tau,biarkan tumbuh secara alami,-,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,2
58,173,85,1,Cara Merawat,Biarkan terkena matahari sepanjang hari dan be...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,3
59,174,86,1,Preparing the Soil,Use fertile soil with good drainage. Incorpora...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956,1


In [755]:
df_dim_plant_instruction_categories.rename(columns={'id': 'instruction_category_id'}, inplace=True)
df_dim_plant_instruction_categories

Unnamed: 0,instruction_category_id,name,description,image_url,created_at,updated_at
0,1,Soil Preparation,"In this section, you will learn how to properl...",https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 19:18:43.525201,2024-06-18 19:18:43.526034
1,2,Planting Seeds,"At this stage, you will learn how to properly ...",https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 19:18:43.525201,2024-06-18 19:18:43.526034
2,3,Plant Care,"In this section, you will learn how to properl...",https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 19:18:43.525201,2024-06-18 19:18:43.526034
3,4,Harvest,"At this stage, you will learn how to harvest p...",https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-18 19:18:43.525201,2024-06-18 19:18:43.526034


In [756]:
# Melakukan merge dalam satu baris kode dengan suffixes untuk menghindari konflik kolom
df_dim_plant_instructions = df_dim_plant_instructions.merge(
    df_dim_plant_instruction_categories, on='instruction_category_id', how='left', suffixes=('', '_category'))

# Memeriksa nama kolom setelah merge
print(df_dim_plant_instructions.columns)

Index(['plant_instruction_id', 'plant_id', 'step_number', 'step_title',
       'step_description', 'step_image_url', 'additional_tips', 'created_at',
       'updated_at', 'instruction_category_id', 'name', 'description',
       'image_url', 'created_at_category', 'updated_at_category'],
      dtype='object')


In [757]:
df_dim_plant_instructions = df_dim_plant_instructions[['plant_instruction_id', 'name', 'plant_id', 'step_number', 'step_title',
       'step_description', 'step_image_url', 'additional_tips', 'created_at',
       'updated_at']]

df_dim_plant_instructions.rename(columns={'name': 'name_instruction_categories'}, inplace=True)

df_dim_plant_instructions

Unnamed: 0,plant_instruction_id,name_instruction_categories,plant_id,step_number,step_title,step_description,step_image_url,additional_tips,created_at,updated_at
0,92,Plant Care,61,1,Apaya,enaknya apa,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956
1,93,Soil Preparation,62,1,Preparing the soil,Prepare well-draining soil rich in organic mat...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956
2,94,Planting Seeds,62,2,Sowing tomato seeds,Sow tomato seeds indoors 6-8 weeks before the ...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956
3,95,Plant Care,62,3,Tomato Plant Care,Water regularly to keep the soil moist but not...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956
4,96,Harvest,62,4,Harvesting Tomatoes,Harvest tomatoes when they are fully colored a...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956
...,...,...,...,...,...,...,...,...,...,...
56,168,Harvest,80,4,Harvesting Aloe Vera,Harvest mature leaves as needed by cutting the...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956
57,169,Planting Seeds,81,1,gk tau,biarkan tumbuh secara alami,-,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956
58,173,Plant Care,85,1,Cara Merawat,Biarkan terkena matahari sepanjang hari dan be...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956
59,174,Soil Preparation,86,1,Preparing the Soil,Use fertile soil with good drainage. Incorpora...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956


##### a. Merged to Fact Table Plants Data

In [758]:
# Menggabungkan kedua dataframe
df_fact_plants_data = pd.merge(df_dim_plant_instructions, df_fact_plants_data, left_on="plant_id", 
                               right_on="plant_id", suffixes=('_instructions', '_fact'))

# Menampilkan hasil gabungan
df_fact_plants_data.columns

Index(['plant_instruction_id', 'name_instruction_categories', 'plant_id',
       'step_number', 'step_title', 'step_description', 'step_image_url',
       'additional_tips', 'created_at', 'updated_at', 'plant_faqs_id',
       'watering_reminders_id'],
      dtype='object')

In [759]:
df_fact_plants_data = df_fact_plants_data[['plant_id', 'plant_faqs_id', 'plant_instruction_id', 'watering_reminders_id']]

df_fact_plants_data

Unnamed: 0,plant_id,plant_faqs_id,plant_instruction_id,watering_reminders_id
0,61,76,92,61
1,62,77,93,62
2,62,78,93,62
3,62,79,93,62
4,62,80,93,62
...,...,...,...,...
220,80,131,168,80
221,81,132,169,81
222,85,133,173,85
223,86,134,174,86


##### b. Save to CSV

In [760]:
df_dim_plant_instructions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61 entries, 0 to 60
Data columns (total 10 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   plant_instruction_id         61 non-null     int64         
 1   name_instruction_categories  61 non-null     category      
 2   plant_id                     61 non-null     int64         
 3   step_number                  61 non-null     int64         
 4   step_title                   61 non-null     category      
 5   step_description             61 non-null     category      
 6   step_image_url               61 non-null     category      
 7   additional_tips              61 non-null     category      
 8   created_at                   61 non-null     datetime64[ns]
 9   updated_at                   61 non-null     datetime64[ns]
dtypes: category(5), datetime64[ns](2), int64(3)
memory usage: 10.6 KB


In [761]:
df_dim_plant_instructions = df_dim_plant_instructions[['plant_instruction_id', 'name_instruction_categories', 'step_number', 'step_title',
       'step_description', 'step_image_url', 'additional_tips', 'created_at',
       'updated_at']]
df_dim_plant_instructions

Unnamed: 0,plant_instruction_id,name_instruction_categories,step_number,step_title,step_description,step_image_url,additional_tips,created_at,updated_at
0,92,Plant Care,1,Apaya,enaknya apa,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956
1,93,Soil Preparation,1,Preparing the soil,Prepare well-draining soil rich in organic mat...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956
2,94,Planting Seeds,2,Sowing tomato seeds,Sow tomato seeds indoors 6-8 weeks before the ...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956
3,95,Plant Care,3,Tomato Plant Care,Water regularly to keep the soil moist but not...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956
4,96,Harvest,4,Harvesting Tomatoes,Harvest tomatoes when they are fully colored a...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956
...,...,...,...,...,...,...,...,...,...
56,168,Harvest,4,Harvesting Aloe Vera,Harvest mature leaves as needed by cutting the...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956
57,169,Planting Seeds,1,gk tau,biarkan tumbuh secara alami,-,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956
58,173,Plant Care,1,Cara Merawat,Biarkan terkena matahari sepanjang hari dan be...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956
59,174,Soil Preparation,1,Preparing the Soil,Use fertile soil with good drainage. Incorpora...,https://res.cloudinary.com/dxrz0cg5z/image/upl...,-,2024-06-18 19:18:43.313241,2024-06-18 19:18:43.313956


In [762]:
df_dim_plant_instructions.to_csv('../data_source_to_load/dim_plant_instructions.csv', index=False)

#### 3.4.4.5 dim_plant_characteristics

In [763]:
df_dim_plant_characteristics

Unnamed: 0,id,plant_id,height,height_unit,wide,wide_unit,leaf_color
0,61,61,3,Meter,300,Centimeter,Red
1,62,62,2,Meter,1,Meter,Green
2,63,63,600,Centimeter,900,Centimeter,Green
3,64,64,400,Centimeter,300,Centimeter,Green
4,65,65,200,Centimeter,300,Centimeter,Green
5,66,66,1200,Centimeter,600,Centimeter,Green
6,67,67,2,Meter,1,Meter,Green
7,68,68,500,Centimeter,300,Centimeter,Green
8,69,69,150,Centimeter,75,Centimeter,Green
9,75,75,500,Centimeter,400,Centimeter,Green


In [764]:
df_dim_plant_characteristics.rename(columns={'id': 'plant_characteristic_id'}, inplace=True)
df_dim_plant_characteristics

Unnamed: 0,plant_characteristic_id,plant_id,height,height_unit,wide,wide_unit,leaf_color
0,61,61,3,Meter,300,Centimeter,Red
1,62,62,2,Meter,1,Meter,Green
2,63,63,600,Centimeter,900,Centimeter,Green
3,64,64,400,Centimeter,300,Centimeter,Green
4,65,65,200,Centimeter,300,Centimeter,Green
5,66,66,1200,Centimeter,600,Centimeter,Green
6,67,67,2,Meter,1,Meter,Green
7,68,68,500,Centimeter,300,Centimeter,Green
8,69,69,150,Centimeter,75,Centimeter,Green
9,75,75,500,Centimeter,400,Centimeter,Green


##### a. Merged to Fact Table Plants Data

In [765]:
# Menggabungkan kedua dataframe
df_fact_plants_data = pd.merge(df_dim_plant_characteristics, df_fact_plants_data, left_on="plant_id", 
                               right_on="plant_id", suffixes=('_characteristics', '_fact'))

# Menampilkan hasil gabungan
df_fact_plants_data.columns

Index(['plant_characteristic_id', 'plant_id', 'height', 'height_unit', 'wide',
       'wide_unit', 'leaf_color', 'plant_faqs_id', 'plant_instruction_id',
       'watering_reminders_id'],
      dtype='object')

In [766]:
df_fact_plants_data = df_fact_plants_data[['plant_id', 'plant_faqs_id', 'plant_characteristic_id', 'plant_instruction_id', 'watering_reminders_id']]

df_fact_plants_data

Unnamed: 0,plant_id,plant_faqs_id,plant_characteristic_id,plant_instruction_id,watering_reminders_id
0,61,76,61,92,61
1,62,77,62,93,62
2,62,78,62,93,62
3,62,79,62,93,62
4,62,80,62,93,62
...,...,...,...,...,...
220,80,131,80,168,80
221,81,132,81,169,81
222,85,133,85,173,85
223,86,134,86,174,86


##### b. Save to CSV

In [767]:
df_dim_plant_characteristics = df_dim_plant_characteristics[['plant_characteristic_id', 'height', 'height_unit', 'wide',
       'wide_unit', 'leaf_color']]

df_dim_plant_characteristics

Unnamed: 0,plant_characteristic_id,height,height_unit,wide,wide_unit,leaf_color
0,61,3,Meter,300,Centimeter,Red
1,62,2,Meter,1,Meter,Green
2,63,600,Centimeter,900,Centimeter,Green
3,64,400,Centimeter,300,Centimeter,Green
4,65,200,Centimeter,300,Centimeter,Green
5,66,1200,Centimeter,600,Centimeter,Green
6,67,2,Meter,1,Meter,Green
7,68,500,Centimeter,300,Centimeter,Green
8,69,150,Centimeter,75,Centimeter,Green
9,75,500,Centimeter,400,Centimeter,Green


In [768]:
df_dim_plant_characteristics.to_csv('../data_source_to_load/dim_plant_characteristics.csv', index=False)

#### 3.4.4.6 Merged Fact Table Plants Data Final

In [769]:
# Menghitung total_plants
df_fact_plants_data['total_plants'] = df_fact_plants_data['plant_id'].nunique()
df_fact_plants_data

Unnamed: 0,plant_id,plant_faqs_id,plant_characteristic_id,plant_instruction_id,watering_reminders_id,total_plants
0,61,76,61,92,61,19
1,62,77,62,93,62,19
2,62,78,62,93,62,19
3,62,79,62,93,62,19
4,62,80,62,93,62,19
...,...,...,...,...,...,...
220,80,131,80,168,80,19
221,81,132,81,169,81,19
222,85,133,85,173,85,19
223,86,134,86,174,86,19


##### a. Save to CSV

In [770]:
df_fact_plants_data.to_csv('../data_source_to_load/fact_plants_data.csv', index=False)

# 4. Load

In [773]:
load_dotenv()

project_id = os.getenv('PROJECT_ID')
dataset_id = os.getenv('DATASET_ID')
service_acc = os.getenv('SERVICE_ACCOUNT')

os.environ['SERVICE_ACCOUNT'] = service_acc

credentials = service_account.Credentials.from_service_account_file(service_acc)

client = bigquery.Client(credentials=credentials, project=project_id)

def load_csv_to_bigquery(csv_file_path, table_id):
    # Baca file CSV ke DataFrame
    df = pd.read_csv(csv_file_path)

    # Bersihkan DataFrame
    df_cleaned = cleanse_dataframe(df)
    
    df_cleaned = change_type_data(df)

    # Tentukan ID tabel penuh: dataset_id.table_name
    table_id = f"{project_id}.{dataset_id}.{table_id}"
    
    # Muat DataFrame ke tabel BigQuery
    job = client.load_table_from_dataframe(df_cleaned, table_id)
    
    # Tunggu pekerjaan selesai
    job.result()
    
    print(f"Loaded {len(df_cleaned)} rows into {table_id}.")

# Direktori tempat file CSV Anda berada
data_source_dir = '../data_source_to_load'

# Muat setiap file CSV di direktori tersebut ke tabel BigQuery yang sesuai
for csv_file in os.listdir(data_source_dir):
    if csv_file.endswith('.csv'):
        csv_file_path = os.path.join(data_source_dir, csv_file)
        table_name = os.path.splitext(csv_file)[0]  # Nama tabel diambil dari nama file tanpa ekstensi
        load_csv_to_bigquery(csv_file_path, table_name)

Memeriksa missing values...
Membersihkan tag HTML dari kolom teks...
Membersihkan tag HTML di kolom 'name_instruction_categories'...
Membersihkan tag HTML di kolom 'step_title'...
Membersihkan tag HTML di kolom 'step_description'...
Membersihkan tag HTML di kolom 'step_image_url'...
Membersihkan tag HTML di kolom 'additional_tips'...
Membersihkan tag HTML di kolom 'created_at'...
Membersihkan tag HTML di kolom 'updated_at'...
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['created_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['updated_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. Th

Loaded 61 rows into plantopia-capstone.plantopia.dim_plant_instructions.
Memeriksa missing values...
Membersihkan tag HTML dari kolom teks...
Membersihkan tag HTML di kolom 'time'...
Membersihkan tag HTML di kolom 'type'...
Membersihkan tag HTML di kolom 'created_at'...
Membersihkan tag HTML di kolom 'updated_at'...
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['created_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['updated_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. Th

Loaded 2 rows into plantopia-capstone.plantopia.dim_customize_watering_reminders.
Memeriksa missing values...
Membersihkan tag HTML dari kolom teks...
Membersihkan tag HTML di kolom 'user_name'...
Membersihkan tag HTML di kolom 'plant_name'...
Membersihkan tag HTML di kolom 'created_at'...
Membersihkan tag HTML di kolom 'updated_at'...
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['created_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['updated_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. Th

Loaded 1 rows into plantopia-capstone.plantopia.dim_watering_histories.
Memeriksa missing values...
Membersihkan tag HTML dari kolom teks...
Membersihkan tag HTML di kolom 'question'...
Membersihkan tag HTML di kolom 'answer'...
Membersihkan tag HTML di kolom 'created_at'...
Membersihkan tag HTML di kolom 'updated_at'...
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['created_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['updated_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. Th

Loaded 60 rows into plantopia-capstone.plantopia.dim_plant_faqs.
Memeriksa missing values...
Mengisi missing values di kolom 'last_watered_at' dengan 0...
Membersihkan tag HTML dari kolom teks...
Membersihkan tag HTML di kolom 'user_name'...
Membersihkan tag HTML di kolom 'plant_name'...
Membersihkan tag HTML di kolom 'created_at'...
Membersihkan tag HTML di kolom 'updated_at'...
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'last_watered_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'last_watered_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'.

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['created_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate obje

Loaded 10 rows into plantopia-capstone.plantopia.dim_my_plants.
Memeriksa missing values...
Membersihkan tag HTML dari kolom teks...
Membersihkan tag HTML di kolom 'user_name'...
Membersihkan tag HTML di kolom 'plant_name'...
Membersihkan tag HTML di kolom 'plant_category'...
Membersihkan tag HTML di kolom 'created_at'...
Membersihkan tag HTML di kolom 'updated_at'...
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['created_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['updated_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. Th

Loaded 29 rows into plantopia-capstone.plantopia.dim_planting_histories.
Memeriksa missing values...
Membersihkan tag HTML dari kolom teks...
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Loaded 31 rows into plantopia-capstone.plantopia.fact_user_activities.
Memeriksa missing values...
Membersihkan tag HTML dari kolom teks...
Membersihkan tag HTML di kolom 'height_unit'...
Membersihkan tag HTML di kolom 'wide_unit'...
Membersihkan tag HTML di kolom 'leaf_color'...
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...




Loaded 19 rows into plantopia-capstone.plantopia.dim_plant_characteristics.
Memeriksa missing values...
Membersihkan tag HTML dari kolom teks...
Membersihkan tag HTML di kolom 'plant_name'...
Membersihkan tag HTML di kolom 'description'...
Membersihkan tag HTML di kolom 'sunlight'...
Membersihkan tag HTML di kolom 'planting_time'...
Membersihkan tag HTML di kolom 'plant_category'...
Membersihkan tag HTML di kolom 'climate_condition'...
Membersihkan tag HTML di kolom 'additional_tips'...
Membersihkan tag HTML di kolom 'created_at'...
Membersihkan tag HTML di kolom 'updated_at'...
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime de

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['created_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['updated_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. Th

Loaded 19 rows into plantopia-capstone.plantopia.dim_plants.
Memeriksa missing values...
Membersihkan tag HTML dari kolom teks...
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Loaded 225 rows into plantopia-capstone.plantopia.fact_plants_data.
Memeriksa missing values...
Membersihkan tag HTML dari kolom teks...
Membersihkan tag HTML di kolom 'each'...
Membersihkan tag HTML di kolom 'unit'...
Membersihkan tag HTML di kolom 'watering_time'...
Membersihkan tag HTML di kolom 'weather_condition'...
Membersihkan tag HTML di kolom 'condition_description'...
Membersihkan tag HTML di kolom 'created_at'...
Membersihkan tag HTML di kolom 'updated_at'...
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memastikan tipe data yang benar...
Mengonversi kolom 'created_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Mengonversi kolom 'updated_at' menjadi datetime dengan format '%Y-%m-%d %H:%M'...
Memastikan tipe data yang bena

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['created_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['updated_at'].fillna(datetime.now(), inplace=True)  # Isi dengan tanggal dan waktu saat ini
The behavior will change in pandas 3.0. Th

Loaded 19 rows into plantopia-capstone.plantopia.dim_watering_reminders.
