# 1. Import Library yang dibutuhkan

In [1]:
from sqlalchemy import create_engine, inspect
from dotenv import load_dotenv
import os
import pandas as pd

## 1.1 Connect to Database Backend

In [2]:
load_dotenv()

user = os.getenv('DB_USER')
password = os.getenv('DB_PASSWORD')
host = os.getenv('DB_HOST')
port = os.getenv('DB_PORT')
database = os.getenv('DB_NAME')

def get_connection():
    return create_engine(
        f"mysql+pymysql://{user}:{password}@{host}:{port}/{database}"
    )

def get_all_tables(engine):
    inspector = inspect(engine)
    return inspector.get_table_names()

def table_to_dataframe(engine, table_name):
    with engine.connect() as connection:
        query = f"SELECT * FROM {table_name}"
        result = connection.execute(query)
        df = pd.DataFrame(result.fetchall(), columns=result.keys())
        return df

engine = get_connection()
print(f"Koneksi ke {host} untuk user {user} sukses dibuat!.")

Koneksi ke 104.198.27.207 untuk user dataengineer sukses dibuat!.


# 2. Extract

## 2.1 Convert Tables to Dataframe

In [3]:
tables = get_all_tables(engine)
print(f"Tables in the database: {tables}")

Tables in the database: ['admins', 'customize_watering_reminders', 'notifications', 'plant_categories', 'plant_characteristics', 'plant_faqs', 'plant_images', 'plant_instruction_categories', 'plant_instructions', 'plant_progresses', 'plant_reminders', 'plants', 'user_plant_histories', 'user_plants', 'users', 'watering_histories']


### 2.1.1 All Dataframe

In [4]:
output_dir = "../data_source_csv"

for table in tables:
    df_variable_name = f"df_{table}"
    globals()[df_variable_name] = table_to_dataframe(engine, table)
    print(f"Menampilkan Dataframe dari tabel: {table}")
    display(globals()[df_variable_name])
    
    csv_filename = os.path.join(output_dir, f"{table}.csv")
    globals()[df_variable_name].to_csv(csv_filename, index=False)
    print(f"Menyimpan Dataframe dari tabel {table} ke {csv_filename} \n")

  result = connection.execute(query)


Menampilkan Dataframe dari tabel: admins


Unnamed: 0,id,name,email,password,url_image,created_at,updated_at
0,1,Octaviano Ryan Eka Putra Hartanto,octavianoryan123@gmail.com,$2a$08$wAw9SfEDq6EsDZ7NqYpNnOXE6aV1GB3YUA3zWpT...,,2024-05-28 09:13:45.846,2024-05-28 09:13:45.846
1,2,Anggita Prameswari Darmawan,aprameswarid@gmail.com,$2a$08$MDwcRNb2SadCbSiUORr6H.QISr6j3ygPcGGhJTp...,,2024-06-03 03:25:41.667,2024-06-03 03:25:41.667
2,3,Anggita Darmawan,anggitadarmawan3@gmail.com,$2a$08$lFrhHW1q9rG/QmrTPeLy9OqQFZtwj/k72HfJ2oq...,,2024-06-05 01:27:30.592,2024-06-05 01:27:30.592
3,4,adminFe,adminfe123@gmail.com,$2a$08$OY4lskcKiJO7zldb.n5t2u7KHYtVBKt9zzR96Zq...,,2024-06-15 03:50:19.301,2024-06-15 03:50:19.301


Menyimpan Dataframe dari tabel admins ke ../data_source_csv/admins.csv 

Menampilkan Dataframe dari tabel: customize_watering_reminders


Unnamed: 0,id,my_plant_id,time,recurring,type,created_at,updated_at


Menyimpan Dataframe dari tabel customize_watering_reminders ke ../data_source_csv/customize_watering_reminders.csv 

Menampilkan Dataframe dari tabel: notifications


Unnamed: 0,id,title,body,user_id,is_read,created_at,updated_at,plant_id


Menyimpan Dataframe dari tabel notifications ke ../data_source_csv/notifications.csv 

Menampilkan Dataframe dari tabel: plant_categories


Unnamed: 0,id,name,image_url,created_at,updated_at
0,1,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-05-30 00:45:23.789,2024-06-14 02:51:56.957
1,2,Succulents,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2023-05-28 10:00:00.000,2024-06-14 02:53:30.136
2,4,Angiosperms,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2023-05-28 10:00:00.000,2024-06-14 02:53:50.500
3,5,Herbs,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2023-05-28 10:00:00.000,2024-06-14 02:54:18.975
4,7,Shrubs,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2023-05-28 10:00:00.000,2024-06-14 02:54:50.460
5,8,Ferns,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-07 07:07:57.671,2024-06-14 02:55:21.187
6,9,Trees,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-07 07:18:57.328,2024-06-14 02:56:14.506
7,10,Climbers,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-14 02:57:03.527,2024-06-14 02:57:03.527
8,11,Conifers,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-14 02:57:20.947,2024-06-14 02:57:20.947
9,12,Annual,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-14 02:57:33.736,2024-06-14 02:57:33.736


Menyimpan Dataframe dari tabel plant_categories ke ../data_source_csv/plant_categories.csv 

Menampilkan Dataframe dari tabel: plant_characteristics


Unnamed: 0,id,plant_id,height,height_unit,wide,wide_unit,leaf_color
0,46,46,100,centimeter,5,centimeter,Green
1,55,55,1,Centimeter,1,Meter,Green
2,59,59,1,Centimeter,1,Meter,Green


Menyimpan Dataframe dari tabel plant_characteristics ke ../data_source_csv/plant_characteristics.csv 

Menampilkan Dataframe dari tabel: plant_faqs


Unnamed: 0,id,plant_id,question,answer,created_at,updated_at
0,61,46,Question 1,Answer 1,2024-06-15 15:22:02.784,2024-06-15 15:22:02.784
1,62,46,Question 2,Answer 2,2024-06-15 15:22:02.784,2024-06-15 15:22:02.784
2,69,55,<p>sdfsdfdd</p>,<p>sdfasdaaaaa</p>,2024-06-16 04:15:28.010,2024-06-16 04:15:28.010
3,73,59,<p>asdasdasd</p>,<p>asdassss</p>,2024-06-16 05:10:41.556,2024-06-16 05:10:41.556


Menyimpan Dataframe dari tabel plant_faqs ke ../data_source_csv/plant_faqs.csv 

Menampilkan Dataframe dari tabel: plant_images


Unnamed: 0,id,plant_id,file_name,is_primary,created_at,updated_at
0,54,46,https://res.cloudinary.com/dxrz0cg5z/image/upl...,1,2024-06-15 15:22:02.784,2024-06-15 15:22:02.784
1,62,55,https://res.cloudinary.com/dxrz0cg5z/image/upl...,1,2024-06-16 04:15:28.010,2024-06-16 04:15:28.010
2,67,59,https://res.cloudinary.com/dxrz0cg5z/image/upl...,1,2024-06-16 05:10:41.556,2024-06-16 05:10:41.556
3,68,59,https://res.cloudinary.com/dxrz0cg5z/image/upl...,0,2024-06-16 05:10:41.556,2024-06-16 05:10:41.556


Menyimpan Dataframe dari tabel plant_images ke ../data_source_csv/plant_images.csv 

Menampilkan Dataframe dari tabel: plant_instruction_categories


Unnamed: 0,id,name,description,image_url,created_at,updated_at
0,1,Soil Preparation,"In this section, you will learn how to properl...",https://res.cloudinary.com/dxrz0cg5z/image/upl...,2023-05-28 10:00:00,2024-06-14 02:16:24.448
1,2,Planting Seeds,"At this stage, you will learn how to properly ...",https://res.cloudinary.com/dxrz0cg5z/image/upl...,2023-05-28 10:00:00,2024-06-14 02:17:35.928
2,3,Plant Care,"In this section, you will learn how to properl...",https://res.cloudinary.com/dxrz0cg5z/image/upl...,2023-05-28 10:00:00,2024-06-14 02:18:46.131
3,4,Harvest,"At this stage, you will learn how to harvest p...",https://res.cloudinary.com/dxrz0cg5z/image/upl...,2023-05-28 10:00:00,2024-06-14 02:19:50.778


Menyimpan Dataframe dari tabel plant_instruction_categories ke ../data_source_csv/plant_instruction_categories.csv 

Menampilkan Dataframe dari tabel: plant_instructions


Unnamed: 0,id,plant_id,step_number,step_title,step_description,step_image_url,additional_tips,created_at,updated_at,instruction_category_id
0,68,46,1,Instruction 1,Description of Instruction 1,https://res.cloudinary.com/dxrz0cg5z/image/upl...,,2024-06-15 15:22:02.784,2024-06-15 15:22:02.784,1
1,69,46,2,Instruction 2,Description of Instruction 2,,,2024-06-15 15:22:02.784,2024-06-15 15:22:02.784,2
2,81,55,1,asdasd,<p>asdasdsd</p>,https://res.cloudinary.com/dxrz0cg5z/image/upl...,,2024-06-16 04:15:28.010,2024-06-16 04:15:28.010,2
3,82,55,2,dsffdsfsdf,<p>sdasdasd</p>,,,2024-06-16 04:15:28.010,2024-06-16 04:15:28.010,3
4,89,59,1,asdasdas,<p>asdasdasd</p>,https://res.cloudinary.com/dxrz0cg5z/image/upl...,,2024-06-16 05:10:41.556,2024-06-16 05:10:41.556,2


Menyimpan Dataframe dari tabel plant_instructions ke ../data_source_csv/plant_instructions.csv 

Menampilkan Dataframe dari tabel: plant_progresses


Unnamed: 0,id,plant_id,user_id,image_url,created_at,updated_at
0,21,46,23,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 05:51:59.463,2024-06-16 05:51:59.463
1,22,55,14,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 08:40:12.494,2024-06-16 08:40:12.494


Menyimpan Dataframe dari tabel plant_progresses ke ../data_source_csv/plant_progresses.csv 

Menampilkan Dataframe dari tabel: plant_reminders


Unnamed: 0,id,plant_id,watering_frequency,each,watering_amount,unit,watering_time,weather_condition,condition_description,created_at,updated_at
0,46,46,7,days,500,ml,09:00,Sunny,Water early in the morning when it's sunny,2024-06-15 15:22:02.791,2024-06-15 15:22:02.791
1,55,55,1,Week,1,Milliliter (ml),12:00,Rainy,asdasdss,2024-06-16 04:15:28.019,2024-06-16 04:15:28.019
2,59,59,1,Day,1,Liter (l),12:00,Rainy,asdasdasdasd,2024-06-16 05:10:41.563,2024-06-16 05:10:41.563


Menyimpan Dataframe dari tabel plant_reminders ke ../data_source_csv/plant_reminders.csv 

Menampilkan Dataframe dari tabel: plants


Unnamed: 0,id,name,description,is_toxic,harvest_duration,sunlight,planting_time,plant_category_id,climate_condition,plant_characteristic_id,created_at,updated_at,additional_tips
0,46,Rose update,Rose is Flowers,1,90,Fullsun,Summer,1,Dry,0,2024-06-15 15:22:02.784,2024-06-15 15:22:02.784,Add tips instruction
1,55,asdasd-dsfdsdf,<p>asdasdsassd</p>,1,1,Fullsun,Summer,1,Dry,0,2024-06-16 04:15:28.010,2024-06-16 04:15:28.010,<p>fsdfsdddf</p>
2,59,asdaasd-sdasd,<p>asdasd</p>,1,3,Fullsun,Summer,1,Wet,0,2024-06-16 05:10:41.556,2024-06-16 05:10:41.556,<p>asdasdsdasd</p>


Menyimpan Dataframe dari tabel plants ke ../data_source_csv/plants.csv 

Menampilkan Dataframe dari tabel: user_plant_histories


Unnamed: 0,id,user_id,plant_id,plant_name,plant_category,plant_image_url,created_at,updated_at
0,2,3,46,Rose update,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-15 23:57:01.054,2024-06-15 23:57:01.054
1,3,14,46,Rose update,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 04:29:58.931,2024-06-16 04:29:58.931
2,4,14,55,asdasd-dsfdsdf,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 04:36:07.031,2024-06-16 04:36:07.031
3,5,14,55,asdasd-dsfdsdf,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 04:56:05.872,2024-06-16 04:56:05.872
4,6,14,46,Rose update,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 04:56:47.220,2024-06-16 04:56:47.220
5,7,14,55,asdasd-dsfdsdf,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 07:33:02.184,2024-06-16 07:33:02.184
6,8,14,59,asdaasd-sdasd,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 08:10:30.869,2024-06-16 08:10:30.869
7,9,14,46,Rose update,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 08:33:58.143,2024-06-16 08:33:58.143
8,10,14,55,asdasd-dsfdsdf,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 08:36:24.144,2024-06-16 08:36:24.144
9,11,14,46,Rose update,Fruits,https://res.cloudinary.com/dxrz0cg5z/image/upl...,2024-06-16 08:38:36.573,2024-06-16 08:38:36.573


Menyimpan Dataframe dari tabel user_plant_histories ke ../data_source_csv/user_plant_histories.csv 

Menampilkan Dataframe dari tabel: user_plants


Unnamed: 0,id,user_id,plant_id,created_at,updated_at,last_watered_at,customize_name
0,109,17,46,2024-06-15 15:33:55.379,2024-06-15 15:33:55.379,,Rose update
1,123,23,46,2024-06-16 05:51:30.271,2024-06-16 05:51:30.271,,Rose update
2,133,14,55,2024-06-16 10:53:19.495,2024-06-16 10:53:19.495,,asdasd-dsfdsdf
3,136,14,46,2024-06-16 12:21:05.281,2024-06-16 12:21:05.281,,Rose update


Menyimpan Dataframe dari tabel user_plants ke ../data_source_csv/user_plants.csv 

Menampilkan Dataframe dari tabel: users


Unnamed: 0,id,name,email,password,is_active,otp,url_image,created_at,updated_at,fcm_token
0,3,Octaviano Ryan Eka Putra Hartanto,octavianoryan030@gmail.com,$2a$08$R9/7B15r6CgnfHhTEvW3o.qDQtB61Vmw2WxL8OQ...,1,986482,,2024-05-28 09:08:58.892,2024-05-28 09:08:58.892,
1,5,Setiabudi,mamansetiabudi12061982@gmail.com,$2a$08$ur/09YUT0MI2s5CvTcKNWOz29g73ZbaIaa7mdti...,1,364170,,2024-06-04 06:02:55.224,2024-06-04 06:02:55.224,
2,6,Aletha Safa,yellowblue@gmail.com,$2a$08$kRWcnKu27ojUmST7WqB17.OZYKI1En2MYHFVtMg...,0,486315,,2024-06-05 05:50:35.189,2024-06-05 05:50:35.189,
3,7,Valdimir Putin,kangkingkung79@gmail.com,$2a$08$8TlhkfNRNocMnI1RAs1gXuQXZomO1ILgP8vQVbn...,1,714585,,2024-06-05 12:25:47.200,2024-06-05 12:25:47.200,092013
4,9,Aletha Safa,yellow3@gmail.com,$2a$08$0MeHpb3QgFnL51BfR7YZEu8MyBgs050e2K3plFL...,0,870521,,2024-06-06 14:49:11.846,2024-06-06 14:49:11.846,092013
5,10,Aletha Safa,blue04@gmail.com,$2a$08$Y6QSW6XzuXdouaX7krSLEuYXimEGEhURjVBaAzp...,0,841146,,2024-06-06 15:41:47.729,2024-06-06 15:41:47.729,092013
6,11,Aletha Safa,blue05@gmail.com,$2a$08$p2RCU3u8YgiHG8oiuPFcze2DYhAMXSYppX88bfW...,0,229096,,2024-06-06 15:43:39.360,2024-06-06 15:43:39.360,092013
7,12,Aletha Safa,blue06@gmail.com,$2a$08$tqSwo4MtjhOFNYgmta8x3.2Esa4fnkEDFgK7SiH...,0,853420,,2024-06-07 03:59:48.264,2024-06-07 03:59:48.264,092013
8,14,Dafa Aldian,dafaaldian155@gmail.com,$2a$08$igkmrN4.3Uwe8TjnX8uYhuKfhhKRc7QyQsGk0PN...,1,290925,,2024-06-09 15:31:59.073,2024-06-09 15:31:59.073,fowtobnNSrGVOH_a3KOzMa:APA91bH9lp6Rbws3oI6Hngz...
9,15,Annisa,annisa@gmail.com,$2a$08$43fz/1GQXWmR7oEyDAW/6.3NIXE9jFD/dTyW12o...,0,122927,,2024-06-09 16:26:37.421,2024-06-09 16:26:37.421,dI2YkfWPS0eEaTsSNfgDYZ:APA91bFYX3YZVIDzbad7_yR...


Menyimpan Dataframe dari tabel users ke ../data_source_csv/users.csv 

Menampilkan Dataframe dari tabel: watering_histories


Unnamed: 0,id,plant_id,user_id,created_at,updated_at
0,4,46,3,2024-06-16 00:21:28.425,2024-06-16 00:21:28.425
1,5,46,14,2024-06-16 07:17:08.217,2024-06-16 07:17:08.217


Menyimpan Dataframe dari tabel watering_histories ke ../data_source_csv/watering_histories.csv 



# 3. Transform

## 3.1 Cleaning Data

In [5]:
def cleanse_dataframe(df):
    print("Memeriksa missing values...")
    missing_values = df.isnull().sum()
    print(missing_values)
    
    if missing_values.any():
        print("Mengisi missing values dengan 0...")
        df.fillna(0, inplace=True)

    print("Memeriksa duplikasi...")
    duplicate_rows = df.duplicated().sum()
    print(f"Jumlah baris duplikat: {duplicate_rows}")
    
    if duplicate_rows > 0:
        print("Menghapus baris duplikat...")
        df.drop_duplicates(inplace=True)
    
    return df

In [6]:
for table in tables:
    df_variable_name = f"df_{table}"
    
    # Cleansing DataFrame
    globals()[df_variable_name] = cleanse_dataframe(globals()[df_variable_name])

Memeriksa missing values...
id            0
name          0
email         0
password      0
url_image     0
created_at    0
updated_at    0
dtype: int64
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memeriksa missing values...
id             0
my_plant_id    0
time           0
recurring      0
type           0
created_at     0
updated_at     0
dtype: int64
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memeriksa missing values...
id            0
title         0
body          0
user_id       0
is_read       0
created_at    0
updated_at    0
plant_id      0
dtype: int64
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memeriksa missing values...
id            0
name          0
image_url     0
created_at    0
updated_at    0
dtype: int64
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memeriksa missing values...
id             0
plant_id       0
height         0
height_unit    0
wide           0
wide_unit      0
leaf_color     0
dtype: int64
Memeriksa duplikasi...
Jumlah baris duplikat:

  df.fillna(0, inplace=True)


## 3.2 Informasi Dataframe

In [7]:
def info_dataframe(df):
    print("Menampilkan Informasi Field di tiap Dataframe")
    info_dataframe = df.info()
    print(info_dataframe)
    
    return df

In [8]:
for table in tables:
    df_variable_name = f"df_{table}"
    
    # Informasi DataFrame
    globals()[df_variable_name] = info_dataframe(globals()[df_variable_name])

Menampilkan Informasi Field di tiap Dataframe


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   id          4 non-null      int64         
 1   name        4 non-null      object        
 2   email       4 non-null      object        
 3   password    4 non-null      object        
 4   url_image   4 non-null      object        
 5   created_at  4 non-null      datetime64[ns]
 6   updated_at  4 non-null      datetime64[ns]
dtypes: datetime64[ns](2), int64(1), object(4)
memory usage: 352.0+ bytes
None
Menampilkan Informasi Field di tiap Dataframe
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   id           0 non-null      object
 1   my_plant_id  0 non-null      object
 2   time         0 non-null      object
 3   recurring    0 non-null      object
 4   

## 3.3 Change Type Data

In [9]:
def change_type_data(df):
    # Memastikan tipe data yang benar
    print(f"Memastikan tipe data yang benar...")
    for column in df.columns:
        if df[column].dtype == 'object':
            df[column] = df[column].astype('category')
        elif pd.api.types.is_datetime64_any_dtype(df[column]):
            df[column] = df[column].dt.strftime('%Y-%m-%d')
            df[column] = pd.to_datetime(df[column], format='%Y-%m-%d')
    
    return df

In [10]:
dataframes = []  # List untuk menyimpan nama dataframe

for table in tables:
    df_variable_name = f"df_{table}"
    
    # Cleansing DataFrame
    globals()[df_variable_name] = change_type_data(globals()[df_variable_name])
    
    # Tambahkan nama dataframe ke dalam list
    dataframes.append(df_variable_name)

# Output untuk verifikasi
print(dataframes)

Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
Memastikan tipe data yang benar...
['df_admins', 'df_customize_watering_reminders', 'df_notifications', 'df_plant_categories', 'df_plant_characteristics', 'df_plant_faqs', 'df_plant_images', 'df_plant_instruction_categories', 'df_plant_instructions', 'df_plant_progresses', 'df_plant_reminders', 'df_plants', 'df_user_plant_histories', 'df_user_plants', 'df_users', 'df_watering_histories']


### 3.3.1 Menampilkan Informasi Dataframe Kembali

In [11]:
for table in dataframes:
    df_variable_name = f"{table}"
    
    # Informasi DataFrame
    globals()[df_variable_name] = info_dataframe(globals()[df_variable_name])

Menampilkan Informasi Field di tiap Dataframe
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   id          4 non-null      int64         
 1   name        4 non-null      category      
 2   email       4 non-null      category      
 3   password    4 non-null      category      
 4   url_image   4 non-null      category      
 5   created_at  4 non-null      datetime64[ns]
 6   updated_at  4 non-null      datetime64[ns]
dtypes: category(4), datetime64[ns](2), int64(1)
memory usage: 968.0 bytes
None
Menampilkan Informasi Field di tiap Dataframe
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   id           0 non-null      category
 1   my_plant_id  0 non-null      category
 2   time         0 non-null    

### 3.3.2 Sample Dataframe

In [12]:
df_watering_histories

Unnamed: 0,id,plant_id,user_id,created_at,updated_at
0,4,46,3,2024-06-16,2024-06-16
1,5,46,14,2024-06-16,2024-06-16


## 3.4 Table Dimensional and Merged to Fact Table

### 3.4.1 Save to CSV Table Dimensional

In [13]:
dataframes

['df_admins',
 'df_customize_watering_reminders',
 'df_notifications',
 'df_plant_categories',
 'df_plant_characteristics',
 'df_plant_faqs',
 'df_plant_images',
 'df_plant_instruction_categories',
 'df_plant_instructions',
 'df_plant_progresses',
 'df_plant_reminders',
 'df_plants',
 'df_user_plant_histories',
 'df_user_plants',
 'df_users',
 'df_watering_histories']

In [14]:
output_dir = "../data_source_dimensional"

for table in tables:
    df_variable_name = f"df_{table}"
    
    csv_filename = os.path.join(output_dir, f"dim_{table}.csv")
    globals()[df_variable_name].to_csv(csv_filename, index=False)
    print(f"Menyimpan Dataframe Dimensional dari dataframe df_{table} ke {csv_filename} \n")

Menyimpan Dataframe Dimensional dari dataframe df_admins ke ../data_source_dimensional/dim_admins.csv 

Menyimpan Dataframe Dimensional dari dataframe df_customize_watering_reminders ke ../data_source_dimensional/dim_customize_watering_reminders.csv 

Menyimpan Dataframe Dimensional dari dataframe df_notifications ke ../data_source_dimensional/dim_notifications.csv 

Menyimpan Dataframe Dimensional dari dataframe df_plant_categories ke ../data_source_dimensional/dim_plant_categories.csv 

Menyimpan Dataframe Dimensional dari dataframe df_plant_characteristics ke ../data_source_dimensional/dim_plant_characteristics.csv 

Menyimpan Dataframe Dimensional dari dataframe df_plant_faqs ke ../data_source_dimensional/dim_plant_faqs.csv 

Menyimpan Dataframe Dimensional dari dataframe df_plant_images ke ../data_source_dimensional/dim_plant_images.csv 

Menyimpan Dataframe Dimensional dari dataframe df_plant_instruction_categories ke ../data_source_dimensional/dim_plant_instruction_categories.cs

### 3.4.2 Dataframe Dimensional

In [15]:
# Path ke direktori yang berisi file CSV
input_dir = '../data_source_dimensional'

# List untuk menyimpan nama dataframe yang dibuat
dataframe_dimensional = []

# Loop untuk membaca semua file CSV dalam direktori
for filename in os.listdir(input_dir):
    if filename.endswith('.csv'):
        # Mengambil nama tabel dari nama file
        table_name = os.path.splitext(filename)[0]  # Menghilangkan ekstensi .csv
        
        # Membuat nama variabel dataframe
        df_variable_name = f"df_{table_name}"
        
        # Membaca file CSV menjadi dataframe
        df_path = os.path.join(input_dir, filename)
        globals()[df_variable_name] = pd.read_csv(df_path)
        
        # Tambahkan nama dataframe ke dalam list
        dataframe_dimensional.append(df_variable_name)

# Output untuk verifikasi
print(dataframe_dimensional)

['df_dim_notifications', 'df_dim_user_plant_histories', 'df_dim_plant_categories', 'df_dim_plant_instructions', 'df_dim_customize_watering_reminders', 'df_dim_watering_histories', 'df_dim_admins', 'df_dim_plant_faqs', 'df_dim_user_plants', 'df_dim_plant_characteristics', 'df_dim_plants', 'df_dim_users', 'df_dim_plant_instruction_categories', 'df_dim_plant_progresses', 'df_dim_plant_reminders', 'df_dim_plant_images']


In [16]:
dataframe_dimensional

['df_dim_notifications',
 'df_dim_user_plant_histories',
 'df_dim_plant_categories',
 'df_dim_plant_instructions',
 'df_dim_customize_watering_reminders',
 'df_dim_watering_histories',
 'df_dim_admins',
 'df_dim_plant_faqs',
 'df_dim_user_plants',
 'df_dim_plant_characteristics',
 'df_dim_plants',
 'df_dim_users',
 'df_dim_plant_instruction_categories',
 'df_dim_plant_progresses',
 'df_dim_plant_reminders',
 'df_dim_plant_images']

In [17]:
for dataframe in dataframe_dimensional:
    df_variable_name = f"{dataframe}"
    
    # Cleansing DataFrame
    globals()[df_variable_name] = cleanse_dataframe(globals()[df_variable_name])

Memeriksa missing values...
id            0
title         0
body          0
user_id       0
is_read       0
created_at    0
updated_at    0
plant_id      0
dtype: int64
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memeriksa missing values...
id                 0
user_id            0
plant_id           0
plant_name         0
plant_category     0
plant_image_url    0
created_at         0
updated_at         0
dtype: int64
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memeriksa missing values...
id            0
name          0
image_url     0
created_at    0
updated_at    0
dtype: int64
Memeriksa duplikasi...
Jumlah baris duplikat: 0
Memeriksa missing values...
id                         0
plant_id                   0
step_number                0
step_title                 0
step_description           0
step_image_url             2
additional_tips            0
created_at                 0
updated_at                 0
instruction_category_id    0
dtype: int64
Mengisi missing values den

In [18]:
df_dim_plant_characteristics

Unnamed: 0,id,plant_id,height,height_unit,wide,wide_unit,leaf_color
0,46,46,100,centimeter,5,centimeter,Green
1,55,55,1,Centimeter,1,Meter,Green
2,59,59,1,Centimeter,1,Meter,Green


# 4. Load