# Data Preparation

The objective is to clean the data, enrich the dataset, and load it into separate tables in a PostgreSQL database.

In [153]:
import csv
import os
import warnings

import pandas as pd
import psycopg2

# Suppress every warning for the rest of the session.
# NOTE(review): this presumably also hides pandas' chained-assignment warnings - confirm that is intended.
warnings.filterwarnings(action='ignore')

# Orders

In [131]:
# Load the raw order lines; the Products and Pricelist frames below are derived from this frame.
df = pd.read_csv(filepath_or_buffer='orders.csv')

In [132]:
# Weight per order line: the ordered quantity scaled by a fixed factor of 24.
df['WEIGHT'] = 24 * df['QUANTITY']

In [133]:
# Placeholder AMOUNT column; the next cell overwrites it with the computed value.
df = df.assign(AMOUNT='')

In [134]:
# Unit price for each product ID.
prices = {
    'AJA001': 1250,
    'SOK001': 1230,
    'DUN001': 1180,
    'LOT001': 1200,
    'PEM001': 1250,
    'BAB001': 1210,
    'MAI001': 1190,
    'SMA001': 1190,
    'UMI001': 1215,
}
# Line amount = quantity x unit price; a product ID absent from `prices` yields NaN.
df['AMOUNT'] = df['QUANTITY'] * df['PRDTID'].map(prices)

In [135]:
# Sanity check: list order lines whose AMOUNT could not be computed.
# AMOUNT is produced by mapping PRDTID through `prices` and multiplying by
# QUANTITY, so a product without a price shows up as NaN - never as the ''
# placeholder. Comparing against '' therefore could not match anything;
# isna() is the check that actually detects unpriced rows.
# The variable name is kept so any later reference to it keeps working.
empty_strings = df[df['AMOUNT'].isna()]
print(empty_strings['AMOUNT'])

Series([], Name: AMOUNT, dtype: int64)


In [136]:
# Preview the first five enriched order lines.
df.head(n=5)

Unnamed: 0,ID,CSMCODE,ORDDATE,ORDTIME,PRDTID,QUANTITY,WEIGHT,AMOUNT
0,10001,CSM001,14/07/23,6:25:00,AJA001,10,240,12500
1,10001,CSM001,14/07/23,6:30:00,SOK001,10,240,12300
2,10002,CSM003,14/07/23,6:35:00,AJA001,25,600,31250
3,10003,CSM002,14/07/23,6:40:00,DUN001,10,240,11800
4,10003,CSM002,14/07/23,6:45:00,LOT001,20,480,24000


# Products

In [137]:
# Build the product table from the product IDs found in the order lines.
# The explicit .copy() makes `prd` an independent frame, so the columns added
# here and in the following cells are not chained assignments on a slice of `df`.
# NOTE(review): this keeps one row per order line, so product IDs repeat -
# confirm whether the Products table should be de-duplicated.
prd = df[['PRDTID']].copy()
# Constant attributes are stored as strings; NAME starts empty and is filled
# from a lookup in a later cell.
prd = prd.assign(NAME='', STOCK='1500', RESTOCK='1000', UNITWEIGHT='24')

In [138]:
# Distinct product IDs present in the product frame.
prd.loc[:, 'PRDTID'].unique()

array(['AJA001', 'SOK001', 'DUN001', 'LOT001', 'BAB001', 'MAI001',
       'SMA001', 'PEM001', 'UMI001'], dtype=object)

In [139]:
# Display name for each product ID.
# TODO: 'MAI001' occurs in the order data but has no entry in this map, so its
# NAME stays NaN until the correct display name is added here.
values = {
    'AJA001': 'Ajab HB 2kg',
    'SOK001': 'Soko HB 2kg',
    'DUN001': 'Dunia HB 2kg',
    'LOT001': 'Lotus HB 2kg',
    'PEM001': 'Pembe HB 2kg',
    'BAB001': 'Baba Lao HB 2kg',
    'SMA001': 'Smatta HB 2kg',
    'UMI001': 'Umi HB 2kg',
}
prd['NAME'] = prd['PRDTID'].map(values)

# Report product IDs that received no name instead of letting NaN pass silently
# into the exported CSV.
unnamed_ids = prd.loc[prd['NAME'].isna(), 'PRDTID'].unique().tolist()
if unnamed_ids:
    print(f"Product IDs without a NAME mapping: {unnamed_ids}")

In [140]:
# Rename the product key column from PRDTID to ID (rebinding `prd` to the renamed frame).
prd = prd.rename(columns={'PRDTID': 'ID'})

In [141]:
# Preview the first five product rows.
prd.head(n=5)

Unnamed: 0,ID,NAME,STOCK,RESTOCK,UNITWEIGHT
0,AJA001,Ajab HB 2kg,1500,1000,24
1,SOK001,Soko HB 2kg,1500,1000,24
2,AJA001,Ajab HB 2kg,1500,1000,24
3,DUN001,Dunia HB 2kg,1500,1000,24
4,LOT001,Lotus HB 2kg,1500,1000,24


# Pricelist

In [142]:
# Seed the price list from the product IDs in the order lines. .copy() detaches
# the frame from `df`, so the column assignments in the following cells work on
# an independent object rather than on a slice.
# NOTE(review): one row per order line, so product IDs repeat - confirm intent.
prc = df[['PRDTID']].copy()

In [143]:
# Add the constant columns: price-list ID, a PRICE placeholder (filled in by the
# next cell) and UOM.
prc = prc.assign(ID='PRC1', PRICE='', UOM='Bale')

In [144]:
# Unit price for each product ID.
# 'MAI001' was missing from this copy of the map even though the Orders section
# prices it at 1190; without it every MAI001 row received a NaN PRICE.
prices = {
    'AJA001': 1250,
    'SOK001': 1230,
    'DUN001': 1180,
    'LOT001': 1200,
    'PEM001': 1250,
    'BAB001': 1210,
    'MAI001': 1190,
    'SMA001': 1190,
    'UMI001': 1215,
}
# Look up each row's price; an ID absent from `prices` yields NaN.
prc['PRICE'] = prc['PRDTID'].map(prices)

In [145]:
# Desired column order for the price list.
new_index = [
    'ID',
    'PRDTID',
    'UOM',
    'PRICE',
]
# reindex (rather than plain selection) also tolerates a missing column by
# creating it filled with NaN.
prc = prc.reindex(new_index, axis='columns')

In [150]:
# Store PRICE with pandas' nullable integer dtype so missing prices do not force floats.
prc = prc.astype({'PRICE': 'Int64'})

In [151]:
# Preview the first five price-list rows.
prc.head(n=5)

Unnamed: 0,ID,PRDTID,UOM,PRICE
0,PRC1,AJA001,Bale,1250
1,PRC1,SOK001,Bale,1230
2,PRC1,AJA001,Bale,1250
3,PRC1,DUN001,Bale,1180
4,PRC1,LOT001,Bale,1200


# Save

In [157]:
# Write each frame to its own CSV file. index=False keeps the DataFrame index
# out of the saved file.
# NOTE(review): 'Orders.csv' differs from the input 'orders.csv' only by case;
# on a case-insensitive filesystem this would presumably overwrite the raw
# input - confirm.
for frame, target in ((df, 'Orders.csv'), (prd, 'Products.csv'), (prc, 'Pricelist.csv')):
    frame.to_csv(target, index=False)

In [158]:
# Load each exported CSV into its PostgreSQL table, creating the table (all
# columns VARCHAR, named after the CSV header) when it does not exist yet.
# NOTE(review): COPY appends, so re-running this cell loads the same rows
# again - confirm whether the tables should be truncated first.
table_names = ['Orders', 'Products', 'Pricelist']
csv_files = ['Orders.csv', 'Products.csv', 'Pricelist.csv']

conn = None
try:
    # Connection settings are read from the environment (libpq-style variable
    # names) so real credentials never live in the notebook; the fallbacks are
    # the placeholder values this cell used before.
    conn = psycopg2.connect(
        host=os.environ.get("PGHOST", "localhost"),
        port=os.environ.get("PGPORT", 5432),
        database=os.environ.get("PGDATABASE", "database"),
        user=os.environ.get("PGUSER", "username"),
        password=os.environ.get("PGPASSWORD", "password"),
    )
    # `with conn` commits on success and rolls back on an exception, but it
    # does NOT close the connection; that is done in the `finally` below.
    with conn, conn.cursor() as cursor:
        for table_name, csv_file in zip(table_names, csv_files):
            # The CREATE TABLE below uses an unquoted identifier, which
            # PostgreSQL folds to lower case, so the catalog holds e.g.
            # 'orders'. Look the table up under that folded name; comparing
            # against 'Orders' never matched and made a re-run fail on CREATE.
            # NOTE(review): the lookup is not restricted to a schema - confirm
            # that same-named tables in other schemas are not a concern.
            cursor.execute(
                "SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = %s)",
                (table_name.lower(),)
            )
            table_exists = cursor.fetchone()[0]

            if not table_exists:
                # Derive the column list from the CSV header row.
                with open(csv_file, 'r', newline='') as file:
                    headers = next(csv.reader(file))
                columns = [f"{header} VARCHAR" for header in headers]
                create_query = f"CREATE TABLE {table_name} ({','.join(columns)})"

                cursor.execute(create_query)
                print(f"Table '{table_name}' created successfully.")

            # Stream the file to the server; HEADER tells COPY to skip the first line.
            with open(csv_file, 'r') as file:
                cursor.copy_expert(f"COPY {table_name} FROM STDIN WITH CSV HEADER", file)

                print(f"Data loaded into '{table_name}' table successfully.")

except psycopg2.Error as e:
    print(f"An error occurred: {e}")
finally:
    # Release the connection whether or not the load succeeded.
    if conn is not None:
        conn.close()

Table 'Orders' created successfully.
Data loaded into 'Orders' table successfully.
Table 'Products' created successfully.
Data loaded into 'Products' table successfully.
Table 'Pricelist' created successfully.
Data loaded into 'Pricelist' table successfully.
