In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import requests
from tqdm import tqdm

from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
import os

# Global seaborn defaults applied to every figure drawn in this notebook.
sns.set_palette('Spectral')
sns.set_context('notebook', font_scale=1)
sns.set_style('whitegrid')
# Display every column of a DataFrame instead of truncating wide frames.
pd.set_option('display.max_columns', None)

import warnings

# NOTE(review): this silences every warning for the whole session. The cells
# below call `.dt.week`, `read_csv(error_bad_lines=...)` and in-place edits on
# sliced frames, so the pandas deprecation / SettingWithCopy warnings that
# would flag them are hidden too.
warnings.filterwarnings('ignore')

# **Beberapa Fungsi Yang Dibutuhkan**

## **0.1 Fungsi Dasar**

### **0.1.0. Untuk menempatkan koma pada 3 angka**

In [2]:
def coma(value):
    """Format the integer part of ``value`` with "." as thousands separator.

    Parameters
    ----------
    value : int, float or str
        Number to format. It is converted with ``str``; everything after the
        first "." (the decimal part) is discarded, not rounded.

    Returns
    -------
    str
        The grouped integer part, e.g. ``coma(1234567.89) -> "1.234.567"``.
    """
    text = str(value)
    # Keep the sign out of the grouping so -123456 does not become "-.123.456".
    sign = "-" if text.startswith("-") else ""
    # Integers contain no "." at all, so only the first piece may be indexed.
    digits = text.lstrip("-").split(".")[0]

    # Cut three characters at a time, walking from the right-hand side.
    groups = [digits[max(end - 3, 0):end] for end in range(len(digits), 0, -3)]
    return sign + ".".join(reversed(groups))

### **0.1.2. Fungsi currency numerik ke Rupiah**

In [3]:
def rupiah(value):
    """Format a number as a Rupiah string such as ``"Rp 1.234.567,5"``.

    Parameters
    ----------
    value : int, float or str
        Amount to format. It is converted with ``str``; the part before the
        first "." is grouped per thousand with ".", the part after it is
        appended unchanged behind a ",".

    Returns
    -------
    str
        ``"Rp <grouped integer part>,<decimal part>"``. Values without a
        decimal part (plain integers) get ``",0"``, matching what a float such
        as ``1000.0`` produces.
    """
    text = str(value)
    # Keep the sign out of the grouping so it is not treated as a digit.
    sign = "-" if text.startswith("-") else ""
    # partition() never fails when there is no ".", unlike indexing split()[1].
    whole, _, fraction = text.lstrip("-").partition(".")

    # Cut three characters at a time, walking from the right-hand side.
    groups = [whole[max(end - 3, 0):end] for end in range(len(whole), 0, -3)]
    return "Rp " + sign + ".".join(reversed(groups)) + "," + (fraction or "0")

### **0.1.3. Fungsi translate hari**

In [4]:
def hari(x):
    """Translate an English weekday name into its numbered Indonesian label.

    The number prefix ("1." for Monday through "7." for Sunday) makes the
    labels sort in calendar order. Any value that is not one of the seven
    English names yields ``None``.
    """
    translations = (
        ('Monday', '1. Senin'),
        ('Tuesday', '2. Selasa'),
        ('Wednesday', '3. Rabu'),
        ('Thursday', '4. Kamis'),
        ('Friday', '5. Jumat'),
        ('Saturday', '6. Sabtu'),
        ('Sunday', '7. Minggu'),
    )
    for english_name, label in translations:
        if x == english_name:
            return label
    return None

### **0.1.4. Fungsi mengatur ukuran visualisasi 16:9**

In [5]:
def wide(lebar):
    """Return the height that gives a 16:9 figure for the width ``lebar``."""
    return lebar / 16 * 9

### **0.1.5. Fungsi membedakan pagi dan malam hari**

In [6]:
def daylight(x):
    """Label a clock value as night or day shift from its hour.

    ``x`` is converted with ``str`` and the text before the first ":" is read
    as the hour. Hours strictly greater than 18 give ``'Sift Malam'``, every
    other hour gives ``'Sift Pagi'``.

    NOTE(review): ``get_day_work`` in this notebook treats hour 18 itself as
    night (``>= 18``); confirm which boundary is intended.
    """
    hour_text, *_ = str(x).split(':')
    return 'Sift Malam' if int(hour_text) > 18 else 'Sift Pagi'

### **0.1.6. Fungsi transformasi list data numerik ke list data currency**

In [7]:
def yticks_transform(data):
    """Turn an iterable of numbers into Rupiah tick labels without decimals.

    Each value is formatted with ``rupiah`` and the text from the first ","
    onwards (the decimal part) is dropped. Returns a new list.
    """
    return [rupiah(amount).split(',')[0] for amount in data]

### **0.1.7 Fungsi Transform Week**

In [8]:
def week_transform(x):
    """Fold a week number into the range 1..3 using its remainder modulo 4.

    Remainders 1, 2 and 3 are returned as they are; a remainder of 0 is
    reported as 1.

    NOTE(review): multiples of 4 therefore land in bucket 1, not 4 -- confirm
    this is intended and not a slip for "week 4".
    """
    remainder = x % 4
    return 1 if remainder == 0 else remainder

### **0.1.8. Fungsi Data Transform Waktu**

In [9]:
def transform_time(data):
    data['tahun'] = pd.to_datetime(data.tanggal).dt.year
    data['no_bulan'] = pd.to_datetime(data.tanggal).dt.month
    data['bulan'] = pd.to_datetime(data.tanggal).dt.month_name()
    data['week'] = pd.to_datetime(data.tanggal).dt.week
    data['week'] = data.week.apply(lambda x: week_transform(x))
    data['tgl'] = pd.to_datetime(data.tanggal).dt.day
    data['hari'] = pd.to_datetime(data.tanggal).dt.day_name()
    data['hari'] = data.hari.apply(lambda x: hari(x))
    data['order'] = 1
    return data

### **0.1.9. Search**

In [10]:
def search(word, document):
    """Return ``document`` when ``word`` is one of its whitespace-separated tokens.

    The match is exact and case-sensitive on whole tokens. When the word does
    not occur, the literal string ``'None'`` (not the ``None`` object) is
    returned.
    """
    return document if word in document.split() else 'None'


### **0.1.10. transform data**

In [11]:
def transform_data(data, branch):
    data['tahun'] = pd.to_datetime(data.tanggal).dt.year
    data['no_bulan'] = pd.to_datetime(data.tanggal).dt.month
    data['bulan'] = pd.to_datetime(data.tanggal).dt.month_name()
    data['week'] = pd.to_datetime(data.tanggal).dt.week
    data['week'] = data.week.apply(lambda x: week_transform(x))
    data['tgl'] = pd.to_datetime(data.tanggal).dt.day
    data['hari'] = pd.to_datetime(data.tanggal).dt.day_name()
    data['hari'] = data.hari.apply(lambda x: hari(x))
    data['hour'] = pd.to_datetime(data.tanggal).dt.hour
    data['order'] = 1
    data['total'] = data.total - (data.total * (data.potongan/100))
    data['branch'] = branch
    data = data[['branch','transaksi', 'tanggal', 'tahun', 'no_bulan', 'bulan', 'week', 'tgl', 'hari', 'hour','customer',
                'user', 'cs', 'kode_item', 'produk', 'quantity', 'satuan', 'harga', 'total', 'order']]
    
    return data

### **0.1.11. columns_transformer**

In [12]:
def correct_feature_text(data):
    """Rename the columns of ``data`` to lower-case snake_case, in place.

    Each label is lower-cased and its whitespace-separated words are joined
    with "_" (surrounding whitespace disappears). The same frame object is
    returned.

    NOTE(review): an identical function with the same name is defined again in
    the "Transforming Data Penjualan" cell; the later definition replaces this
    one once that cell has run.
    """
    data.columns = ['_'.join(label.lower().split()) for label in data.columns]
    return data

# **ETL : Extract, Transform, Load**

## **Load Data**

In [13]:
# Column names of the sales export, grouped here by what they describe.
important_columns = [
    # transaction header
    'transaksi', 'tanggal', 'customer',
    # people / address fields
    'user', 'cs', 'operator', 'finishing', 'suport', 'alamat',
    # line item
    'kode_item', 'produk', 'quantity', 'satuan', 'harga',
    # amounts
    'tunai', 'kredit', 'debit', 'total', 'potongan',
    # free-text note
    'ket',
]

In [14]:
def get_branch_name(x):
    """Return the part of ``x`` before its first "_" (all of ``x`` if none)."""
    return x.split('_', 1)[0]

def read_data_all_format(path, delimiter_=';'):
    """Read ``path`` as delimited text, falling back to Excel.

    Parameters
    ----------
    path : str
        File to read.
    delimiter_ : str, default ';'
        Field delimiter used for the CSV attempt.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV (malformed rows skipped) or, when CSV parsing fails,
        the result of ``pd.read_excel``.

    Raises
    ------
    Exception
        Whatever ``pd.read_excel`` raises when the file is neither format.
    """
    try:
        try:
            # `error_bad_lines` was removed in pandas 2.0. Passing it there
            # raised a TypeError that the old bare `except` swallowed, so CSV
            # files were silently handed to read_excel instead.
            return pd.read_csv(path, on_bad_lines='skip', delimiter=delimiter_)
        except TypeError:
            # pandas older than 1.3 only knows the previous keyword.
            return pd.read_csv(path, error_bad_lines=False, delimiter=delimiter_)
    except Exception:
        # Not parseable as text (for example a binary .xlsx): try Excel.
        return pd.read_excel(path)

def datetime(x):
    """Parse a day-first ``"dd/mm/yyyy HH:MM[:SS]"`` value into a Timestamp.

    The value is converted with ``str``, its day and month are swapped into
    the month-first order ``pd.to_datetime`` reads by default, and only hour
    and minute are kept (seconds are dropped).

    Returns
    -------
    pandas.Timestamp or str
        The parsed timestamp, or the ``str`` form of the input when it does
        not have the expected shape.

    NOTE(review): this function reuses the name of the ``datetime`` class
    imported at the top of the notebook, which is unreachable once this cell
    has run.
    """
    try:
        x = str(x)
        date_pieces = x.split('/')
        day, month = date_pieces[0], date_pieces[1]
        # The last "/" piece still carries the time: "2024 08:29:35".
        year = date_pieces[-1].split()[0]
        clock = x.split()[-1].split(':')
        hour, minute = clock[0], clock[1]

        msg = pd.to_datetime(f"{month}/{day}/{year} {hour}:{minute}")
    except Exception:
        # Best effort: hand back the value as text. `Exception` rather than a
        # bare `except` so Ctrl-C is not swallowed.
        msg = x
    return msg

def read_data_from_path(path):
    """Load every data file in ``path`` into one time-annotated frame.

    Each file is read with ``read_data_all_format`` and tagged with a
    ``branch`` column taken from the file name (text before the first "_").
    The frames are concatenated, ``tanggal`` is parsed to datetimes, the
    calendar columns are added with ``transform_time`` and the rows are sorted
    newest first by year, month and day.

    Parameters
    ----------
    path : str
        Folder holding the data files. A trailing slash is optional.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If the folder holds no data files.
    """
    # macOS drops a `.DS_Store` entry into folders; it is not a data file.
    file_names = [name for name in os.listdir(path) if name != '.DS_Store']
    if not file_names:
        raise ValueError(f'no data files found in {path!r}')

    frames = []
    for file_name in file_names:
        # os.path.join works whether or not `path` ends with a separator.
        frame = read_data_all_format(os.path.join(path, file_name))
        frame['branch'] = get_branch_name(file_name)
        frames.append(frame)

    data = pd.concat(frames, axis=0).reset_index(drop=True)
    data['tanggal'] = pd.to_datetime(data['tanggal'])
    data = transform_time(data)
    return data.sort_values(['tahun', 'no_bulan', 'tgl'], ascending=False)

In [15]:
# Table of export URLs for the sales (penjualan) workbooks, one row per
# branch and year. `export?format=xlsx` is the export query that asks for an
# .xlsx download; the previous `export?format.xlsx` carried no format value.
tbl_url_penjualan = pd.DataFrame({
    'branch':['Utama','Utama','Digital Printing','Digital Printing','Office Equipment','Office Equipment'],
    'years':[2023,2024,2023,2024,2023,2024],
    'urls':[
        "https://docs.google.com/spreadsheets/d/16K25jlGELiE7zEIoKGYPnOTL0-LFe7wV/export?format=xlsx",
        "https://docs.google.com/spreadsheets/d/1t60_O3qnDeeXC-SbhYPdZ5uIhv2kDGsO/export?format=xlsx",
        "https://docs.google.com/spreadsheets/d/18dLHwc8-mOaH0jiMSx8fajvNR8_2FB-z/export?format=xlsx",
        "https://docs.google.com/spreadsheets/d/1_Y-yKyCKaHKox5C3wcXMSV-88C_fsrl9/export?format=xlsx",
        "https://docs.google.com/spreadsheets/d/1vh7spBX3Pdgq2g6QosPuwuJAQWP70GzV/export?format=xlsx",
        "https://docs.google.com/spreadsheets/d/11vIGFy6M1QSnhRBNTAQiNuaVCRKI2VIC/export?format=xlsx"
    ]
})

# Download every workbook into ./Datasets Penjualan/.
penjualan_dir = './Datasets Penjualan'
os.makedirs(penjualan_dir, exist_ok=True)

for row in tqdm(tbl_url_penjualan.itertuples(index=False), total=len(tbl_url_penjualan)):
    year = str(row.years)[-2:]
    path = f'{penjualan_dir}/{row.branch}_penjualan_{year}.xlsx'

    try:
        # A timeout keeps one stalled request from hanging the notebook.
        response = requests.get(row.urls, timeout=120)
    except requests.RequestException as error:
        tqdm.write(f'SKIPPED {path}: {error}')
        continue

    if response.status_code != 200:
        tqdm.write(f'SKIPPED {path}: HTTP {response.status_code}')
        continue

    # An .xlsx workbook is a ZIP archive and therefore starts with "PK"; an
    # HTML sign-in or error page served with status 200 does not.
    if not response.content.startswith(b'PK'):
        tqdm.write(f'SKIPPED {path}: response is not an .xlsx workbook')
        continue

    # Opening with "wb" replaces the previous copy, and only after a good
    # download, so a failed refresh no longer deletes the existing file.
    with open(path, "wb") as f:
        f.write(response.content)

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:47<00:00,  7.84s/it]


In [16]:
# Table of export URLs for the receivables (piutang) workbooks, one row per
# branch and year. `export?format=xlsx` is the export query that asks for an
# .xlsx download; the previous entries read `format.xlsx` / `formax.xlsx`.
tbl_url_piutang = pd.DataFrame({
    'branch':['Utama','Utama','Digital Printing','Digital Printing','Office Equipment','Office Equipment'],
    'years':[2023,2024,2023,2024,2023,2024],
    'urls':[
        "https://docs.google.com/spreadsheets/d/1d3i65NALdFqq1P56XVIprGpob-IZwM0E/export?format=xlsx",
        "https://docs.google.com/spreadsheets/d/1Bg-4-7iUsn9RPm4l_ZyMA_vRc1RNU9Ty/export?format=xlsx",
        "https://docs.google.com/spreadsheets/d/1Q6JLGZNm5hLcb5QYB7xb5g2_JVhGMmrA/export?format=xlsx",
        "https://docs.google.com/spreadsheets/d/1TUH0zpGhtELzDdIir0i1ohgcvVzYAftx/export?format=xlsx",
        "https://docs.google.com/spreadsheets/d/1GBKa1KrSiYNovEW5IuJSTp4Nly4ktz_R/export?format=xlsx",
        "https://docs.google.com/spreadsheets/d/17E1sq4VurQ2vwztztEHgjPG0p3QeD8Ud/export?format=xlsx"
    ]
})

# Download every workbook into ./Datasets Piutang/.
piutang_dir = './Datasets Piutang'
os.makedirs(piutang_dir, exist_ok=True)

for row in tqdm(tbl_url_piutang.itertuples(index=False), total=len(tbl_url_piutang)):
    year = str(row.years)[-2:]
    path = f'{piutang_dir}/{row.branch}_piutang_{year}.xlsx'

    try:
        # A timeout keeps one stalled request from hanging the notebook.
        response = requests.get(row.urls, timeout=120)
    except requests.RequestException as error:
        tqdm.write(f'SKIPPED {path}: {error}')
        continue

    if response.status_code != 200:
        tqdm.write(f'SKIPPED {path}: HTTP {response.status_code}')
        continue

    # An .xlsx workbook is a ZIP archive and therefore starts with "PK"; an
    # HTML sign-in or error page served with status 200 does not.
    if not response.content.startswith(b'PK'):
        tqdm.write(f'SKIPPED {path}: response is not an .xlsx workbook')
        continue

    # Opening with "wb" replaces the previous copy, and only after a good
    # download, so a failed refresh no longer deletes the existing file.
    with open(path, "wb") as f:
        f.write(response.content)

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:21<00:00,  3.62s/it]


In [17]:
# Table of export URLs for the receivable-payment (pembayaran piutang)
# workbooks, one row per branch and year. `export?format=xlsx` is the export
# query that asks for an .xlsx download. The fifth URL previously lacked the
# "/" between the file id and `export`.
tbl_url_pembayaran = pd.DataFrame({
    'branch':['Utama','Utama','Digital Printing','Digital Printing','Office Equipment','Office Equipment'],
    'years':[2023,2024,2023,2024,2023,2024],
    'urls':[
        "https://docs.google.com/spreadsheets/d/17_M5vpHke3jGVbDtmvRyZSpry5NX6QiS/export?format=xlsx",
        "https://docs.google.com/spreadsheets/d/1vKEyT30cWXjxbd_4WwuuXRx4AWMWxPuf/export?format=xlsx",
        "https://docs.google.com/spreadsheets/d/1BW3LUU0KL7e2x7FYmqJKlw1CBHEiKpHv/export?format=xlsx",
        "https://docs.google.com/spreadsheets/d/1QsLv-0r94-B2pqAXfKYL_DiqR7W5wd_E/export?format=xlsx",
        "https://docs.google.com/spreadsheets/d/1SsV3wSzYf3MVGq4EgvJS-RRJvNTMXLyS/export?format=xlsx",
        "https://docs.google.com/spreadsheets/d/1k66KfEEvUZlJfplOoSSbSkFd_Xe0Lb9f/export?format=xlsx"
    ]
})

# Download every workbook into ./Datasets Pembayaran Piutang/.
pembayaran_dir = './Datasets Pembayaran Piutang'
os.makedirs(pembayaran_dir, exist_ok=True)

# Iterate over tbl_url_pembayaran itself. The loop used to take its rows from
# tbl_url_piutang, so the piutang workbooks were saved under pembayaran names.
for row in tqdm(tbl_url_pembayaran.itertuples(index=False), total=len(tbl_url_pembayaran)):
    year = str(row.years)[-2:]
    path = f'{pembayaran_dir}/{row.branch}_pembayaran_{year}.xlsx'

    try:
        # A timeout keeps one stalled request from hanging the notebook.
        response = requests.get(row.urls, timeout=120)
    except requests.RequestException as error:
        tqdm.write(f'SKIPPED {path}: {error}')
        continue

    if response.status_code != 200:
        tqdm.write(f'SKIPPED {path}: HTTP {response.status_code}')
        continue

    # An .xlsx workbook is a ZIP archive and therefore starts with "PK"; an
    # HTML sign-in or error page served with status 200 does not.
    if not response.content.startswith(b'PK'):
        tqdm.write(f'SKIPPED {path}: response is not an .xlsx workbook')
        continue

    # Opening with "wb" replaces the previous copy, and only after a good
    # download, so a failed refresh no longer deletes the existing file.
    with open(path, "wb") as f:
        f.write(response.content)

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:15<00:00,  2.57s/it]


## **Transforming Data Penjualan**

In [18]:
# Load all sales files, discard the loaded `debit` and `Unnamed: 16` columns
# and use `Unnamed: 17` as the `debit` column from here on.
# NOTE(review): presumably the real debit amounts sit in the unnamed 18th
# column of the export -- confirm against the source workbook.
data = (
    read_data_from_path('./Datasets Penjualan/')
    .drop(columns=['debit', 'Unnamed: 16'])
    .rename(columns={'Unnamed: 17': 'debit'})
)

In [19]:
# Keep only the columns used downstream, in a fixed order, with a fresh index.
data = data[['branch', 'transaksi', 'tanggal', 'tahun', 'no_bulan', 'bulan', 'week', 'tgl', 'hari', 'customer', 'user',
             'cs', 'operator','finishing', 'suport', 'alamat', 'kode_item', 'produk', 'quantity','satuan', 'harga', 'tunai',
             'kredit', 'debit', 'total', 'potongan','order','ket']].reset_index(drop=True)

# Filled copies of the numeric and the text columns. These are separate
# frames; filling them does not change `data`.
p_num = data.select_dtypes('number').fillna(0)
p_cate = data.select_dtypes('object').fillna('unknown')

# Fill the gaps in `data` itself. One dict-based fillna replaces the chained
# `data[col].fillna(..., inplace=True)` calls, which act on a temporary column
# object and stop updating `data` once pandas Copy-on-Write is active.
data = data.fillna({
    # numeric columns with missing values
    'kredit': 0,
    'tunai': 0,
    'quantity': 0,
    'total': 0,
    # text columns with missing values
    'ket': 'unknown',
    'cs': 'unknown',
    'operator': 'unknown',
    'finishing': 'unknown',
    'suport': 'unknown',
})

def transform_obj_num(x):
    """Convert a value to an int by keeping the text before its first "," or ".".

    ``x`` is converted with ``str``; everything from the first "," onwards and
    then everything from the first "." onwards is cut off, and the rest is
    parsed with ``int``. Anything that cannot be parsed (``None``, NaN, text)
    yields 0.

    NOTE(review): because the first "." ends the number, an amount written
    with "." as thousands separator ("1.500.000") comes out as 1 -- confirm
    the source columns never use that notation.
    """
    try:
        msg = int(str(x).split(',')[0].split('.')[0])
    except Exception:
        # `Exception` rather than a bare `except`, so interrupting a long
        # `.apply` with Ctrl-C is not turned into a 0.
        msg = 0
    return msg

# Pass both amount columns through transform_obj_num.
data = data.assign(
    harga=data['harga'].apply(transform_obj_num),
    debit=data['debit'].apply(transform_obj_num),
)

def transform_potongan(x):
    """Parse a discount value that may use "," as the decimal mark.

    Parameters
    ----------
    x : str or number
        For example ``"12,5"``, ``"10"``, ``5`` or ``2.5``.

    Returns
    -------
    float or int
        The discount as a float, or ``0`` when the value is missing (``None``,
        NaN) or cannot be parsed.
    """
    try:
        # str() first: numeric cells have no `.split`, so the old code turned
        # every discount stored as a number into 0.
        msg = float(str(x).replace(',', '.'))
    except Exception:
        return 0
    # NaN is the only float not equal to itself; treat it as "no discount".
    return 0 if msg != msg else msg

data['potongan'] = data['potongan'].apply(transform_potongan)

# One frame per branch. `drop` returns a new frame here, where the old code
# modified a filtered slice in place (the SettingWithCopy pattern).

# Digital Printing rows
lcd = data[data.branch == 'Digital Printing'].drop(columns='branch')

# Utama rows
utm = data[data.branch == 'Utama'].drop(columns='branch')

# Office Equipment rows
oe = data[data.branch == 'Office Equipment'].drop(columns='branch')

# Master item list of the Utama branch. The location can be overridden with
# the UTM_MASTER_ITEM_PATH environment variable so the notebook also runs on
# other machines; the default is the original path.
master = pd.read_excel(os.environ.get(
    'UTM_MASTER_ITEM_PATH',
    '/Users/dickyaryanto/Documents/Master Data/udrio/produk/utm_master_item.xls',
))

# Normalise how the feature (column) names are written.
def correct_feature_text(data):
    """Rename the columns of ``data`` to lower-case snake_case, in place.

    Each label is lower-cased and its whitespace-separated words are joined
    with "_". The same frame object is returned.

    NOTE(review): this repeats the ``correct_feature_text`` defined in section
    0.1.11 and replaces it when this cell runs.
    """
    data.columns = ['_'.join(column.lower().split()) for column in data.columns]
    return data

# Normalise the column labels of the three branch frames.
lcd, utm, oe = (correct_feature_text(frame) for frame in (lcd, utm, oe))



### **1.1. Cleaning Data LCD**

In [20]:
# Rebuild the Digital Printing frame in a single chain:
#   * refresh the calendar columns and drop the fields not used for this branch
#   * rename user -> cs, cs -> operator and total -> omset
#   * reduce omset by `potongan` percent, tag the branch, fill gaps with 'Kosong'
#   * add `produk2`, the first word of the product name
lcd = (
    transform_time(lcd)
    .drop(columns=['operator','finishing','suport','alamat','harga','ket'])
    .loc[:, ['transaksi','tanggal','tahun','no_bulan','bulan','week','tgl','hari','customer','user',
             'cs','kode_item','produk','quantity','satuan','tunai','debit','kredit','total','potongan','order']]
    .rename(columns={'user': 'cs', 'cs': 'operator', 'total': 'omset'})
    .assign(
        omset=lambda frame: frame.omset - (frame.omset * (frame.potongan/100)),
        branch='Digital Printing',
    )
    .fillna('Kosong')
    .drop(columns='potongan')
    .assign(produk2=lambda frame: frame.produk.apply(lambda x: x.split()[0]))
)
# Product names counted as finishing / binding work ('JLD') and not as
# printing ('LCD'). Matching is on the exact product string, so the spacing
# inside each name matters.
finishing_lcd = [
    'JASA PERPORASI',
    'BIAYA LUBANG',
    'BIAYA PASANG',
    'LAMINATING BIASA F4',
    'LAMINATING BIASA A3',
    'JILID SPIRAL KAWAT A4 UK. 6',
    'JILID LEM A4 100+ - 200  LBR',
    'JILID ALBUM',
    'JILID LEM',
    'JILID SPIRAL KAWAT A4 UK. 5',
    'JILID BK BALUT LAGBAN A3',
    'JILID BK BALUT LAGBAN A4',
    'JILID HEKTER TENGAH',
    'JILID BK SPIRAL PLASTIC  A4 UK. 1 1/4',
    'JILID SPIRAL KAWAT A4 UK. 7',
    'JILID SPIRAL KAWAT A4 UK. 9',
    'JILID SPIRAL KAWAT A4 UK. 14',
    'JILID LEM A4 1-100 LBR',
    'JILID BK SPIRAL PLASTIC  A4 UK. 1',
    'JILID BK SPIRAL PLASTIC  A4 UK. 5/8',
    'JILID SPIRAL KAWAT A4 UK. 10',
    'JILID BK SPIRAL PLASTIC  A4 UK. 3/4',
    'JILID ALBUM A4 1-100 LBR',
    'JILID BK SPIRAL PLASTIC  A4 UK. 1 1/2',
    'JILID SPIRAL KAWAT A4 UK. 16',
    'JILID LEM A4/F4 201+ HAL',
    'JILID SPIRAL KAWAT A4 UK. 18',
    'JILID SPIRAL KAWAT A4 UK. 8',
    'JILID SPIRAL KAWAT A4 UK. 20',
    'JILID LEM A3',
    'JILID BALUT LAGBAN PLANO',
    'JEPIT KALENDER',
    'PASANG SUDUT BUKU',
    'DOFF LAMINATING',
    'PEKERJAAN FINISHING',
    'TABUNG WISUDA',
    'PRESS PINGGIR',
    'PENGISIAN TINTA TRODAT',
    'BIAYA POTONG',
]

def get_devisi_lcd(x):
    """Return 'JLD' when product ``x`` is listed in ``finishing_lcd``, else 'LCD'."""
    return 'JLD' if x in finishing_lcd else 'LCD'
# Classify every product, then order the rows newest month first.
lcd = (
    lcd
    .assign(devisi=lcd.produk.apply(get_devisi_lcd))
    .sort_values(['tahun','no_bulan'], ascending=False)
)

### **1.2. Cleaning Data UTM**

In [21]:
# Rebuild the Utama frame in a single chain:
#   * refresh the calendar columns and tag the branch
#   * keep the reporting columns, rename total -> omset, drop `potongan`
#   * set a placeholder operator and take the item-code prefix (text before
#     the first ".") as the raw division code
# NOTE(review): `potongan` is dropped here without being applied to `omset`,
# whereas the LCD and OE cells subtract it -- confirm this is intended.
utm = (
    transform_time(utm)
    .assign(branch='Utama')
    .loc[:, ['branch','transaksi','tanggal','tahun','no_bulan','bulan','week','tgl','hari',
             'customer','cs','kode_item','produk','quantity','satuan','tunai','debit','kredit','total','potongan','order']]
    .rename(columns={'total': 'omset'})
    .drop(columns='potongan')
    .assign(
        operator='Kosong',
        devisi=lambda frame: frame.kode_item.apply(lambda x: str(x).split('.')[0]),
    )
)
def devisi_1(x):
    """Map an item-code prefix to a division label.

    'CTK' and 'LCD' both become 'CTK', 'KTR' becomes 'Pinjaman Sementara',
    'PC' and 'JLD' stay as they are, and every other value becomes 'ATK'.
    """
    if x in ('CTK', 'LCD'):
        return 'CTK'
    if x == 'KTR':
        return 'Pinjaman Sementara'
    for unchanged_code in ('PC', 'JLD'):
        if x == unchanged_code:
            return unchanged_code
    return 'ATK'
# Replace the raw item-code prefix with its division label.
utm = utm.assign(devisi=utm.devisi.apply(devisi_1))

### **1.3. Cleaning Data OE**

In [22]:
# Rebuild the Office Equipment frame in a single chain:
#   * refresh the calendar columns and tag the branch
#   * keep the reporting columns, rename user -> cs and total -> omset
#   * reduce omset by `potongan` percent, set a placeholder operator,
#     drop `potongan` and mark every row as division 'OE'
oe = (
    transform_time(oe)
    .assign(branch='Office Equipment')
    .loc[:, ['branch', 'transaksi', 'tanggal','tahun', 'no_bulan', 'bulan', 'week',
             'tgl', 'hari', 'customer', 'user', 'kode_item','produk', 'quantity', 'satuan',
             'tunai','debit','kredit', 'total', 'potongan', 'order']]
    .rename(columns={'user': 'cs', 'total': 'omset'})
    .assign(
        omset=lambda frame: frame['omset'] - (frame['omset'] * (frame.potongan/100)),
        operator='Kosong',
    )
    .drop(columns='potongan')
    .assign(devisi='OE')
)

### **1.4. Buat Data Frame Gabung Dari Ketiga Data Tersebut**

In [23]:
# Stack the three branch frames and keep the shared reporting columns.
# `.copy()` gives an independent frame, so the column assignments below do not
# act on a selection of the concatenated frame.
df = pd.concat([utm, oe, lcd], axis=0)
df = df[['branch', 'devisi', 'transaksi', 'tanggal', 'tahun', 'no_bulan', 'bulan', 'week',
         'tgl', 'hari', 'customer', 'cs', 'operator', 'kode_item', 'produk', 'quantity',
         'satuan', 'tunai','debit','kredit', 'omset', 'order']].copy()

# Reduce `cs` to a bare name: first word before any "(", then the part before
# the first "_". A blank value yields no word at all; it is passed on as 'nan'
# so that it becomes 'Kosong' below rather than raising IndexError.
df['cs'] = df['cs'].apply(lambda x: (str(x).split('(')[0].split() or ['nan'])[0])
df['cs'] = df['cs'].apply(lambda x: x.split('_')[0])
# Assign the result back: `df.cs.replace(..., inplace=True)` edits a temporary
# column object and stops updating `df` once pandas Copy-on-Write is active.
df['cs'] = df['cs'].replace({'BELLA': 'BELA', 'nan': 'Kosong'})

# Customer name without the " #..." suffix.
df['customer'] = df.customer.apply(lambda x: x.split(' #')[0])
# Short "month/yy" label, e.g. "5/24".
df['date'] = df.no_bulan.apply(lambda x: str(x) + '/') + df.tahun.apply(lambda x: str(x)[-2:])
df = df.rename(columns={'devisi':'divisi'})
# Cut a name at its first "MLM" marker.
df['cs'] = df['cs'].apply(lambda x: str(x).split('MLM')[0])
def get_day_work(x):
    """Return 'Malam' for timestamps at or after 18:00, otherwise 'Pagi'.

    ``x`` must expose an ``hour`` attribute.
    """
    return 'Malam' if x.hour >= 18 else 'Pagi'
# Derived reporting columns: work shift, three-letter month, hour of day, and
# the transaction id as text.
df = df.assign(
    sift_kerja=df.tanggal.apply(get_day_work),
    bulan_singkat=df.bulan.apply(lambda x: str(x)[:3]),
    jam=df.tanggal.dt.hour,
    transaksi=df['transaksi'].apply(str),
)
# df.to_excel('./Data/data_penjualan.xlsx', index=False)

In [24]:
# Digital Printing rows with a non-zero debit amount.
# NOTE(review): in the rendered output the same `debit` value repeats on every
# item row of a transaction (950000 on both rows of 420751, whose omset values
# add up to that amount), so summing `debit` over rows would count it more
# than once -- confirm before aggregating.
df.loc[df.branch.eq('Digital Printing') & df.debit.ne(0)]

Unnamed: 0,branch,divisi,transaksi,tanggal,tahun,no_bulan,bulan,week,tgl,hari,customer,cs,operator,kode_item,produk,quantity,satuan,tunai,debit,kredit,omset,order,date,sift_kerja,bulan_singkat,jam
12,Digital Printing,LCD,420751,2024-05-22 08:29:35,2024,5,May,1,22,3. Rabu,DIVISI_ONLINE_WA,NURUL,NURUL,10173,LASER COLOR PRINT STIKER 25+ (C),50.0,lbr,0.0,950000,0.0,300000.0,1,5/24,Pagi,May,8
13,Digital Printing,LCD,420751,2024-05-22 08:29:35,2024,5,May,1,22,3. Rabu,DIVISI_ONLINE_WA,NURUL,NURUL,LABEL4,STICKER LABEL/KEMASAN CROMO MIN 4X4,100.0,lbr,0.0,950000,0.0,650000.0,1,5/24,Pagi,May,8
19,Digital Printing,LCD,420758,2024-05-22 08:58:51,2024,5,May,1,22,3. Rabu,ONLINE_WA_PRIBADI,NURUL,NURUL,10175,LASER COLOR PRINT STIKER 200+ (C),200.0,lbr,0.0,1000000,0.0,1000000.0,1,5/24,Pagi,May,8
223,Digital Printing,LCD,420554,2024-05-21 08:42:44,2024,5,May,1,21,2. Selasa,DIVISI_ONLINE_WA,ULAN,LUKMAN,1021,LASER COLOR PRINT STIKER VINIL/TRANS 1+ (C),2.0,lbr,0.0,20000,0.0,20000.0,1,5/24,Pagi,May,8
224,Digital Printing,LCD,420553,2024-05-21 08:42:59,2024,5,May,1,21,2. Selasa,DIVISI_ONLINE_WA,IKA,LUKMAN,10121a,LASER COLOR PRINT 150 gr 10+ (C),13.0,lbr,0.0,58500,0.0,58500.0,1,5/24,Pagi,May,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47748,Digital Printing,LCD,402728,2024-01-03 20:31:00,2024,1,January,1,3,3. Rabu,ONLINE_WA_PRIBADI,LIDYA,AZMI,12098,KALENDER MEJA,50.0,bh,0.0,3800000,0.0,2000000.0,1,1/24,Malam,Jan,20
47749,Digital Printing,LCD,402728,2024-01-03 20:31:00,2024,1,January,1,3,3. Rabu,ONLINE_WA_PRIBADI,LIDYA,AZMI,120981,KALENDER GANTUNG,200.0,bh,0.0,3800000,0.0,1300000.0,1,1/24,Malam,Jan,20
47750,Digital Printing,LCD,402728,2024-01-03 20:31:00,2024,1,January,1,3,3. Rabu,ONLINE_WA_PRIBADI,LIDYA,AZMI,10221,LASER COLOR PRINT STIKER VINIL/TRANS 25+ (C/F),50.0,lbr,0.0,3800000,0.0,500000.0,1,1/24,Malam,Jan,20
47751,Digital Printing,LCD,402745,2024-01-03 20:42:55,2024,1,January,1,3,3. Rabu,CASH/TUNAI,SORAYA,LUKMAN,LABEL4V,STICKER LABEL/KEMASAN VINYL MIN 4X4,25.0,lbr,0.0,500000,0.0,250000.0,1,1/24,Malam,Jan,20


## **Transforming Data Piutang**

In [25]:
# Every receivables (piutang) file except the macOS `.DS_Store` entry.
piutang_dir = './Datasets Piutang'
file_piutang = [name for name in os.listdir(piutang_dir) if name != '.DS_Store']

# The frames are collected in a named list. The previous `_` is also the
# variable IPython uses for the last cell output.
piutang_frames = []
for file_name in file_piutang:
    file_path = os.path.join(piutang_dir, file_name)
    try:
        frame = pd.read_csv(file_path)
    except Exception:
        # Not parseable as CSV (for example a binary .xlsx): read as Excel.
        # `Exception` rather than a bare `except`, so Ctrl-C still interrupts.
        frame = pd.read_excel(file_path)
    # The branch is the part of the file name before the first "_".
    frame['branch'] = file_name.split('_')[0]
    piutang_frames.append(frame)

pt = pd.concat(piutang_frames)
# NOTE(review): fillna(0) also writes 0 into missing `tanggal` / `tanggal_jt`
# cells -- confirm downstream code expects that.
pt = pt[['branch','tanggal','transaksi','tanggal_jt','kredit','bayar']].fillna(0).reset_index(drop=True)
pt['transaksi'] = pt['transaksi'].apply(str)

## **Transforming Data Pembayaran Piutang**

In [26]:
# Every receivable-payment file except the macOS `.DS_Store` entry.
pembayaran_dir = './Datasets Pembayaran Piutang'
file_pembayaran = [name for name in os.listdir(pembayaran_dir) if name != '.DS_Store']

# The frames are collected in a named list. The previous `_` is also the
# variable IPython uses for the last cell output.
pembayaran_frames = []
for file_name in file_pembayaran:
    file_path = os.path.join(pembayaran_dir, file_name)
    try:
        frame = pd.read_csv(file_path)
    except Exception:
        # Not parseable as CSV (for example a binary .xlsx): read as Excel.
        # `Exception` rather than a bare `except`, so Ctrl-C still interrupts.
        frame = pd.read_excel(file_path)
    # The branch is the part of the file name before the first "_".
    frame['branch'] = file_name.split('_')[0]
    pembayaran_frames.append(frame)

pb = pd.concat(pembayaran_frames)
# Fill missing payments through the frame. `pb['bayar'].fillna(0, inplace=True)`
# edits a temporary column object and stops updating `pb` once pandas
# Copy-on-Write is active.
pb = pb[['branch','tanggal','transaksi','kredit','bayar']].fillna({'bayar': 0})
pb['transaksi'] = pb['transaksi'].apply(str)

In [27]:
# check = pb[pb.tanggal.dt.year==2024]
# check[(check.bayar!=0) & (check.branch=='Digital Printing')]

# **EDA : Exploratory Data Analysis**