In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import requests
from tqdm import tqdm

from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
import os

# Plot styling shared by every figure in this notebook
sns.set_palette('Spectral')
sns.set_context('notebook', font_scale=1)
sns.set_style('whitegrid')
# Show every column when a DataFrame is displayed
pd.set_option('display.max_columns', None)

import warnings

# NOTE(review): this silences ALL warnings, including pandas deprecation and
# chained-assignment warnings raised by later cells - consider narrowing the filter.
warnings.filterwarnings('ignore')

# **Beberapa Fungsi Yang Dibutuhkan**

### **0.1.0. Untuk menempatkan koma pada 3 angka**

In [2]:
def coma(value):
    """Format the integer part of ``value`` with '.' as the thousands separator.

    Anything after the first '.' in ``str(value)`` is discarded.

    Args:
        value: A number (or numeric string). Values without a decimal point,
            such as plain integers, are accepted.

    Returns:
        str: The grouped integer part, e.g. ``1234567.89`` -> ``'1.234.567'``.
    """
    integer_part = str(value).split(".")[0]

    # Keep a leading minus sign out of the digit grouping, otherwise a value
    # such as -123 would be rendered as '-.123'.
    sign = ""
    if integer_part.startswith("-"):
        sign, integer_part = "-", integer_part[1:]

    # Peel off groups of three digits from the right-hand side.
    groups = []
    while integer_part:
        groups.append(integer_part[-3:])
        integer_part = integer_part[:-3]

    return sign + ".".join(reversed(groups))

### **0.1.2. Fungsi currency numerik ke Rupiah**

In [3]:
def rupiah(value):
    """Format a number as an Indonesian Rupiah string.

    The integer part is grouped with '.' every three digits and the decimal
    part is appended after a ','.

    Args:
        value: A number (or numeric string). When ``str(value)`` has no decimal
            point (e.g. a plain ``int``) the decimal part defaults to ``'0'``,
            matching what a whole-number float such as ``1000.0`` produces.

    Returns:
        str: e.g. ``2500.75`` -> ``'Rp 2.500,75'``.
    """
    parts = str(value).split(".")
    integer_part = parts[0]
    decimal_part = parts[1] if len(parts) > 1 else "0"

    # Keep a leading minus sign out of the digit grouping.
    sign = ""
    if integer_part.startswith("-"):
        sign, integer_part = "-", integer_part[1:]

    # Peel off groups of three digits from the right-hand side.
    groups = []
    while integer_part:
        groups.append(integer_part[-3:])
        integer_part = integer_part[:-3]

    return "Rp " + sign + ".".join(reversed(groups)) + "," + decimal_part

### **0.1.3. Fungsi translate hari**

In [4]:
def hari(x):
    """Translate an English weekday name into a numbered Indonesian label.

    The numeric prefix makes the labels sort in weekday order.
    Returns None for any value that is not one of the seven weekday names.
    """
    terjemahan = {
        'Monday': '1. Senin',
        'Tuesday': '2. Selasa',
        'Wednesday': '3. Rabu',
        'Thursday': '4. Kamis',
        'Friday': '5. Jumat',
        'Saturday': '6. Sabtu',
        'Sunday': '7. Minggu',
    }
    return terjemahan.get(x)

### **0.1.4. Fungsi mengatur ukuran visualisasi 16:9**

In [5]:
def wide(lebar):
    """Return the height that gives a 16:9 figure for the given width."""
    return lebar / 16 * 9

### **0.1.5. Fungsi membedakan pagi dan malam hari**

In [6]:
def daylight(x):
    """Label a time value as night shift or day shift.

    The hour is the text before the first ':' in ``str(x)``; hours strictly
    greater than 18 count as the night shift.
    """
    hour = int(str(x).split(':')[0])
    return 'Sift Malam' if hour > 18 else 'Sift Pagi'

### **0.1.6. Fungsi transformasi list data numerik ke list data currency**

In [7]:
def yticks_transform(data):
    """Turn numeric tick values into Rupiah labels without the decimal part.

    Each value is formatted with ``rupiah`` and everything from the ',' on
    is dropped.
    """
    return [rupiah(value).split(',')[0] for value in data]

### **0.1.7 Fungsi Transform Week**

In [8]:
def week_transform(x):
    """Fold a week number into the range 1-3 using ``x % 4``.

    NOTE(review): a remainder of 0 is mapped to 1 (not 4), so 4 is never
    returned - confirm this is the intended week-of-month numbering.
    """
    remainder = x % 4
    return remainder if remainder != 0 else 1

### **0.1.8. Fungsi Data Transform Waktu**

In [9]:
def transform_time(data):
    """Add calendar columns derived from ``data.tanggal`` and return ``data``.

    The frame is modified in place. Columns added: ``tahun`` (year),
    ``no_bulan`` (month number), ``bulan`` (English month name), ``tgl``
    (day of month), ``hari`` (numbered Indonesian weekday label) and
    ``order`` (constant 1). No ``week`` column is produced here.
    """
    tanggal = pd.to_datetime(data.tanggal)

    data['tahun'] = tanggal.dt.year
    data['no_bulan'] = tanggal.dt.month
    data['bulan'] = tanggal.dt.month_name()
    data['tgl'] = tanggal.dt.day
    # English weekday name translated through the hari() helper
    data['hari'] = tanggal.dt.day_name().apply(lambda nama_hari: hari(nama_hari))
    data['order'] = 1
    return data

### **0.1.9. Search**

In [10]:
def search(word, document):
    """Return ``document`` if ``word`` is one of its whitespace-separated tokens.

    The match is exact and case-sensitive; when the word is absent the
    string ``'None'`` (not the ``None`` object) is returned.
    """
    if word in document.split():
        return document
    return 'None'


### **0.1.10. transform data**

In [11]:
def transform_data(data, branch):
    """Add calendar columns, apply the discount and tag rows with a branch.

    ``data`` is modified in place (new columns are added and ``total`` is
    overwritten with the discounted value); the returned frame is a column
    subset in a fixed order.

    Args:
        data: Sales frame with at least ``tanggal``, ``total``, ``potongan``
            and the columns listed in the final selection.
        branch: Value written to the ``branch`` column.

    Returns:
        pandas.DataFrame: The selected columns of the transformed frame.
    """
    tanggal = pd.to_datetime(data.tanggal)

    data['tahun'] = tanggal.dt.year
    data['no_bulan'] = tanggal.dt.month
    data['bulan'] = tanggal.dt.month_name()

    # `Series.dt.week` was removed in pandas 2.0; the ISO calendar week is its
    # replacement. Cast back to the dtype the old accessor produced (int64, or
    # float64 when dates are missing) so week_transform sees plain numbers.
    week = tanggal.dt.isocalendar().week
    week = week.astype('float64') if week.isna().any() else week.astype('int64')
    data['week'] = week.apply(lambda x: week_transform(x))

    data['tgl'] = tanggal.dt.day
    data['hari'] = tanggal.dt.day_name().apply(lambda x: hari(x))
    data['hour'] = tanggal.dt.hour
    data['order'] = 1
    # `potongan` is applied as a percentage discount on `total`
    data['total'] = data.total - (data.total * (data.potongan/100))
    data['branch'] = branch
    data = data[['branch','transaksi', 'tanggal', 'tahun', 'no_bulan', 'bulan', 'week', 'tgl', 'hari', 'hour','customer',
                'user', 'cs', 'kode_item', 'produk', 'quantity', 'satuan', 'harga', 'total', 'order']]

    return data

### **0.1.11. columns_transformer**

In [12]:
def correct_feature_text(data):
    """Rename the columns of ``data`` in place to lower snake_case.

    Each name is lower-cased and its whitespace-separated words are joined
    with '_'. The same frame object is returned.
    """
    data.columns = ['_'.join(name.lower().split()) for name in data.columns]
    return data

In [13]:
def get_branch_name(x):
    """Return the part of a file name before its first underscore."""
    branch, _sep, _rest = x.partition('_')
    return branch

def read_data_all_format(path, delimiter_=';'):
    """Read a tabular file as CSV, falling back to Excel.

    Args:
        path: File to read.
        delimiter_: CSV field delimiter (default ';').

    Returns:
        pandas.DataFrame: The parsed file. Malformed CSV lines are skipped.

    Raises:
        Exception: Whatever ``pandas.read_excel`` raises when the file is
            readable neither as CSV nor as Excel.
    """
    try:
        # `error_bad_lines` was removed in pandas 2.0, which made this call
        # raise TypeError and sent every file to the Excel branch.
        # `on_bad_lines='skip'` is the supported equivalent (pandas >= 1.3).
        msg = pd.read_csv(path, on_bad_lines='skip', delimiter=delimiter_)
    except Exception:
        # Not parseable as CSV (e.g. a binary .xlsx workbook): try Excel.
        msg = pd.read_excel(path)
    return msg

def datetime(x):
    """Parse a 'day/month/year hour:minute' string into a timestamp.

    The pieces are reordered to month/day/year before being handed to
    ``pd.to_datetime``. If anything fails, the (stringified) input is
    returned unchanged.

    NOTE(review): this function shadows the ``datetime`` class imported at the
    top of the notebook - confirm nothing later relies on that class.
    """
    try:
        x = str(x)
        date_fields = x.split('/')
        day, month = date_fields[0], date_fields[1]
        year = date_fields[-1].split()[0]
        clock_fields = x.split()[-1].split(':')
        hour, minute = clock_fields[0], clock_fields[1]
        return pd.to_datetime(f"{month}/{day}/{year} {hour}:{minute}")
    except:
        return x

def read_data_from_path(path):
    """Load every file in a directory into one frame with calendar columns.

    Each file is read with ``read_data_all_format`` and tagged with a
    ``branch`` column taken from the file-name prefix. The combined frame gets
    a parsed ``tanggal`` column, the ``transform_time`` columns, and is sorted
    newest first.

    Args:
        path: Directory holding the data files (with or without a trailing
            path separator).

    Returns:
        pandas.DataFrame: All files concatenated and sorted by
        ``tahun``, ``no_bulan``, ``tgl`` in descending order.
    """
    # Skip the macOS Finder metadata file if it is present
    list_file = [name for name in os.listdir(path) if name != '.DS_Store']

    frames = []
    for file_name in list_file:
        # os.path.join keeps this working when `path` has no trailing slash
        frame = read_data_all_format(os.path.join(path, file_name))
        frame['branch'] = get_branch_name(file_name)
        frames.append(frame)

    data = pd.concat(frames, axis=0)
    data.reset_index(drop=True, inplace=True)
    data['tanggal'] = pd.to_datetime(data.tanggal)
    data = transform_time(data)
    data.sort_values(['tahun','no_bulan','tgl'], ascending=False, inplace=True)

    return data

# **ETL : Extract, Transform, Load**

## **Load Data**

### Load Datasets Penjualan

In [14]:
# Column names of the raw sales export, grouped by meaning
important_columns = [
    # transaction header
    'transaksi', 'tanggal', 'customer',
    # staff involved
    'user', 'cs', 'operator', 'finishing', 'suport',
    'alamat',
    # line item
    'kode_item', 'produk', 'quantity', 'satuan', 'harga',
    # payment
    'tunai', 'kredit', 'debit', 'total', 'potongan',
    'ket',
]

In [15]:
# Table of export URLs for the sales (penjualan) workbooks, one row per branch and year
tbl_url_penjualan = pd.DataFrame({
    'branch':['Utama','Utama','Digital Printing','Digital Printing','Office Equipment','Office Equipment'],
    'years':[2023,2024,2023,2024,2023,2024],
    'urls':[
        "https://docs.google.com/spreadsheets/d/16K25jlGELiE7zEIoKGYPnOTL0-LFe7wV/export?format.xlsx",
        "https://docs.google.com/spreadsheets/d/1t60_O3qnDeeXC-SbhYPdZ5uIhv2kDGsO/export?format.xlsx",
        "https://docs.google.com/spreadsheets/d/18dLHwc8-mOaH0jiMSx8fajvNR8_2FB-z/export?format.xlsx",
        "https://docs.google.com/spreadsheets/d/1_Y-yKyCKaHKox5C3wcXMSV-88C_fsrl9/export?format.xlsx",
        "https://docs.google.com/spreadsheets/d/1vh7spBX3Pdgq2g6QosPuwuJAQWP70GzV/export?format.xlsx",
        "https://docs.google.com/spreadsheets/d/11R9hlf6x5cfqhqiynzKxouVxPMosjYNl/export?format.xlsx"
    ]
})

# Download every workbook into ./Datasets Penjualan/
os.makedirs('./Datasets Penjualan', exist_ok=True)
for row in tqdm(tbl_url_penjualan.itertuples(index=False), total=len(tbl_url_penjualan)):
    branch = row.branch
    year = str(row.years)[-2:]
    url = row.urls
    path = f'./Datasets Penjualan/{branch}_penjualan_{year}.xlsx'

    # Remove any stale copy first so an old file is never mistaken for a fresh download
    if os.path.exists(path):
        os.remove(path)

    # The timeout keeps a stalled connection from hanging the notebook forever
    response = requests.get(url, timeout=60)
    if response.status_code == 200:
        with open(path, "wb") as f:
            f.write(response.content)
    else:
        # Report the failure instead of silently leaving the file missing
        print(f'Download failed for {path}: HTTP {response.status_code}')

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:24<00:00,  4.14s/it]


### Load Datasets Piutang

In [16]:
# Table of export URLs for the receivables (piutang) workbooks, one row per branch and year
tbl_url_piutang = pd.DataFrame({
    'branch':['Utama','Utama','Digital Printing','Digital Printing','Office Equipment','Office Equipment'],
    'years':[2023,2024,2023,2024,2023,2024],
    'urls':[
        "https://docs.google.com/spreadsheets/d/1d3i65NALdFqq1P56XVIprGpob-IZwM0E/export?format.xlsx",
        "https://docs.google.com/spreadsheets/d/1Bg-4-7iUsn9RPm4l_ZyMA_vRc1RNU9Ty/export?format.xlsx",
        "https://docs.google.com/spreadsheets/d/1Q6JLGZNm5hLcb5QYB7xb5g2_JVhGMmrA/export?format.xlsx",
        "https://docs.google.com/spreadsheets/d/1ZoQjTKYGjwDVYv6o1fhEU0V8JFTNyflu/export?format.xlsx",
        "https://docs.google.com/spreadsheets/d/1GBKa1KrSiYNovEW5IuJSTp4Nly4ktz_R/export?format.xlsx",
        "https://docs.google.com/spreadsheets/d/17E1sq4VurQ2vwztztEHgjPG0p3QeD8Ud/export?format.xlsx"
    ]
})

# Download every workbook into ./Datasets Piutang/
os.makedirs('./Datasets Piutang', exist_ok=True)
for row in tqdm(tbl_url_piutang.itertuples(index=False), total=len(tbl_url_piutang)):
    branch = row.branch
    year = str(row.years)[-2:]
    url = row.urls
    path = f'./Datasets Piutang/{branch}_piutang_{year}.xlsx'

    # Remove any stale copy first so an old file is never mistaken for a fresh download
    if os.path.exists(path):
        os.remove(path)

    # The timeout keeps a stalled connection from hanging the notebook forever
    response = requests.get(url, timeout=60)
    if response.status_code == 200:
        with open(path, "wb") as f:
            f.write(response.content)
    else:
        # Report the failure instead of silently leaving the file missing
        print(f'Download failed for {path}: HTTP {response.status_code}')

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:09<00:00,  1.58s/it]


### Load Datasets Pemakaian

In [17]:
# Table of export URLs for the usage (pemakaian) workbook
tbl_url_pemakaian = pd.DataFrame({
    'branch':['Utama'],
    'urls':["https://docs.google.com/spreadsheets/d/1FODOcWN-NvYlZ_80SWDTDNGFOyYk46mB/export?format.xlsx"]
})

# Download the workbook into ./Datasets Pemakaian/
os.makedirs('./Datasets Pemakaian', exist_ok=True)
for row in tqdm(tbl_url_pemakaian.itertuples(index=False), total=len(tbl_url_pemakaian)):
    branch = row.branch
    url = row.urls
    path = f'./Datasets Pemakaian/{branch}_pemakaian.xlsx'

    # Remove any stale copy first so an old file is never mistaken for a fresh download
    if os.path.exists(path):
        os.remove(path)

    # The timeout keeps a stalled connection from hanging the notebook forever
    response = requests.get(url, timeout=60)
    if response.status_code == 200:
        with open(path, "wb") as f:
            f.write(response.content)
    else:
        # Report the failure instead of silently leaving the file missing
        print(f'Download failed for {path}: HTTP {response.status_code}')

# Load the usage workbook that was just downloaded
out = pd.read_excel('./Datasets Pemakaian/Utama_pemakaian.xlsx')

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it]


### Load Datasets Opname

In [18]:
# Table of export URLs for the stock-opname workbook
tbl_url_opname = pd.DataFrame({
    'branch':['Utama'],
    'urls':["https://docs.google.com/spreadsheets/d/1dt_NNTmgcf5J6c555x_ssvTOAtXeurQ-a47n32iaRwE/export?format.xlsx"]
})

# Download the workbook into ./Datasets Opname/
os.makedirs('./Datasets Opname', exist_ok=True)
for row in tqdm(tbl_url_opname.itertuples(index=False), total=len(tbl_url_opname)):
    branch = row.branch
    url = row.urls
    path = f'./Datasets Opname/{branch}_opname.xlsx'

    # Remove any stale copy first so an old file is never mistaken for a fresh download
    if os.path.exists(path):
        os.remove(path)

    # The timeout keeps a stalled connection from hanging the notebook forever
    response = requests.get(url, timeout=60)
    if response.status_code == 200:
        with open(path, "wb") as f:
            f.write(response.content)
    else:
        # Report the failure instead of silently leaving the file missing
        print(f'Download failed for {path}: HTTP {response.status_code}')

# Transform and clean the stock-opname sheet
opname = pd.read_excel('./Datasets Opname/Utama_opname.xlsx', sheet_name='Sheet2')
# Skip the first data row and drop the columns that are not used downstream
opname = opname[1:].drop(columns=['Unnamed: 11','Unnamed: 13','ID Device','Name Device','KETERANGAN','BATAL'])
opname.columns = ['timestamps','id_rows','barcode','jenis_opname','count','user','gudang','status']
opname = opname.dropna(axis=0)
# Strip the '.0' suffix that appears when the ids were read as floats
opname['id_rows'] = opname['id_rows'].apply(lambda x: str(x).split('.')[0])
opname['barcode'] = opname['barcode'].apply(lambda x: str(x).split('.')[0])
opname['tanggal'] = pd.to_datetime(opname.timestamps).dt.date
opname = opname[['timestamps','tanggal','id_rows','barcode','jenis_opname','count','user','gudang','status']]
# .copy() so the column assignments below act on an independent frame
opname = opname[opname.status==1].copy()
# Assign the result back: an inplace replace on `opname.user` is a chained
# operation that no longer updates the frame under pandas Copy-on-Write.
opname['user'] = opname['user'].replace('Ida', 'Alda')
# Constant 1 per row so that summing `number` counts records
opname['number'] = 1

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:09<00:00,  9.47s/it]


### Load Datasets Master Items

In [19]:
# Table of export URLs for the master-items workbook
tbl_url_master_item = pd.DataFrame({
    'branch':['Utama'],
    'urls':["https://docs.google.com/spreadsheets/d/1ZtkwRPsfQwfymXHvG7PJ-cgg6KYo8aLS/export?format.xlsx"]
})

# Download the workbook into ./Datasets Master Items/
os.makedirs('./Datasets Master Items', exist_ok=True)
for row in tqdm(tbl_url_master_item.itertuples(index=False), total=len(tbl_url_master_item)):
    branch = row.branch
    url = row.urls
    path = f'./Datasets Master Items/{branch}_master_items.xlsx'

    # Remove any stale copy first so an old file is never mistaken for a fresh download
    if os.path.exists(path):
        os.remove(path)

    # The timeout keeps a stalled connection from hanging the notebook forever
    response = requests.get(url, timeout=60)
    if response.status_code == 200:
        with open(path, "wb") as f:
            f.write(response.content)
    else:
        # Report the failure instead of silently leaving the file missing
        print(f'Download failed for {path}: HTTP {response.status_code}')

# Transform and clean: snake_case column names, drop rows with any missing value
mi = pd.read_excel('./Datasets Master Items/Utama_master_items.xlsx')
mi.columns = ['_'.join(i.lower().split()) for i in mi.columns]
mi = mi.dropna(axis=0)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.52s/it]




### Load Datasets Project Finishing

In [20]:
# Table of export URLs for the project-finishing workbook
tbl_url_project_finishing = pd.DataFrame({
    'branch':['Utama'],
    'urls':["https://docs.google.com/spreadsheets/d/1gOqYyy4TO7pcVFgJuqvrU5UtN9-GNsjoAggolBWULKs/export?format.xlsx"]
})

# Download the workbook into ./Datasets Project Finishing/
os.makedirs('./Datasets Project Finishing', exist_ok=True)
for row in tqdm(tbl_url_project_finishing.itertuples(index=False), total=len(tbl_url_project_finishing)):
    branch = row.branch
    url = row.urls
    path = f'./Datasets Project Finishing/{branch}_project_finishing.xlsx'

    # Remove any stale copy first so an old file is never mistaken for a fresh download
    if os.path.exists(path):
        os.remove(path)

    # The timeout keeps a stalled connection from hanging the notebook forever
    response = requests.get(url, timeout=60)
    if response.status_code == 200:
        with open(path, "wb") as f:
            f.write(response.content)
    else:
        # Report the failure instead of silently leaving the file missing
        print(f'Download failed for {path}: HTTP {response.status_code}')

# Transform and clean: the sheet's columns are renamed by position, then reordered
pf = pd.read_excel('./Datasets Project Finishing/Utama_project_finishing.xlsx')
pf.columns = ['timestamps','branch','transaksi','crew_finishing','product','qty','otorisator','satuan']
pf = pf[['timestamps','branch','transaksi','crew_finishing','product','qty','satuan','otorisator']]
# Transaction numbers are compared as text elsewhere in the notebook
pf['transaksi'] = pf['transaksi'].apply(lambda x: str(x))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.01s/it]


### Load Datasets Pelanggan

In [21]:
# Table of export URLs for the customer (pelanggan) workbooks, one row per branch
tbl_pelanggan = pd.DataFrame({
    'branch':['Utama','Digital Printing','Office Equipment'],
    'urls':[
        "https://docs.google.com/spreadsheets/d/1MLmG4nXtiwc3B9roATi-_4qlqVuGkLEj/export?format.xlsx",
        "https://docs.google.com/spreadsheets/d/1X2aTI66UebrUq-B1oC4pDB6qmW9CB3dV/export?format.xlsx",
        "https://docs.google.com/spreadsheets/d/1dSovpMyDuoewfohMbzAPyPliR5xshD5O/export?format.xlsx",
    ]
})

# Download every workbook into ./Datasets Pelanggan/
os.makedirs('./Datasets Pelanggan', exist_ok=True)
for row in tqdm(tbl_pelanggan.itertuples(index=False), total=len(tbl_pelanggan)):
    branch = row.branch
    url = row.urls
    path = f'./Datasets Pelanggan/{branch}_pelanggan.xlsx'

    # Remove any stale copy first so an old file is never mistaken for a fresh download
    if os.path.exists(path):
        os.remove(path)

    # The timeout keeps a stalled connection from hanging the notebook forever
    response = requests.get(url, timeout=60)
    if response.status_code == 200:
        with open(path, "wb") as f:
            f.write(response.content)
    else:
        # Report the failure instead of silently leaving the file missing
        print(f'Download failed for {path}: HTTP {response.status_code}')

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:04<00:00,  1.52s/it]


## **Transforming**

### Transforming Data Penjualan

In [22]:
# Load all sales workbooks, discard the original 'debit' and 'Unnamed: 16'
# columns, and use 'Unnamed: 17' as the debit column instead
data = (
    read_data_from_path('./Datasets Penjualan/')
    .drop(columns=['debit', 'Unnamed: 16'])
    .rename(columns={'Unnamed: 17': 'debit'})
)

In [23]:
# Keep only the columns used downstream, in a fixed order
data=data[['branch', 'transaksi', 'tanggal', 'tahun', 'no_bulan', 'bulan', 'tgl', 'hari', 'customer', 'user',
           'cs', 'operator','finishing', 'suport', 'alamat', 'kode_item', 'produk', 'quantity','satuan', 'harga', 'tunai', 
           'kredit', 'debit', 'total', 'potongan','order','ket']]

data=data.reset_index(drop=True)

# Filled copies of the numeric and text columns; `data` itself is not changed by these
p_num=data.select_dtypes('number').fillna(0)
p_cate=data.select_dtypes('object').fillna('unknown')

# Fill missing values column by column. The result is assigned back because an
# inplace fillna on `data[col]` is a chained operation that no longer updates
# the frame under pandas Copy-on-Write.
# numeric columns with missing values
for column in ('kredit', 'tunai', 'quantity', 'total'):
    data[column] = data[column].fillna(0)

# categorical columns with missing values
for column in ('ket', 'cs', 'operator', 'finishing', 'suport'):
    data[column] = data[column].fillna('unknown')

def transform_obj_num(x):
    """Convert a value to an int using the text before the first ',' or '.'.

    Args:
        x: Any value; it is stringified before parsing.

    Returns:
        int: The parsed whole number, or 0 when the text is not an integer
        (for example missing values or free text).
    """
    try:
        msg = int(str(x).split(',')[0].split('.')[0])
    except Exception:
        # Unparseable values count as zero; `Exception` (rather than a bare
        # except) lets KeyboardInterrupt and SystemExit propagate.
        msg = 0
    return msg

# Coerce the price and debit columns to whole numbers
data = data.assign(
    harga=data['harga'].apply(transform_obj_num),
    debit=data['debit'].apply(transform_obj_num),
)

def transform_potongan(x):
    """Parse a discount value that may use ',' as the decimal separator.

    Args:
        x: Discount as text (e.g. ``'2,5'``) or as a number.

    Returns:
        float | int: The parsed discount, or 0 when the value is missing or
        cannot be parsed.
    """
    try:
        # str() first: numeric cells have no .split/.replace, so they used to
        # fall into the except branch and silently become 0.
        msg = float(str(x).replace(',', '.'))
    except Exception:
        return 0
    # NaN is the only float that differs from itself; treat it as "no discount"
    if msg != msg:
        return 0
    return msg

# Parse the discount column into numbers
data = data.assign(potongan=data['potongan'].apply(transform_potongan))

# Split the combined sales frame per branch. `.drop()` returns a new frame, so
# each branch frame is independent of `data` (no inplace change on a slice).

# Digital Printing branch
lcd = data[data.branch=='Digital Printing'].drop(columns='branch')

# Utama branch
utm = data[data.branch=='Utama'].drop(columns='branch')

# Office Equipment branch
oe = data[data.branch=='Office Equipment'].drop(columns='branch')

# Master item list of the Utama branch. Read from the notebook-relative folder
# the download cell writes to; the previous absolute 'D:/...' path only
# existed on one machine.
master = pd.read_excel('./Datasets Master Items/Utama_master_items.xlsx')

# Normalise column names (this redefines the helper of the same name declared earlier)
def correct_feature_text(data):
    """Rename the columns of ``data`` in place to lower snake_case and return it."""
    data.columns = ['_'.join(label.lower().split()) for label in data.columns]
    return data

# Apply the column-name normalisation to each branch frame
lcd, utm, oe = (correct_feature_text(frame) for frame in (lcd, utm, oe))



#### 1.1. Cleaning Data LCD

In [24]:
# Rebuild the calendar columns for the Digital Printing frame
lcd = transform_time(lcd)
lcd.drop(columns=['operator','finishing','suport','alamat','harga'], inplace=True)
lcd = lcd[['transaksi','tanggal','tahun','no_bulan','bulan','tgl','hari','customer','user',
           'cs','kode_item','produk','quantity','satuan','tunai','debit','kredit','total','potongan','order','ket']]

# Positional rename: 'user' becomes 'cs', 'cs' becomes 'operator' and
# 'total' becomes 'omset'; every other name is unchanged.
lcd.columns = ['transaksi','tanggal','tahun','no_bulan','bulan','tgl','hari','customer','cs',
           'operator','kode_item','produk','quantity','satuan','tunai','debit','kredit','omset','potongan','order','ket']

# Apply `potongan` as a percentage discount on the turnover
lcd['omset'] = lcd.omset - (lcd.omset * (lcd.potongan/100))
lcd['branch'] = 'Digital Printing'
# Every remaining missing value, in any column, becomes the text 'Kosong'
lcd.fillna('Kosong', inplace=True)
lcd.drop(columns='potongan', inplace=True)
# First word of the product name
lcd['produk2']=lcd.produk.apply(lambda x: x.split()[0])
# Product names that are classified as division 'JLD' instead of 'LCD'
finishing_lcd = [
    'JASA PERPORASI','BIAYA LUBANG','BIAYA PASANG','LAMINATING BIASA F4',
    'LAMINATING BIASA A3','JILID SPIRAL KAWAT A4 UK. 6', 'JILID LEM A4 100+ - 200  LBR',
    'JILID ALBUM', 'JILID LEM', 'JILID SPIRAL KAWAT A4 UK. 5',
    'JILID BK BALUT LAGBAN A3', 'JILID BK BALUT LAGBAN A4',
    'JILID HEKTER TENGAH', 'JILID BK SPIRAL PLASTIC  A4 UK. 1 1/4',
    'JILID SPIRAL KAWAT A4 UK. 7', 'JILID SPIRAL KAWAT A4 UK. 9',
    'JILID SPIRAL KAWAT A4 UK. 14', 'JILID LEM A4 1-100 LBR',
    'JILID BK SPIRAL PLASTIC  A4 UK. 1',
    'JILID BK SPIRAL PLASTIC  A4 UK. 5/8',
    'JILID SPIRAL KAWAT A4 UK. 10',
    'JILID BK SPIRAL PLASTIC  A4 UK. 3/4', 'JILID ALBUM A4 1-100 LBR',
    'JILID BK SPIRAL PLASTIC  A4 UK. 1 1/2',
    'JILID SPIRAL KAWAT A4 UK. 16', 'JILID LEM A4/F4 201+ HAL',
    'JILID SPIRAL KAWAT A4 UK. 18', 'JILID SPIRAL KAWAT A4 UK. 8',
    'JILID SPIRAL KAWAT A4 UK. 20', 'JILID LEM A3',
    'JILID BALUT LAGBAN PLANO',
    'JEPIT KALENDER','PASANG SUDUT BUKU','DOFF LAMINATING','PEKERJAAN FINISHING',
    'TABUNG WISUDA','PRESS PINGGIR','PENGISIAN TINTA TRODAT','BIAYA POTONG',
]

def get_devisi_lcd(x):
    """Return 'JLD' when the product name is listed in ``finishing_lcd``, else 'LCD'."""
    return 'JLD' if x in finishing_lcd else 'LCD'
# Tag each row with its division, then order newest year and month first
lcd = (
    lcd.assign(devisi=lcd.produk.apply(get_devisi_lcd))
       .sort_values(['tahun', 'no_bulan'], ascending=False)
)

#### 1.2. Cleaning Data UTM

In [25]:
# Rebuild the calendar columns and tag the branch
utm = transform_time(utm)
utm['branch'] = 'Utama'

# Select the working columns, call the turnover 'omset' and drop the discount.
# NOTE(review): unlike the Digital Printing and Office Equipment cells, the
# discount is not applied to omset here - confirm this is intended.
utm = (
    utm[['branch','transaksi','tanggal','tahun','no_bulan','bulan','tgl','hari','customer','cs',
         'kode_item','produk','quantity','satuan','tunai','debit','kredit','total','potongan','order','ket']]
    .rename(columns={'total': 'omset'})
    .drop(columns='potongan')
)

utm = utm.assign(
    operator='Kosong',
    # division prefix: the text before the first '.' of the item code
    devisi=utm.kode_item.apply(lambda kode: str(kode).split('.')[0]),
)
def devisi_1(x):
    """Map an item-code prefix to its division label.

    'CTK' and 'LCD' both map to 'CTK', 'KTR' maps to 'Pinjaman Sementara',
    'PC' and 'JLD' map to themselves, and every other prefix maps to 'ATK'.
    """
    pemetaan = {
        'CTK': 'CTK',
        'LCD': 'CTK',
        'KTR': 'Pinjaman Sementara',
        'PC': 'PC',
        'JLD': 'JLD',
    }
    return pemetaan.get(x, 'ATK')
# Translate the prefixes to division labels, then remove exact duplicate rows
utm = utm.assign(devisi=utm.devisi.apply(devisi_1)).drop_duplicates()

#### 1.3. Cleaning Data OE

In [26]:
# Rebuild the calendar columns and tag the branch
oe = transform_time(oe)
oe['branch'] = 'Office Equipment'

oe = (
    oe[['branch', 'transaksi', 'tanggal','tahun', 'no_bulan', 'bulan', 'tgl', 'hari', 'customer', 'user',
        'kode_item','produk', 'quantity', 'satuan', 'tunai','debit','kredit', 'total', 'potongan', 'order', 'ket']]
    # 'user' plays the role of cashier ('cs') here; 'total' is the turnover
    .rename(columns={'user': 'cs', 'total': 'omset'})
    .assign(
        # apply `potongan` as a percentage discount on the turnover
        omset=lambda frame: frame['omset'] - (frame['omset'] * (frame.potongan/100)),
        operator='Kosong',
    )
    .drop(columns='potongan')
    .assign(devisi='OE')
)

#### 1.4. Buat Data Frame Gabung Dari Ketiga Data Tersebut

In [27]:
# Stack the three branch frames into one sales table with a common column order
df = pd.concat([utm, oe, lcd], axis=0)
df = df[['branch', 'devisi', 'transaksi', 'tanggal', 'tahun', 'no_bulan', 'bulan',
       'tgl', 'hari', 'customer', 'cs', 'operator', 'kode_item', 'produk', 'quantity',
       'satuan', 'tunai','debit','kredit', 'omset', 'order','ket']]

# Normalise the cashier name: first word before any '(' and before any '_'
df['cs'] = df.cs.apply(lambda x: str(x).split('(')[0].split()[0])
df['cs'] = df.cs.apply(lambda x: x.split('_')[0])
# Assign the result back: an inplace replace on `df.cs` is a chained operation
# that no longer updates the frame under pandas Copy-on-Write.
df['cs'] = df['cs'].replace('BELLA', 'BELA').replace('nan', 'Kosong')
# Keep the customer name only (text before ' #')
df['customer'] = df.customer.apply(lambda x: str(x).split(' #')[0])
# Period label 'month/yy'
df['date']=df.no_bulan.apply(lambda x: str(x) + '/')+df.tahun.apply(lambda x: str(x)[-2:])
df=df.rename(columns={'devisi':'divisi'})
# Drop everything from 'MLM' onwards in the cashier name
df['cs']=df['cs'].apply(lambda x: str(x).split('MLM')[0])

def get_day_work(x):
    """Return 'Malam' when the timestamp's hour is 18 or later, else 'Pagi'."""
    time = x.hour
    if time >= 18:
        msg='Malam'
    else:
        msg='Pagi'
    return msg

# Remove dirty rows whose date was filled with the 'Kosong' placeholder;
# .copy() so the assignments below act on an independent frame
df = df[df.tanggal != 'Kosong'].copy()
df['tanggal'] = pd.to_datetime(df['tanggal'])
df['sift_kerja']=df.tanggal.apply(get_day_work)
df['bulan_singkat']=df.bulan.apply(lambda x: str(x)[:3])
df['jam']=df.tanggal.dt.hour
df['transaksi'] = df['transaksi'].apply(lambda x: str(x))
df['kredit'] = df['kredit'].apply(lambda x: transform_obj_num(x))
df.to_excel('./Data/data_penjualan.xlsx', index=False)

### Transforming Data Piutang

In [28]:
# List the receivables files, skipping the macOS Finder metadata file
file_piutang = [name for name in os.listdir('Datasets Piutang') if name != '.DS_Store']

# One frame per file, tagged with the branch taken from the file-name prefix.
# A descriptive list name is used instead of `_`, which IPython reserves for
# the last cell output.
frames_piutang = []
for i in file_piutang:
    branch = i.split('_')[0]
    try:
        n = pd.read_csv(f'./Datasets Piutang/{i}')
    except Exception:
        # Not parseable as CSV (e.g. an .xlsx workbook): read it as Excel
        n = pd.read_excel(f'./Datasets Piutang/{i}')
    n['branch'] = branch
    frames_piutang.append(n)
pt = pd.concat(frames_piutang)
# Keep the working columns; every missing value becomes 0
pt = pt[['branch','tanggal','transaksi','tanggal_jt','kredit','bayar']].fillna(0).reset_index(drop=True)
# Transaction numbers are compared as text elsewhere in the notebook
pt['transaksi'] = pt['transaksi'].apply(lambda x: str(x))

### Transforming Data Pembayaran Piutang

In [29]:
# file_pembayaran = os.listdir('Datasets Pembayaran Piutang')
# try:
#     file_pembayaran.remove('.DS_Store')
# except:
#     file_pembayaran=file_pembayaran

# _ = []
# for i in file_pembayaran:
#     branch = i.split('_')[0]
#     try:
#         n = pd.read_csv(f'./Datasets Pembayaran Piutang/{i}')
#     except:
#         n = pd.read_excel(f'./Datasets Pembayaran Piutang/{i}')
#     n['branch'] = branch
#     _.append(n)
# pb = pd.concat(_)
# pb = pb[['branch','tanggal','transaksi','kredit','bayar']]
# pb['bayar'].fillna(0, inplace=True)
# pb['transaksi'] = pb['transaksi'].apply(lambda x: str(x))

### Transforming Data Pelanggan

In [30]:
# List the customer files, skipping the macOS Finder metadata file
file_pelanggan = [name for name in os.listdir('Datasets Pelanggan') if name != '.DS_Store']

# One frame per file, tagged with the branch taken from the file-name prefix.
# A descriptive list name is used instead of `_`, which IPython reserves for
# the last cell output.
frames_pelanggan = []
for i in file_pelanggan:
    branch = i.split('_')[0]
    try:
        n = pd.read_csv(f'./Datasets Pelanggan/{i}')
    except Exception:
        # Not parseable as CSV (e.g. an .xlsx workbook): read it as Excel
        n = pd.read_excel(f'./Datasets Pelanggan/{i}')
    n['branch'] = branch
    frames_pelanggan.append(n)

pl1 = pd.concat(frames_pelanggan)
pl1 = pl1[['branch','kode_customer','customer','alamat','kota','provinsi','telepon','email']]
# 'kode_customer' is exposed as 'id'
pl1.columns = ['branch','id','customer','alamat','kota','provinsi','telepon','email']
# Missing values are shown as '-'
pl1 = pl1.fillna('-')

# **EDA : Exploratory Data Analysis**

## **DASHBOARD - 1 : Trend Penjualan**

- Tunai
- Debit
- Kredit
- Sisa
- Total

In [31]:
# Helper that builds a composite id from two values
def get_id(a1, a2):
    """Join the string forms of ``a1`` and ``a2`` with '#'."""
    return '#'.join((str(a1), str(a2)))
    
# Sales trend: one row per transaction with its payment breakdown
eda1 = df[['branch','divisi','cs','customer','tanggal','transaksi','tunai','debit','kredit']].drop_duplicates(subset='transaksi')
eda1['kredit'] = eda1['kredit'].apply(lambda x: transform_obj_num(str(x)))
eda1['tanggal'] = eda1.tanggal.dt.date
eda1['total'] = eda1.tunai + eda1.debit + eda1.kredit

# Attach the receivable amount per transaction as 'sisa'
eda1 = eda1.merge(pt[['transaksi','kredit']].rename(columns={'kredit':'sisa'}), on='transaksi', how='left')
# Assign the result back: an inplace fillna on `eda1['sisa']` is a chained
# operation that no longer updates the frame under pandas Copy-on-Write.
eda1['sisa'] = eda1['sisa'].fillna(0)
eda1['bayar'] = eda1.total - eda1.sisa
eda1 = eda1.groupby(['branch','divisi','cs','customer','tanggal'])[['tunai','debit','kredit','total','bayar','sisa']].sum().reset_index()

# Reshape to long format: one block of rows per measure, labelled in 'status'
status = ['tunai','debit','kredit','total','bayar','sisa']
eda1_ = []
for i in status:
    n = eda1[['branch','divisi','cs','customer','tanggal',i]].rename(columns={i:'values'})
    n['status'] = i
    eda1_.append(n)
eda1 = pd.concat(eda1_)
# Rows of the 'Pinjaman Sementara' division are excluded from the dashboard
eda1 = eda1[eda1.divisi!='Pinjaman Sementara']

# Temporary, hard-coded lists of the generic (walk-in / online) customer names
umum_offline = ['CASH/TUNAI','UMUM','UMUM KREDIT','(TIDAK DIGUNAKAN PERIODE 14/10/23) UMUM']
umum_online = ['ONLINE_WA_PRIBADI','DIVISI_ONLINE_WA','PERIODE (31/10/22 - 07/09/23) ONLINE_WA']

def classification_pelanggan(Pelanggan):
    """Classify a customer name into one of four customer groups.

    Args:
        Pelanggan: Customer name (str).

    Returns:
        str: 'Umum Offline' or 'Umum Online' for names in the generic lists,
        'SPK' when the first word is 'SPK', otherwise 'Korporate'.
    """
    if Pelanggan in umum_offline:
        return 'Umum Offline'
    if Pelanggan in umum_online:
        return 'Umum Online'
    # Slice instead of indexing so an empty or whitespace-only name falls
    # through to 'Korporate' rather than raising IndexError.
    if Pelanggan.split()[:1] == ['SPK']:
        return 'SPK'
    return 'Korporate'
    
# Lookup table: one row per (branch, customer) with the customer's group
pl2 = (
    eda1[['branch', 'customer']]
    .drop_duplicates()
    .assign(klasifikasi_pelanggan=lambda frame: frame.customer.apply(classification_pelanggan))
    .drop_duplicates()
)

# Attach the group to every dashboard row; the trailing drop_duplicates removes
# the repeats created when a customer appears under more than one branch
lookup = pl2[['customer', 'klasifikasi_pelanggan']]
eda1 = eda1.merge(lookup, on='customer', how='left').drop_duplicates()

# Helper that flags SPK customers
def get_spk(x):
    """Return 'Include' when the first word of ``x`` is 'SPK', else 'Exclude'.

    Args:
        x: Customer name (str).

    Returns:
        str: 'Include' or 'Exclude'.
    """
    # Slice instead of indexing so an empty or whitespace-only name is simply
    # 'Exclude' rather than raising IndexError.
    if x.split()[:1] == ['SPK']:
        return 'Include'
    return 'Exclude'

# Flag SPK customers
eda1 = eda1.assign(spk=eda1.customer.apply(get_spk))

# Keep only rows dated 2023 or later
tahun_transaksi = pd.to_datetime(eda1.tanggal).dt.year
eda1 = eda1[tahun_transaksi >= 2023]

In [32]:
# c = eda1.copy()
# c['my'] = pd.to_datetime(c.tanggal).dt.strftime('%m/%y')
# c['year'] = pd.to_datetime(c.tanggal).dt.year
# c = c[c.status=='total']
# c = c[c.spk=='Exclude']
# c = c.groupby(['year','my','branch','klasifikasi_pelanggan','cs'])[['values']].sum().reset_index()

# _ = []
# for i in c.my.unique():
#     n = c[c.my==i]
#     for j in n.branch.unique():
#         m = n[n.branch==j]
#         for k in m.klasifikasi_pelanggan.unique():
#             o = m[m.klasifikasi_pelanggan==k]
#             o = o.sort_values('values', ascending=False)
#             o['p_values'] = o['values']/o['values'].sum()
#             _.append(o)

# c = pd.concat(_)
# c.to_excel('./Data/Analisa 4 pertumbuhan karyawan.xlsx', index=False)

In [33]:
# Export the long-format sales table for dashboard 1.
# NOTE(review): unlike the other exports in this notebook, index=False is not
# passed, so the frame index is written as the first column - confirm the
# dashboard expects it.
eda1.to_excel('./Data/1. dashboard penjualan.xlsx')

## **DASHBOARD - 2 : Progress Opname**

In [34]:
# Per day, warehouse and user: number of opname records and summed count
(
    opname
    .groupby(['tanggal', 'gudang', 'user'])[['number', 'count']]
    .sum()
    .reset_index()
    .to_excel('./Data/2. dashboard progress opname.xlsx', index=False)
)

In [35]:
# Number of rows in the cleaned master-item table
len_data_items = len(mi)

# Opname records per day and warehouse
progres = opname.groupby(['tanggal', 'gudang'])[['number']].sum().reset_index()

# Running total for the 'Utama' warehouse and its share of the master-item count
progres_utm = (
    progres[progres.gudang == 'Utama']
    .assign(progres=lambda frame: frame.number.cumsum())
    .assign(persentase_progres=lambda frame: frame['progres'] / len_data_items)
)
progres_utm.to_excel('./Data/3. dashboard progress opname 2.xlsx', index=False)

In [36]:
# Average number of opname records per day
opname.groupby(['tanggal'])[['number']].sum().reset_index().number.mean()

23.25

In [37]:
# Master-item count divided by 17.
# NOTE(review): 17 is an unexplained constant (presumably a number of days or
# people) - confirm and give it a name.
len_data_items / 17

210.88235294117646

In [38]:
# Display the number of rows in the cleaned master-item table
len_data_items

3585

## **LAPORAN - 1 : Piutang SPK**

In [39]:
# Digital Printing sales whose customer name starts with the word 'SPK'
l1 = df[df.branch == 'Digital Printing']
l1 = l1[l1.customer.apply(lambda x: str(x).split()[0]) == 'SPK']

# One row per distinct transaction line, dated by calendar day
l1 = (
    l1.assign(date=lambda frame: frame.tanggal.dt.date)
    [['customer', 'date', 'transaksi', 'tunai', 'kredit', 'debit', 'ket']]
    .drop_duplicates()
)

# Keep only transactions that appear in the receivables table, then total them
l1 = (
    pt[['transaksi']]
    .merge(l1, on='transaksi', how='inner')
    .assign(
        total=lambda frame: frame.tunai + frame.kredit + frame.debit,
        year=lambda frame: pd.to_datetime(frame.date).dt.year,
        month=lambda frame: pd.to_datetime(frame.date).dt.month_name(),
    )
    .groupby(['customer', 'year', 'month', 'date', 'transaksi', 'ket'])[['total']]
    .sum()
    .reset_index()
)
l1.to_excel('./Data Piutang Report/1. Rekap Piutang.xlsx', index=False)

In [40]:
import re

def extract_values(input_string):
    """Return the digit groups that appear inside parentheses in a text.

    Parameters
    ----------
    input_string : str
        Text to scan, e.g. ``'SPK (123) (456)'``.

    Returns
    -------
    list of str or str
        The matched digit strings in order of appearance (kept as strings,
        not converted to int), or the marker ``'Non SPK'`` when nothing
        matches or when the input is not a string (e.g. ``None`` or NaN).
    """
    # Missing values reach this function as None/float NaN; treat them like
    # text without any number instead of letting `re` raise TypeError.
    if not isinstance(input_string, str):
        return 'Non SPK'

    # Digits enclosed directly in parentheses, e.g. "(12345)".
    matches = re.findall(r'\((\d+)\)', input_string)

    return matches if matches else 'Non SPK'

In [41]:
# Sales of the 'Utama' branch for 24-30 June 2024, keyed by the SPK numbers
# referenced in each transaction note (`ket`).
# `.copy()` gives an independent frame, so adding `date` does not write into a
# slice of `df`.
utm = df[df.branch=='Utama'].copy()
utm['date'] = utm.tanggal.apply(lambda x: str(x).split()[0])
# Generate the reporting window instead of typing each day by hand.
utm = utm[utm['date'].isin(pd.date_range('2024-06-24', '2024-06-30').strftime('%Y-%m-%d'))]
utm = utm.groupby(['date','branch','divisi','transaksi','ket'])[['omset']].sum().reset_index()
utm = utm[utm.ket!='unknown']
# Replace each note by the list of numbers found in parentheses, or by the
# marker 'Non SPK' when there is none.
utm['ket'] = utm['ket'].apply(extract_values)
utm = utm[utm.ket!='Non SPK']
# One row per referenced number.
# NOTE(review): a transaction that references several numbers contributes its
# full omset to each of them after explode — confirm this is intended.
# NOTE(review): every remaining `ket` is a non-empty list, so the ffill looks
# like a no-op; kept unchanged — confirm before removing.
utm = utm.explode('ket').ffill()
utm = utm.groupby(['date','transaksi','ket'])[['omset']].sum().reset_index()

In [42]:
# Normalise `date` to plain strings, keeping the text before the first whitespace.
l1['date'] = [str(value).split()[0] for value in l1['date']]

In [43]:
# Total amount per date and transaction, taken from the receivables recap `l1`.
# (To restrict the period, filter l1['date'] with `isin([...])` before grouping.)
spk = (
    l1.copy()
    .groupby(['date', 'transaksi'])[['total']]
    .sum()
    .reset_index()
)

In [44]:
# Attach the 'Utama' sales whose note references each SPK transaction number.
# NOTE(review): the join compares spk.transaksi with the numbers extracted as
# strings into utm.ket — confirm both sides share the same dtype.
spk = (
    spk.merge(utm[['ket','transaksi','omset']], left_on='transaksi', right_on='ket', how='left')
    .drop(columns='ket')
)
# Positional rename; the merge output order is:
# date, spk transaction, spk total, utm transaction, utm omset.
spk.columns = ['date','no_lcd','omset_spk','no_utm','omset_utm']
# Assign the filled columns back instead of `spk[col].fillna(..., inplace=True)`:
# the chained in-place form acts on a temporary Series and does not update the
# frame when pandas Copy-on-Write is active.
spk['no_utm'] = spk['no_utm'].fillna('-')
spk['omset_utm'] = spk['omset_utm'].fillna(0)
# Bring back customer name and note for each SPK transaction.
spk = spk.merge(l1[['customer','transaksi','ket']], left_on='no_lcd', right_on='transaksi', how='left').drop(columns='transaksi')
# Difference between what was sold through 'Utama' and the SPK amount.
spk['sisa'] = spk.omset_utm - spk.omset_spk
spk = spk[['date','customer','no_lcd','no_utm','omset_spk','omset_utm','sisa','ket']]

In [45]:
# Write the SPK vs 'Utama' sales comparison to Excel without the index column.
spk.to_excel('./Data/SPK x Penjualan utm.xlsx', index=False)

In [46]:
# l1 = df[df.branch=='Digital Printing']

# l1['spk'] = l1.customer.apply(lambda x: str(x).split()[0])
# l1 = l1[l1.spk != 'SPK']

# l1['date'] = l1.tanggal.dt.date
# l1 = l1[['customer','date','transaksi','tunai','kredit','debit','ket']].drop_duplicates()
# l1 = pt[['transaksi']].merge(l1, on='transaksi', how='inner')

# l1['total'] = l1.tunai + l1.kredit + l1.debit
# l1['year'] = pd.to_datetime(l1.date).dt.year
# l1['month'] = pd.to_datetime(l1.date).dt.month_name()

# l1 = l1.groupby(['customer','year','month','date','transaksi','ket'])[['total']].sum().reset_index()
# l1.to_excel('./Data Piutang Report/1. Rekap Piutang Non SPK.xlsx', index=False)

In [47]:
# d1 = pd.read_excel('1.xlsx')
# d2 = pd.read_excel('2.xlsx')
# d3 = pd.read_excel('3.xlsx')
# pd.concat([d1, d2, d3]).reset_index(drop=True).drop_duplicates().to_excel('utm_2023.xlsx', index=False)

## **LAPORAN - 2 : Analisa**

In [48]:
# 'Digital Printing' sales, excluding customers whose name starts with 'SPK'.
# `.copy()` gives an independent frame, so the column assignments below do not
# write into a slice of `df`.
l2 = df[df.branch=='Digital Printing'].copy()

# 50K program analysis
l2['my'] = pd.to_datetime(l2.tanggal).dt.strftime('%m/%y')
# First whitespace-separated token of the customer name. The vectorised form
# yields NaN for an empty name instead of raising IndexError.
l2['spk'] = l2.customer.astype(str).str.split().str[0]
l2 = l2[l2.spk!='SPK']

In [49]:
# Collapse the line items into one omset total per month / transaction /
# note / customer.
l2 = (
    l2
    .groupby(['my', 'transaksi', 'ket', 'customer'])[['omset']]
    .sum()
    .reset_index()
)

def get_omset_promo(x):
    """Return 'Y' when `x` is strictly greater than 5,000,000, otherwise 'N'."""
    return 'Y' if x > 5000000 else 'N'

# Flag each transaction by whether its omset exceeds the 5,000,000 threshold.
l2['promotion_5m'] = l2['omset'].map(get_omset_promo)

In [50]:
# Number of transactions per month and promotion flag.
eda1 = l2.groupby(['my','promotion_5m']).size().reset_index(name='count')
# Share of each flag within its month. `transform` keeps row order and index,
# avoids assigning on per-month slices, and also works when `eda1` is empty
# (the former loop + `pd.concat([])` raised in that case).
eda1['ratio'] = eda1['count'] / eda1.groupby('my')['count'].transform('sum')
eda1.to_excel('./Data/1. Analisa 19 Juni 2024 1.xlsx', index=False)

In [51]:
# Transactions above the promo threshold in the selected months.
eda2 = l2[l2.my.isin(['05/24','06/24','01/24','04/24']) & (l2.promotion_5m == 'Y')]

# Within each month (in order of first appearance) list the largest omset first.
eda2 = pd.concat([
    eda2[eda2.my == month].sort_values('omset', ascending=False)
    for month in eda2.my.unique()
])
# Show omset as a Rupiah string without the decimal part.
eda2['omset'] = [rupiah(amount).split(',')[0] for amount in eda2['omset']]
eda2.set_index(['my','transaksi','customer','ket'])[['omset']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,omset
my,transaksi,customer,ket,Unnamed: 4_level_1
01/24,402518,UMUM,COPY A3 AS BUILT DRAWING AMBIL JAM 11 SIANG AN. RAHMA,Rp 26.616.000
01/24,402647,DIVISI_ONLINE_WA,KALENDER BIDAN S.E KAB.PASANGKAYU CEK DESAIN HARI JUMAT JAM 5 SORE 082313209222 IBU NITA,Rp 17.000.000
01/24,406025,UMUM,"BUKU A1, A3 JILID ALBUM, ALBUM PETA PENYUSUNAN REVISI RTRW AMBIL KAMIS 01.02.24 JAM 6 SORE 082395468782 AN. ANDI",Rp 12.640.000
01/24,404654,CASH/TUNAI,PEMERINTAH KABUPATEN MOROWALI UTARA 62 823-9546-8782 AN.ANDI,Rp 11.055.000
01/24,406500,CASH/TUNAI,PEMERINTAH KABUPATEN MOROWALI UTARA +62 823-9546-8782 AN.ANDI,Rp 10.605.000
01/24,402764,"ABDUL GAFAR, BPK","TAMBAHAN BUKU JILID ALBUM KENANGAN PMM, AN. PAK JAFAR",Rp 9.375.000
01/24,402712,UMUM,STIKER CALEG PAK AGUS | AN. JUNO 082268184391,Rp 8.442.000
01/24,404022,ONLINE_WA_PRIBADI,"KALENDER SDIT IZZUL ISLAM BAHAN 260GR SPIRAL GANTUNG , AN.annisa",Rp 7.575.000
01/24,402731,UMUM,KARTU SUARA RYAN RAMADHAN | AN. JUNO,Rp 7.089.000
01/24,405184,UMUM KREDIT,DICKY- PBSI - CENTRAL BADMINTON 2024 l KIRIM EDITAN TGL 23 SORE,Rp 7.076.000


In [52]:
# Look up the original sales rows for the transactions listed in `eda2`,
# one row per transaction.
check = (
    df.loc[df.transaksi.isin(eda2.transaksi.tolist())]
    .drop_duplicates(subset='transaksi')
    .loc[:, ['tanggal', 'customer', 'transaksi', 'ket']]
)
check['my'] = pd.to_datetime(check.tanggal).dt.strftime('%m/%y')
check.set_index(['my', 'customer', 'transaksi'])[['ket']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ket
my,customer,transaksi,Unnamed: 3_level_1
06/24,ONLINE_WA_PRIBADI,425850,TUMBLER DAN NOTES MAYBAMK. DIAMBIL SELASA JAM ...
06/24,DIVISI_ONLINE_WA,425397,BUKU GUBERNUR PROFIL BARU 2024 DIAMBIL HARD CO...
06/24,CASH/TUNAI,424888,NEON BOX PRODIA AMBIL TGL 29 JUNI 2024 AN. KA ...
06/24,CASH/TUNAI,424889,"KOP SURAT,MAP, AMPLOP, STIKER OUTDOOR, STIKER ..."
06/24,DIVISI_ONLINE_WA,424401,BUKU SAWIT BPDPKS. DIAMBIL SABTU JAM 10 PAGI. ...
06/24,DIVISI_ONLINE_WA,424366,"TUMBLER PT NINDYA, DIAMBIL 25 SELASA SORE DAN ..."
06/24,JOYFUL KIDS,423154,BONEKA WISUDA & BUKU WISUDA
06/24,CASH/TUNAI,422738,BLOK NOTE BPKP (ISI 2 SISI 50 LBR HITAM PUTIH ...
05/24,KPU,420941,KPU BANGGAI | DIANTAR TGL 27
05/24,UMUM,421042,FLIPCARD A4 BKKBN AMBIL KAMIS 30 MEI +62 813-4...


In [53]:
# 'Digital Printing' sales of the DIVISI_ONLINE_WA customer only.
# `.copy()` gives an independent frame, so the column assignments below do not
# write into a slice of `df`.
l2 = df[df.branch=='Digital Printing'].copy()

# 50K program analysis
l2['my'] = pd.to_datetime(l2.tanggal).dt.strftime('%m/%y')
# First whitespace-separated token of the customer name. The vectorised form
# yields NaN for an empty name instead of raising IndexError.
l2['spk'] = l2.customer.astype(str).str.split().str[0]
l2 = l2[l2.spk!='SPK']
l2 = l2[l2.customer.isin(['DIVISI_ONLINE_WA'])]

In [54]:
# Omset per active hour for every customer-service user (`cs`).
# "Active hours" is the span between a user's first and last transaction
# timestamp, truncated to whole minutes and expressed in hours.
records = []
# sort=False keeps the users in order of first appearance; rows with a missing
# `cs` are skipped by groupby instead of producing an empty selection.
for cs_name, rows in l2.groupby('cs', sort=False):
    active_span = rows.tanggal.max() - rows.tanggal.min()
    records.append({
        'cs': cs_name,
        # Whole minutes / 60 — same value as the former days/hours/minutes
        # string parsing, without depending on the text form of the Timedelta.
        'hm': (active_span.total_seconds() // 60) / 60,
        'omset': rows.omset.sum(),
    })

# Build the frame once from the collected records.
eda1 = pd.DataFrame(records, columns=['cs', 'hm', 'omset'])
# A user with a single timestamp has hm == 0, so the ratio becomes inf; report
# it as 0. The result is assigned back because an in-place replace on
# `eda1['omset/hm']` does not update the frame under pandas Copy-on-Write.
eda1['omset/hm'] = (eda1.omset / eda1.hm).replace(np.inf, 0)
eda1 = eda1.sort_values('omset/hm', ascending=False)

In [55]:
# Number of distinct transactions handled by each `cs`, largest first.
eda2 = (
    l2[['cs', 'transaksi']]
    .drop_duplicates()
    .groupby(['cs'])[['transaksi']]
    .count()
    .reset_index()
    .sort_values('transaksi', ascending=False)
)

In [56]:
# Add the per-`cs` transaction count from `eda2` (column `transaksi`) to the
# omset-per-hour table; users without a count keep NaN (left join).
eda1 = eda1.merge(eda2, on='cs', how='left')

In [57]:
# Write the per-`cs` table to Excel without the index column.
eda1.to_excel('./Data/1. Analisa 19 Juni 2024 2.xlsx', index=False)

In [58]:
# Use the complete sales frame: no branch, SPK or customer filter is applied.
# NOTE: `l2` is the same object as `df`, so the helper columns created below
# (`my`, `year`, `spk`) are added to `df` as well.
l2 = df

# 50K program analysis
l2['my'] = pd.to_datetime(l2['tanggal']).dt.strftime('%m/%y')
l2['year'] = pd.to_datetime(l2['tanggal']).dt.year
l2['spk'] = [str(name).split()[0] for name in l2['customer']]

# Omset per branch / division / cs / year / month / product.
eda1 = (
    l2
    .groupby(['branch', 'divisi', 'cs', 'year', 'my', 'produk'])[['omset']]
    .sum()
    .reset_index()
)
eda1.to_excel('./Data/1. Analisa 19 Juni 2024 3.xlsx', index=False)

In [59]:
# June 2024 only: keep the five highest-omset rows of every `cs`
# (users in order of first appearance).
eda1 = eda1[eda1.my=='06/24']
eda1 = pd.concat([
    eda1[eda1.cs == cs_name].sort_values('omset', ascending=False).head(5)
    for cs_name in eda1.cs.unique()
])
eda1.to_excel('./Data/1. Analisa 19 Juni 2024 4.xlsx', index=False)

## **LAPORAN - 3 : Omset Crew Finishing**

In [60]:
# Drop rows with any missing value, and rows whose transaction number is the
# literal string 'nan'.
pf = pf.dropna(axis=0)
pf = pf[pf.transaksi!='nan']

# Copy of the sales data, reduced to one omset total per division and base
# transaction number (the part before the first '/' or '-').
pj = df.copy()
pj['transaksi'] = pj['transaksi'].apply(lambda x: str(x).split('/')[0].split('-')[0])
pj = pj[['divisi','transaksi','produk','omset']]
pj = pj[~pj.divisi.isin(['ATK','Pinjaman Sementara','OE'])]
pj = pj.groupby(['divisi','transaksi'])[['omset']].sum().reset_index()

# Merge the sales totals onto the finishing records.
pf = pf.merge(pj, on='transaksi', how='left')

# Parse the comma-separated crew names ONCE and use the same list both for the
# head count and for the explode below. Previously the count split on ',' while
# the explode split on ', ', so an entry such as "A,B" was divided by two but
# attributed to a single crew named "A,B".
crew_names = (
    pf.crew_finishing.astype(str)
    .str.split(',')
    .apply(lambda names: [name.strip() for name in names])
)
pf['jml_crew_per_project'] = crew_names.str.len()
# Split the omset evenly between the crew members; records without a matching
# sale get 0. The result is assigned back because an in-place fillna on
# `pf['omset']` does not update the frame under pandas Copy-on-Write.
pf['omset'] = (pf.omset / pf.jml_crew_per_project).fillna(0)

# One row per crew member.
pf['crew_finishing'] = crew_names
pf = pf.explode('crew_finishing').reset_index(drop=True)
pf['tanggal'] = pd.to_datetime(pf.timestamps).dt.date
pf = pf.groupby(['tanggal','crew_finishing'])[['omset']].sum().reset_index()
pf.to_excel('./Data/4. dashboard crew finishing.xlsx', index=False)

## **LAPORAN - 4 : Check Piutang Customer All**

In [61]:
# Enrich the `pt` rows with cs / customer / note from the sales data and with
# the customer id from `pl1`.
eda4 = (
    pt
    .merge(df[['transaksi', 'cs', 'customer', 'ket']], how='left', on='transaksi')
    .merge(pl1[['id', 'customer']], on='customer', how='left')
)
# Keep only the first two characters of the id text before any '.'.
eda4['id'] = eda4['id'].map(lambda raw_id: str(raw_id).split('.')[0][:2])
# Report-style headers: underscores become spaces, everything upper-case.
eda4.columns = [name.replace('_', ' ').upper() for name in eda4.columns]

# Remove duplicated rows, then total KREDIT per transaction.
eda4 = (
    eda4
    .drop_duplicates()
    .groupby(['BRANCH', 'ID', 'CUSTOMER', 'CS', 'TANGGAL', 'TRANSAKSI', 'KET'])[['KREDIT']]
    .sum()
    .reset_index()
)

In [62]:
# Receivables to monitor daily: customers whose id prefix is 'AB' plus the
# generic / walk-in customer names.
ab = eda4[eda4.ID == 'AB']
um = eda4[eda4.CUSTOMER.isin(['UMUM','UMUM KREDIT','CASH/TUNAI','DIVISI_ONLINE_WA','ONLINE_WA_PRIBADI'])]
check = pd.concat([ab, um])
# A row that satisfies both conditions appears in `ab` and in `um`; keep it
# once so its KREDIT is not counted twice in the sum below. This relies on
# `eda4` having a unique index (it is built with reset_index()).
check = check[~check.index.duplicated(keep='first')]
check = check[check.BRANCH=='Digital Printing']
check = check.groupby(['BRANCH','CS','CUSTOMER','TANGGAL','TRANSAKSI','KET'])[['KREDIT']].sum().reset_index()
# Only transactions dated in 2024.
check = check[pd.to_datetime(check.TANGGAL).dt.year==2024]

In [63]:
# Write the daily receivables monitoring table to Excel without the index column.
check.to_excel('./Data/Daily Monitoring Piutang.xlsx', index=False)

In [64]:
# Write the receivables recap for all customers to Excel.
# NOTE(review): `index=False` is not passed here, so the index is exported as
# the first column, unlike the export of `check` — confirm this is intended.
eda4.to_excel('./Data/Rekap Piutang Semua Customer.xlsx')

## **LAPORAN - 5 : Check Piutang Customer All**