In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os
import requests
from datetime import datetime
import re
from collections import Counter
import math

# pandas display: show every column when a wide frame is rendered
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

# seaborn theme: configure everything in ONE call. `sns.set(...)` re-applies
# seaborn's default context, style and palette for every argument that is not
# passed, so calling set_palette / set_context / set_style first and
# `sns.set(rc=...)` afterwards discarded the Spectral palette and the
# whitegrid style.
sns.set(
    context='notebook',
    style='whitegrid',
    palette='Spectral',
    font_scale=1,
    rc={"axes.facecolor":"#FFF9ED","figure.facecolor":"#FFF9ED"},
)

# NOTE: this silences every warning, including the pandas deprecation and
# chained-assignment warnings raised by the cells below.
warnings.filterwarnings('ignore')

# **Define Function**

In [2]:
# convert a timedelta into a number of hours
def timedelta_to_hour(timedelta_str):
    """Return the duration `timedelta_str` expressed in hours (float).

    The argument is passed through `pd.to_timedelta`, so it may be a
    timedelta-like object or a string that pandas can parse as a duration.
    """
    seconds_per_hour = 3600
    duration = pd.to_timedelta(timedelta_str)
    return duration.total_seconds() / seconds_per_hour

In [3]:
# format a number as a Rupiah string
def rupiah(value):
    """Format `value` as 'Rp <thousands separated by .>,<decimals>'.

    The number is rendered with `str(value)`, so the digits after the comma
    are exactly the ones Python prints (1234567.5 -> 'Rp 1.234.567,5').

    Fixes over the previous version:
    * values without a decimal point (e.g. ints) no longer raise IndexError;
      they are shown with a ',0' fraction, matching how 1000.0 is shown;
    * the minus sign is kept out of the digit grouping, so -123.0 gives
      'Rp -123,0' instead of 'Rp -.123,0'.

    NOTE: numbers that Python prints in scientific notation (e.g. 1e+16) are
    not expanded and will not be grouped meaningfully.
    """
    text = str(value)

    # keep the sign aside so it is never counted as a digit
    sign = ''
    if text.startswith('-'):
        sign, text = '-', text[1:]

    integer_part, _, fraction_part = text.partition('.')
    if fraction_part == '':
        fraction_part = '0'

    # peel groups of three digits off the right-hand side
    groups = []
    while len(integer_part) > 3:
        groups.insert(0, integer_part[-3:])
        integer_part = integer_part[:-3]
    groups.insert(0, integer_part)

    return "Rp " + sign + ".".join(groups) + "," + fraction_part

In [4]:
def similairty_sentence(text1, text2):
    """Cosine similarity between the character-frequency vectors of two texts.

    Each text is lower-cased and every run of non-word characters (anything
    other than letters, digits and underscore) is removed. What remains is
    counted character by character, so spacing, punctuation and character
    order are ignored.

    Returns 0 when either text has no word characters left, otherwise
    dot(a, b) / (|a| * |b|).
    """
    def _char_profile(text):
        # split on symbols/whitespace, then glue the pieces back together
        squashed = ''.join(re.split(r'\W+', text.lower()))
        return Counter(squashed)

    def _norm(profile):
        return math.sqrt(sum(count ** 2 for count in profile.values()))

    profile_a = _char_profile(text1)
    profile_b = _char_profile(text2)

    norm_a = _norm(profile_a)
    norm_b = _norm(profile_b)

    # avoid dividing by zero for empty profiles
    if norm_a == 0 or norm_b == 0:
        return 0

    overlap = sum(profile_a[ch] * profile_b[ch] for ch in profile_a if ch in profile_b)
    return overlap / (norm_a * norm_b)

In [5]:
def read_file(path, sheet=''):
    """Load every workbook found in folder `path` and stack them in one frame.

    Parameters
    ----------
    path : str
        Folder that contains the Excel exports.
    sheet : str, optional
        Sheet name to read from every workbook. The default '' keeps the
        pandas default (first sheet).

    Returns
    -------
    pandas.DataFrame
        All workbooks concatenated, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        When the folder holds no readable candidate file.

    Notes
    -----
    * File names are sorted, so the row order no longer depends on the
      arbitrary order returned by `os.listdir`.
    * Hidden files (e.g. '.DS_Store'), Office lock files ('~$book.xlsx') and
      sub-folders are skipped instead of being handed to `pd.read_excel`.
    """
    workbooks = sorted(
        name for name in os.listdir(path)
        if not name.startswith(('.', '~$')) and os.path.isfile(os.path.join(path, name))
    )
    if not workbooks:
        raise ValueError(f"No Excel files found in {path!r}")

    read_kwargs = {} if sheet == '' else {'sheet_name': sheet}
    frames = [pd.read_excel(os.path.join(path, name), **read_kwargs) for name in workbooks]
    return pd.concat(frames).reset_index(drop=True)

# **ETL : Extracting, Transforming, Loading**

## Maintenance.Request

In [6]:
# Load every maintenance-request export found in the folder.
path_mr = '/Users/dickyaryanto/Documents/PT Sentosa Abadi Mining/Data/maintenance request'
df2 = read_file(path_mr)

# Turn the four date columns into plain strings; missing values become the
# text 'NaT' / 'nan', which the following step maps back to NaN.
df2['Broken Date'] = df2['Broken Date'].apply(lambda x: str(x))
df2['Request Date'] = df2['Request Date'].apply(lambda x: str(x))
df2['Scheduled Date'] = df2['Scheduled Date'].apply(lambda x: str(x))
df2['Completion Date'] = df2['Completion Date'].apply(lambda x: str(x))

def transformasi_nan_format(x):
    """Map the textual missing markers 'NaT' / 'nan' back to a real NaN.

    Any value whose `str()` is 'NaT' or 'nan' (including real NaN / NaT
    objects) is returned as `np.nan`; every other value is returned as is.

    Uses `np.nan`: the `np.NaN` alias was removed in NumPy 2.0.
    """
    if str(x) in ('NaT', 'nan'):
        return np.nan
    return x

# Replace the textual 'NaT' / 'nan' markers in the date columns with NaN.
df2['Broken Date'] = df2['Broken Date'].apply(transformasi_nan_format)
df2['Request Date'] = df2['Request Date'].apply(transformasi_nan_format)
df2['Scheduled Date'] = df2['Scheduled Date'].apply(transformasi_nan_format)
df2['Completion Date'] = df2['Completion Date'].apply(transformasi_nan_format)

# Forward-fill EVERY column: a missing cell takes the value of the row above.
# NOTE(review): presumably the export leaves header fields blank on the
# detail rows of a request - confirm against the source files.
df2 = df2.ffill()

# Load the stock-picking exports used by the merge further down.
path_p = '/Users/dickyaryanto/Documents/PT Sentosa Abadi Mining/Data/stock picking'
picking = read_file(path_p)

# Keep only requests dated 2023 or later.
df2['Request Date'] = pd.to_datetime(df2['Request Date'])
df2 = df2[df2['Request Date'].dt.year>=2023]

# merge the df2 rows, except the ones with a cancelled status
def get_service(x):
    """Return 'Service' for an empty-string status, otherwise `x` unchanged."""
    return 'Service' if x == '' else x
    
# An empty picking status is labelled 'Service'.
df2['Picking Line/Status'] = df2['Picking Line/Status'].apply(get_service)

# Keep only the three statuses listed here; every other status is dropped.
df2 = df2[df2['Picking Line/Status'].isin(['Service','Ready','Done'])]

# Attach the stock-move lines of each picking (left join keeps requests
# without a matching picking).
df2 = df2.merge(picking[['Reference','Stock Moves/Product','Stock Moves/Quantity Done','Stock Moves/Unit of Measure']], left_on='Picking Line/Reference', right_on='Reference', how='left')

# refill data: 'Service' rows get placeholder picking values
df2_1 = df2[df2['Picking Line/Status']=='Service']
df2_1['Reference'] = '-'
df2_1['Stock Moves/Product'] = '-'
df2_1['Stock Moves/Quantity Done'] = 0
df2_1['Stock Moves/Unit of Measure'] = '-'

df2_2 = df2[df2['Picking Line/Status']!='Service']
df2 = pd.concat([df2_1, df2_2])

# extract maintenance.request
df = df2.copy()
# snake_case column names: whitespace -> '_' and lower case
df.columns = ['_'.join(i.split()).lower() for i in df.columns]
df.drop(columns='reference', inplace=True)

# convert data types
df['request_date'] = pd.to_datetime(df['request_date'])
df = df[df.request_date.dt.year >= 2023]

# Positional rename: the frame must hold exactly these 21 columns in this
# order. NOTE(review): a change in the export layout would silently
# mislabel columns - confirm against the source files.
df.columns=['company','spk','stage','broken','request','schedule','done','oprator','category_equipment','equipment_name',
            'equipment_code','hm','km','requirement_type','category_maintenance','picking_reference','picking_status','note','description','qty','uom']
# operator name = text after the last '] ' of the operator field
df['oprator_name'] = df.oprator.apply(lambda x: str(x).split('] ')[-1])
# drop two specific SPK values
df = df[df.spk!='D85-06']
df = df[df.spk!='PC200-12']
# SPK number = integer after the last '/'; sort on it, then store it as text
df['spk'] = df.spk.apply(lambda x: int(str(x).split('/')[-1]))
df.sort_values('spk', ascending=False, inplace=True)
df['spk'] = df['spk'].astype(str)

def get_description(x):
    """Return 'Service Only' when `x` compares equal to False, else `x`.

    The check is an equality test, so 0 is treated like False.
    """
    return 'Service Only' if x == False else x

# A description equal to False becomes 'Service Only'; units are upper-cased.
df['description'] = df['description'].apply(get_description)
df['uom'] = df['uom'].apply(lambda x: str(x).upper())

# Parse the four date columns and make the two meter readings numeric.
df['broken'] = pd.to_datetime(df['broken'])
df['request'] = pd.to_datetime(df['request'])
df['schedule'] = pd.to_datetime(df['schedule'])
df['done'] = pd.to_datetime(df['done'])
df['hm'] = df['hm'].astype(float)
df['km'] = df['km'].astype(float)

# Rows whose equipment category equals False get the placeholder
# 'Tidak Diketahui' in the three equipment columns.
_ = df[df.category_equipment==False]
_['category_equipment'] = 'Tidak Diketahui'
_['equipment_name'] = 'Tidak Diketahui'
_['equipment_code'] = 'Tidak Diketahui'
__ = df[df.category_equipment!=False]
# NOTE: 'spk' was cast to str above, so this sort is lexicographic
df = pd.concat([_, __]).sort_values('spk', ascending=False)

def retext_equpment_name(x):
    """Collapse the listed HINO model codes into their family name.

    The three 'HINO ZY1EW...' codes become 'HINO ZY', 'HINO ZS1EPPD-XS'
    becomes 'HINO ZS'; any other value is returned unchanged.
    """
    zy_models = ('HINO ZY1EWPD-XS', 'HINO ZY1EWRD-XS', 'HINO ZY1EWRN-XS')
    zs_models = ('HINO ZS1EPPD-XS',)

    if x in zy_models:
        return 'HINO ZY'
    if x in zs_models:
        return 'HINO ZS'
    return x

# Group the HINO model codes into their family names.
df['equipment_name'] = df['equipment_name'].apply(retext_equpment_name)

# Fix the 'TRYE' typo. The result is assigned back: a chained
# `df[col].replace(..., inplace=True)` only updates a temporary Series when
# pandas Copy-on-Write is active, leaving `df` unchanged.
df['category_maintenance'] = df['category_maintenance'].replace(
    {'TRYE': 'TYRE', 'TRYE (Archive)': 'TYRE'}
)

# lookup table: known (category_maintenance, description) pairs
tbl_cm = df[df.category_maintenance!=False][['category_maintenance','description']].drop_duplicates()

def get_correct_category_maintenance(x):
    """Look up the maintenance category for description `x` in `tbl_cm`.

    1. If `tbl_cm` has a row with exactly this description, its category is
       returned.
    2. Otherwise the row whose description is most similar to `x`
       (`similairty_sentence`) provides the category.
    3. If no comparison is possible (`x` is not text, or `tbl_cm` has no text
       descriptions) `x` itself is returned, as before.

    The previous version sorted on the misspelled column 'scor' and called
    the misspelled method `.uniqeu()`; both raised inside a bare `except`, so
    step 2 never ran and `x` was always returned.
    """
    exact = tbl_cm[tbl_cm.description == x]
    if len(exact) > 0:
        return exact['category_maintenance'].unique().tolist()[0]

    # similarity is only defined between two strings
    candidates = tbl_cm[tbl_cm.description.apply(lambda d: isinstance(d, str))]
    if not isinstance(x, str) or len(candidates) == 0:
        return x

    scores = candidates.description.apply(lambda d: similairty_sentence(x, d))
    best_position = int(scores.to_numpy().argmax())
    return candidates['category_maintenance'].iloc[best_position]

# Rows that already carry a maintenance category are kept as they are ...
df_ = df[df.category_maintenance!=False]
# ... rows whose category equals False get one derived from their description.
df__ = df[df.category_maintenance==False].assign(
    category_maintenance=lambda missing: missing.description.apply(get_correct_category_maintenance)
)
df = pd.concat([df_, df__])

## Product.Template

In [7]:
### extract product.template
path_pt = '/Users/dickyaryanto/Documents/PT Sentosa Abadi Mining/Data/product template'
product = read_file(path_pt)
# snake_case column names
product.columns = ['_'.join(i.lower().split()) for i in product.columns]
product['unit_of_measure'] = product['unit_of_measure'].apply(lambda x: str(x).upper())
product['name2'] = '#' + product['name'] + '#' + product['unit_of_measure']
# one row per product so the merge below cannot multiply request rows
product.drop_duplicates(subset='product', inplace=True)

# merge the product data to obtain the cost of each description
df = pd.merge(df, product[['name', 'product','cost']], left_on='description', right_on='product', how='left').drop(columns='product')
# Assign the filled column back: a chained `df['cost'].fillna(..., inplace=True)`
# does not update `df` when pandas Copy-on-Write is active.
df['cost'] = df['cost'].fillna(0)
df['cost'] = df['cost'].astype(float)
df['qty'] = df['qty'].astype(float)

# stringify so the missing values show up as the text 'nan' for the
# clean-up that follows
df['description'] = df['description'].apply(lambda x: str(x))
df['qty'] = df['qty'].apply(lambda x: str(x))
df['cost'] = df['cost'].apply(lambda x: str(x))

def get_decription(x):
    """Return 'Service' when `str(x)` is 'nan', otherwise `x` unchanged."""
    return 'Service' if str(x) == 'nan' else x

def get_qty(x):
    """Return 0 when `str(x)` is 'nan', otherwise `x` unchanged."""
    return 0 if str(x) == 'nan' else x

# NOTE(review): `get_description` only rewrites values equal to False, so on
# the stringified columns these calls change nothing; the 'nan' texts are
# handled by the `.replace('nan', 'Service')` lines below.
df['description'] = df.description.apply(get_description)
df['qty'] = df['qty'].apply(get_qty)
df['cost'] = df['cost'].apply(get_qty)
df = df.rename(columns={'name':'description_2'})
df['description_2'] = df['description_2'].apply(lambda x: str(x))
df['description_2'] = df.description_2.apply(get_description)
df['description_2'] = df['description_2'].replace('nan', 'Service')
df['description'] = df['description'].replace('nan', 'Service')
df['uom'] = df['uom'].replace('NAN', '-')
df['cost'] = df['cost'].astype(float)
df['qty'] = df['qty'].astype(float)

# conditional: every 'HONGYAN KINKAN430' request counts as Internal
a = df[df.equipment_name=='HONGYAN KINKAN430'].copy()
b = df[df.equipment_name!='HONGYAN KINKAN430']
# assign the result back: an attribute-level `replace(..., inplace=True)` on a
# filtered frame is not guaranteed to write through (pandas Copy-on-Write)
a['requirement_type'] = a['requirement_type'].replace('External', 'Internal')
df = pd.concat([a, b])

## Production.Timesheet

In [8]:
### extract production.timesheet
path_ts = '/Users/dickyaryanto/Documents/PT Sentosa Abadi Mining/Data/production timesheet'
df2 = read_file(path_ts)

# snake_case column names, then keep timesheets dated 2023 or later
df2.columns = ['_'.join(i.split()).lower() for i in df2.columns]
df2 = df2[df2.date.dt.year >= 2023]
# df2 = df2[df2['date']!=pd.to_datetime(str(pd.to_datetime(datetime.now())).split()[0])]
# keep only the text before the first '.' of both readings (e.g. '7.0' -> '7')
df2['hm_realtime_start'] = df2['hm_realtime_start'].apply(lambda x: str(x).split('.')[0])
df2['hm_realtime_end'] = df2['hm_realtime_end'].apply(lambda x: str(x).split('.')[0])

def get_true_time(date, time):
    """Build '<date> <HH>:00:00' from a date and an hour value.

    `time` is stringified; a two-character value is used as is, any other
    length gets a leading '0'. The date part is the first whitespace-separated
    token of `str(date)`.

    Returns the combined string when pandas can parse it as a datetime,
    otherwise the marker 'error'.
    """
    hour = str(time)
    if len(hour) != 2:
        hour = '0' + hour

    stamp = str(date).split()[0] + ' ' + hour + ':00:00'

    try:
        pd.to_datetime(stamp)  # validation only; the string itself is returned
    except Exception:
        return 'error'
    return stamp

# Build the engine start / end timestamps (as strings, or the marker 'error').
df2['date_engine_start'] = df2.apply(lambda x: get_true_time(x['date'], x['hm_realtime_start']), axis=1)
df2['date_engine_end'] = df2.apply(lambda x: get_true_time(x['date'], x['hm_realtime_end']), axis=1)

# split data
# NOTE(review): df2_err takes rows where BOTH values are 'error' and df2 keeps
# rows where NEITHER is; a row with exactly one 'error' lands in neither frame
# and is dropped - confirm this is intended.
df2_err = df2[(df2['date_engine_start']=='error') & (df2['date_engine_end']=='error')]
df2 = df2[(df2['date_engine_start']!='error') & (df2['date_engine_end']!='error')]

df2['date_engine_start'] = pd.to_datetime(df2['date_engine_start'])
df2['date_engine_end'] = pd.to_datetime(df2['date_engine_end'])

# clean-up path for the error rows; may stay unused when the data is clean
# engine duration = difference between the two readings
df2_err['actual_engine'] = df2_err['hm_realtime_end'].astype(float) - df2_err['hm_realtime_start'].astype(float)
def retext_time(x):
    """Format an hour value as '<HH>:00:00'.

    `x` is stringified; a two-character value is used as is, any other length
    gets a leading '0'.
    """
    hour = str(x)
    prefix = '' if len(hour) == 2 else '0'
    return prefix + hour + ':00:00'

def get_time_from_err(x):
    """Rebuild start / end clock times from an engine duration `x`.

    The start is fixed at hour 7 and the end is 7 + `x`, using only the part
    before the decimal point. Both are formatted with `retext_time` and
    returned as {'start': ..., 'end': ...}.
    """
    start_hour = 7
    end_hour = start_hour + x
    return {
        'start': retext_time(str(start_hour)),
        'end': retext_time(str(end_hour).split('.')[0]),
    }

# Recover the error rows. The whole step is skipped when there are none: the
# previous row-by-row loop ended in `pd.concat([])`, which raises on clean data.
if len(df2_err) > 0:
    # rebuild clock times from the engine duration (start fixed by get_time_from_err)
    df2_err['hm_realtime_end'] = df2_err['actual_engine'].apply(lambda x: get_time_from_err(x)['end'])
    df2_err['hm_realtime_start'] = df2_err['actual_engine'].apply(lambda x: get_time_from_err(x)['start'])

    df2_err['date_engine_start'] = df2_err['date'].apply(lambda x: str(x).split()[0]) + ' ' + df2_err['hm_realtime_start']
    df2_err['date_engine_end'] = df2_err['date'].apply(lambda x: str(x).split()[0]) + ' ' + df2_err['hm_realtime_end']

    df2_err['date_engine_start'] = pd.to_datetime(df2_err['date_engine_start'])

    # Parse all end timestamps at once; anything that is not a valid
    # 'YYYY-MM-DD HH:MM:SS' value becomes NaT instead of raising.
    df2_err['date_engine_end'] = pd.to_datetime(
        df2_err['date_engine_end'], format='%Y-%m-%d %H:%M:%S', errors='coerce'
    )
    unparseable_end = df2_err['date_engine_end'].isna()

    # rows that still cannot be dated are kept aside, marked 'error' as before
    error_actual_date = df2_err[unparseable_end].assign(date_engine_end='error')
    df2_err = df2_err[~unparseable_end]
else:
    error_actual_date = df2_err.copy()

df2 = pd.concat([df2, df2_err])

df2['date_engine_end'] = pd.to_datetime(df2['date_engine_end'])
df2['date_engine_start'] = pd.to_datetime(df2['date_engine_start'])
# engine running time in hours
df2['actual_engine'] = df2.date_engine_end - df2.date_engine_start
df2['actual_engine'] = df2['actual_engine'].apply(timedelta_to_hour)
df2 = df2[df2.state!='Draft']

# Assign the filled columns back: a chained `df2[col].fillna(..., inplace=True)`
# does not update `df2` when pandas Copy-on-Write is active.
df2['component_line/out_weight'] = df2['component_line/out_weight'].fillna(0)
df2['component_line/net_weight/voll'] = df2['component_line/net_weight/voll'].fillna(0)

## Maintenance.Equipment

In [9]:
# Load the equipment master data.
path_eq = '/Users/dickyaryanto/Documents/PT Sentosa Abadi Mining/Data/maintenance equipment'
tbl_equipment2 = read_file(path_eq)

# equipment_code = '<Name>/<Serial Number>', both rendered with str()
name_text = tbl_equipment2['Name'].apply(str)
serial_text = tbl_equipment2['Serial Number'].apply(str)
tbl_equipment2['equipment_code'] = name_text + '/' + serial_text

# snake_case column names
tbl_equipment2.columns = ['_'.join(column.lower().split()) for column in tbl_equipment2.columns]

## Purchase.Order

### 1. PO Node3

In [10]:
# Load the purchase-order exports.
path_po = '/Users/dickyaryanto/Documents/PT Sentosa Abadi Mining/Data/purchase order'
po = read_file(path_po)

# snake_case column names
po.columns = ['_'.join(column.lower().split()) for column in po.columns]

# line total is computed BEFORE the forward fill, so it is based on each
# row's own quantity and unit price
po['total'] = po['order_lines/quantity'] * po['order_lines/unit_price']
po = po.ffill().drop(columns=['source_document','purchase_requests','order_lines/description'])

### 2. PO Selain Dari Node3

In [11]:
### extract the purchase orders kept outside Node3 (Google Drive workbook)
# `format=xlsx` is the export parameter; the previous `format.xlsx` was not a
# valid key=value pair.
url = "https://docs.google.com/spreadsheets/d/1Ay0TNz8yPhdNzr4KhGmNy1QENGJS6B-Z/export?format=xlsx"
output_filename = "po_sam.xlsx"

# Download the workbook. A failed request now raises here, instead of
# surfacing later as a missing or stale file in `pd.read_excel`.
response = requests.get(url, timeout=60)
response.raise_for_status()
with open(output_filename, "wb") as f:  # 'wb' overwrites any previous copy
    f.write(response.content)

# snake_case column names on both sheets
po_sam_rekap = pd.read_excel(output_filename, sheet_name='rekap')
po_sam_rekap.columns = ['_'.join(i.lower().split()) for i in po_sam_rekap.columns]

po_sam_detail = pd.read_excel(output_filename, sheet_name='detail')
po_sam_detail.columns = ['_'.join(i.lower().split()) for i in po_sam_detail.columns]

# merge the two sheets on the PO number and rename to the notebook's schema
po_sam = po_sam_rekap.merge(po_sam_detail[['nopo','nodt','tglterima']], on='nopo')
po_sam = po_sam[['nopo','nodt','tglterima','description','qty','ppn']]
po_sam.columns = ['no_po','name','broken','description','qty','cost']

def get_same_text(text1):
    """Match a free-text unit name against the equipment master table.

    The row of `tbl_equipment2` whose `name` is most similar to `text1`
    (`similairty_sentence`) is accepted when its score is at least 0.9.

    Returns a dict of one-element lists with the keys 'cat_eq', 'eq_model',
    'name' and 'id'. Without an accepted match (or when the table is empty or
    lacks an expected column) 'name' is `text1` and the other entries are NaN.

    Changes: `np.nan` replaces `np.NaN` (removed in NumPy 2.0), the builtin
    `id` is no longer shadowed, and the best row is located directly instead
    of copying and sorting the whole table on every call.
    """
    no_match = {'cat_eq':[np.nan],'eq_model':[np.nan],'name':[text1],'id':[np.nan]}
    if len(tbl_equipment2) == 0:
        return no_match

    scores = tbl_equipment2['name'].apply(lambda x: similairty_sentence(text1, x))
    best_position = int(scores.to_numpy().argmax())
    if not (scores.iloc[best_position] >= 0.9):
        return no_match

    best = tbl_equipment2.iloc[best_position]
    try:
        return {
            'cat_eq':[best['equipment_category']],
            'eq_model':[best['equipment_model']],
            'name':[best['name']],
            'id':[best['serial_number']],
        }
    except KeyError:
        return no_match

def get_same_text_category_maintenance(text1):
    """Guess the maintenance category of a free-text description.

    `text1` is compared (`similairty_sentence`) with every distinct
    (category_maintenance, description) pair of `df`; the category of the most
    similar description is returned when its score is at least 0.8, otherwise
    NaN.

    Changes: `np.nan` replaces `np.NaN` (removed in NumPy 2.0), only the two
    needed columns are copied, and an empty table returns NaN instead of
    raising.
    """
    candidates = df[['category_maintenance','description']].drop_duplicates()
    if len(candidates) == 0:
        return np.nan

    scores = candidates['description'].apply(lambda x: similairty_sentence(text1, x))
    best_position = int(scores.to_numpy().argmax())
    if not (scores.iloc[best_position] >= 0.8):
        return np.nan
    return candidates['category_maintenance'].iloc[best_position]

# Run the fuzzy equipment match ONCE per row (it was evaluated four times,
# once per derived column) and read the four fields from the result.
equipment_match = po_sam['name'].apply(get_same_text)
po_sam['category_equipment'] = equipment_match.apply(lambda match: match['cat_eq'][0])
po_sam['equipment_name'] = equipment_match.apply(lambda match: match['eq_model'][0])
po_sam['id'] = equipment_match.apply(lambda match: match['id'][0])
po_sam['name'] = equipment_match.apply(lambda match: match['name'][0])
po_sam.fillna('', inplace=True)
# str() on the id keeps the concatenation valid for numeric serial numbers
po_sam['equipment_code'] = po_sam.name + '/' + po_sam['id'].apply(str)
po_sam['category_maintenance'] = po_sam.description.apply(lambda x: get_same_text_category_maintenance(x))
# whatever is still missing (the unmatched categories) is labelled 'HMSI'
po_sam.fillna('HMSI', inplace=True)

def _company_code(no_po):
    """Second '/'-separated part of a PO number, or '' when there is none."""
    parts = str(no_po).split('/')
    return parts[1] if len(parts) > 1 else ''

po_sam['company_code'] = po_sam.no_po.apply(_company_code)

po_sam['company'] = po_sam['company_code'].apply(lambda x: 'PT. SENTOSA ABADI MINING' if x == 'SAM' else 'Unknown')

po_sam = po_sam[['company','broken','category_equipment','equipment_name','equipment_code','name','category_maintenance','description','qty','cost']]

## Standar Pengukuran

In [12]:
# Folder with the measurement-standard workbooks; each table lives on its own sheet.
path_standar_ukur = '/Users/dickyaryanto/Documents/PT Sentosa Abadi Mining/Data/standar ukur'

tbl_jarak, tbl_kapasitas, tbl_waktu, tbl_periodical = [
    read_file(path_standar_ukur, sheet_name)
    for sheet_name in (
        'tbl jarak move type',
        'tbl kapasitas dt',
        'tbl waktu move type',
        'tbl periodical unit',
    )
]

## Produksi Actual Site

### 1. Absen Driver

In [13]:
# url = "https://docs.google.com/spreadsheets/d/1CDBo-9hz0-Vc7e-hgjG1qdIZ07-PEdLlMatzssJhTJU/export?format.xlsx"
# try:
#     os.remove('absen_driver.xlsx')
# except:
#     None
    
# output_filename = 'absen_driver.xlsx'

# # get the data from spreadsheet
# response = requests.get(url)
# if response.status_code == 200:
#     with open(output_filename, "wb") as f:
#         f.write(response.content)

# # read data absen driver dari tbl produksi actual site
# absen_driver = pd.read_excel('absen_driver.xlsx', sheet_name='Absen Driver')
# absen_driver = absen_driver[1:]
# absen_driver.columns = ['_'.join(i.lower().split()) for i in absen_driver.columns]

### 2. Grade

In [14]:
# url = "https://docs.google.com/spreadsheets/d/12_BTs8ftDwcp0DCva7wTZzLuzaE9nkKrgJ_onoqMbDM/export?format.xlsx"
# try:
#     os.remove('grade_report.xlsx')
# except:
#     None
    
# output_filename = 'grade_report.xlsx'

# # get the data from spreadsheet
# response = requests.get(url)
# if response.status_code == 200:
#     with open(output_filename, "wb") as f:
#         f.write(response.content)

# # read data grade
# df_grade = pd.read_excel('grade_report.xlsx', sheet_name='Laporan Grade', names=['timestamps','date','dome id',
#                                                                                          'tonase','ni','fe','sio2/mgo','nan','kontrak','standar_grade'])
# # read grade
# grade = df_grade[1:]
# grade = grade[['timestamps','date','dome id','tonase','ni','fe','sio2/mgo']]

# # read standar grade
# tbl_grade = df_grade[1:]
# tbl_grade = tbl_grade[['kontrak','standar_grade']]

### 3. Accident Rate

In [15]:
# url = "https://docs.google.com/spreadsheets/d/19y9xZ3L3AMYGDZOVG_6Q5hNX7HFQSW__tf6YXX_Ojgk/export?format.xlsx"
# try:
#     os.remove('accident_rate.xlsx')
# except:
#     None
    
# output_filename = 'accident_rate.xlsx'

# # get the data from spreadsheet
# response = requests.get(url)
# if response.status_code == 200:
#     with open(output_filename, "wb") as f:
#         f.write(response.content)

# # read data absen driver dari tbl produksi actual site
# accident = pd.read_excel('accident_rate.xlsx')
# accident = accident[1:]
# accident.columns = ['timestamps','id','dol','ba','kat_kecelakaan','tempat',
#                     'name','departemen','emp','umur','estimate','keterangan']

# **EDA : Exploratory Data Analysis**

## Task 1 : DASHBOARD

Berapa waktu rata-rata yang dibutuhkan untuk team Workshop memperbaiki setiap request dari setiap crew produksi/transportasi?

In [16]:
t1 = df.copy()
# elapsed time from breakdown to completion, and from schedule to completion
t1['t_broken_done'] = t1.done - t1.broken
t1['t_schedule_done'] = t1.done - t1.schedule
# An empty requirement type counts as Internal. Assigned back because a chained
# `t1[col].replace(..., inplace=True)` does not update `t1` under pandas
# Copy-on-Write.
t1['requirement_type'] = t1['requirement_type'].replace('', 'Internal')

In [17]:
# Internal requests keep their own part lines ...
t1_in = t1[t1.requirement_type=='Internal']
# ... external ones get spk as text and lose the part columns, which are
# re-attached from the purchase orders further down.
t1_ex = (
    t1[t1.requirement_type=='External']
    .assign(spk=lambda external: external['spk'].apply(str))
    .drop(columns=['description','qty','uom','description_2','cost'])
)

In [18]:
# Purchase orders whose no_spk is not False; the SPK reference is reduced to
# the text after its last '/'.
po_spk = po[po.no_spk!=False].assign(
    no_spk=lambda linked: linked['no_spk'].apply(lambda ref: str(ref).split('/')[-1])
)

In [19]:
# Attach the purchase-order lines to the external requests (left join on the SPK number).
t1_ex = t1_ex.merge(
    po_spk[[
        'no_spk',
        'order_lines/product',
        'order_lines/quantity',
        'order_lines/unit_of_measure',
        'total',
    ]],
    left_on='spk',
    right_on='no_spk',
    how='left',
)

In [20]:
# Give the purchase-order columns the names used by the internal requests,
# then upper-case the unit of measure (missing values become the text 'NAN').
t1_ex = t1_ex.rename(
    columns={
        'order_lines/product':'description',
        'order_lines/quantity':'qty',
        'order_lines/unit_of_measure':'uom',
        'total':'cost',
    }
)
t1_ex['uom'] = t1_ex['uom'].map(lambda unit: str(unit).upper())

In [21]:
# Fills are assigned back: a chained `t1_ex[col].fillna(..., inplace=True)`
# does not update `t1_ex` when pandas Copy-on-Write is active.
t1_ex['description_2'] = t1_ex.description.apply(get_description).fillna('Service')
# same column set and order as the internal requests
t1_ex = t1_ex[t1_in.columns]
t1_ex['cost'] = t1_ex['cost'].fillna(0)
# 'cost' holds the line total here; convert it to a unit cost.
# NOTE(review): a missing or zero qty yields NaN / inf - confirm that is acceptable.
t1_ex['cost'] = t1_ex.cost / t1_ex.qty

In [22]:
# Stack internal and external requests again, highest spk first.
t1 = (
    pd.concat([t1_in, t1_ex])
    .sort_values('spk', ascending=False)
    .reset_index(drop=True)
)

In [23]:
# convert a timedelta into a number of minutes
def timedelta_to_minute(timedelta_str):
    """Return the duration `timedelta_str` expressed in minutes (float).

    The argument is passed through `pd.to_timedelta`, so it may be a
    timedelta-like object or a string that pandas can parse as a duration.
    """
    seconds_per_minute = 60
    duration = pd.to_timedelta(timedelta_str)
    return duration.total_seconds() / seconds_per_minute

# Durations in minutes; missing durations count as 0. The fill is chained on
# the new Series and assigned, because `t1[col].fillna(..., inplace=True)` does
# not update `t1` when pandas Copy-on-Write is active.
t1['t_broken_done'] = t1['t_broken_done'].apply(timedelta_to_minute).fillna(0)
t1['t_schedule_done'] = t1['t_schedule_done'].apply(timedelta_to_minute).fillna(0)
t1 = t1[t1.stage != 'Cancel']
# line cost = unit cost x quantity
t1['cost'] = t1.cost * t1.qty

def get_time(x):
    """Convert a number of minutes into a Timedelta of whole hours and minutes.

    Fractions of a minute are truncated. A missing `x` returns `pd.NaT`.

    The previous version referenced the undefined name `formatted_time`
    (the local was spelled `formated_time`) and raised NameError on every
    call; the Timedelta is now built directly from the two components.
    """
    if pd.isna(x):
        return pd.NaT
    hours = int(x // 60)
    remaining_minutes = int(x % 60)
    return pd.Timedelta(hours=hours, minutes=remaining_minutes)

# constant 1 per row
t1['number'] = 1
# Fix the 'TRYE' typo; assigned back because a chained
# `t1[col].replace(..., inplace=True)` does not update `t1` under pandas
# Copy-on-Write.
t1['category_maintenance'] = t1['category_maintenance'].replace('TRYE', 'TYRE')
# unit name = part of the equipment code before the first '/'
t1['name'] = t1.equipment_code.apply(lambda x: str(x).split('/')[0])

In [24]:
# Collapse every run of whitespace in the equipment name into a single space.
t1['equipment_name'] = t1['equipment_name'].map(lambda raw_name: ' '.join(str(raw_name).split()))

def retext_equipment_name_t1(x):
    """Reduce an equipment name to its model family.

    'SAKAI SAKAI SV 525 D' maps to 'SAKAI SV'. Every other name is first cut
    to its first two words; if that short name is one of the listed variants
    the family label is returned, otherwise the short name up to its first '-'.
    """
    if x in ['SAKAI SAKAI SV 525 D']:
        return 'SAKAI SV'

    short_name = ' '.join(x.split()[:2])

    # (family label, short names that belong to it) - checked in this order
    family_rules = (
        ('KOMATSU PC200', ('KOMATSU PC200-8M1', 'KOMATSU PC200-10/S21', 'KOMATSU PC200-8M0', 'KOMATSU PC200-8')),
        ('HINO WU352R', ('HINO WU352R-HKMRJD8B', 'HINO WU352R-HKMRJD3L')),
        ('KOBELCO SK', ('KOBELCO SK200-10', 'KOBELCO SK330-14', 'KOBELCO SK')),
        ('KOMATSU PC300', ('KOMATSU PC300SE-8', 'KOMATSU PC300SE-8MO', 'KOMATSU PC300-8M0', 'KOMATSU PC300SE-8M0')),
        ('KOMATSU PC400', ('KOMATSU PC400LCSE-8',)),
        ('KOMATSU D85', ('KOMATSU D85ESS-2', 'KOMATSU D85E-SS-2')),
        ('KOMATSU D65', ('KOMATSU D65P-12',)),
        ('KOMATSU GD', ('KOMATSU GD511A-1', 'KOMATSU GD535-5')),
        ('KOMATSU HM400', ('KOMATSU HM400-3R', 'KOMATSU HM400')),
        ('KOMATSU PC500', ('KOMATSU PC500LC-10R',)),
        ('PAJERO', ('PAJERO SPORT', 'PAJERO')),
    )
    for family, variants in family_rules:
        if short_name in variants:
            return family

    # unknown model: keep the text before the first '-'
    return short_name.split('-')[0]

# Reduce every equipment name to its model family, then drop the 'NISSAN CWA' rows.
t1['equipment_name'] = t1['equipment_name'].apply(retext_equipment_name_t1)
t1 = t1[t1['equipment_name'] != 'NISSAN CWA']

In [25]:
# make sure the output folder exists so the export cannot fail on a fresh checkout
os.makedirs('./Report', exist_ok=True)
t1.to_excel('./Report/task1.xlsx', index=False)

In [26]:
def _daily_spk_counts(frame, date_col, status):
    """Count distinct SPKs per day, equipment category and equipment name.

    `date_col` ('broken' or 'done') provides the day; each SPK is counted once
    (first row wins). The result has the columns date, category_equipment,
    equipment_name, values and a constant `status` label.
    """
    events = frame[[date_col,'category_equipment','equipment_name','spk']].copy()
    events['date'] = events[date_col].dt.date
    events = events.drop_duplicates(subset='spk')
    counts = (
        events
        .groupby(['date','category_equipment','equipment_name'])[['spk']]
        .count()
        .reset_index()
        .rename(columns={'spk':'values'})
    )
    counts['status'] = status
    return counts

# Same aggregation for both events (it used to be written out twice, on
# slices that were then assigned to).
broken = _daily_spk_counts(t1, 'broken', 'Broken')
done = _daily_spk_counts(t1, 'done', 'Done')

t1_1 = pd.concat([broken, done])
t1_1 = t1_1[pd.to_datetime(t1_1.date).dt.year >= 2023]
t1_1 = t1_1.rename(columns={'date':'request'})

# make sure the output folder exists before exporting
os.makedirs('./Report', exist_ok=True)
t1_1.to_excel('./Report/task1_1.xlsx', index=False)

## Task 2 : Dashboard

KPI Detail

In [27]:
# measure the PA of each unit
# .copy() so the column assignments below act on an independent frame
jam_operasional = df2[['company','project/name','production','move_type','name','date','date_engine_start','date_engine_end','equipment','component_line/net_weight/voll']].copy()
# The former rename of 'actual_engine' was removed: that column is not part of
# the selection above, so the rename never had any effect.
jam_operasional = jam_operasional.rename(columns={'component_line/net_weight/voll':'produksi'})
# NOTE(review): presumably a kg -> tonne conversion - confirm the unit.
jam_operasional['produksi'] = jam_operasional['produksi'] / 1000

In [28]:
def get_work_date(x):
    """Return the 07:00-17:00 window of the day contained in `x`.

    The day is the first whitespace-separated token of `str(x)`. The result
    is {'start': '<day> 07:00:00', 'end': '<day> 17:00:00'} (strings).
    """
    day = str(x).split()[0]
    return {
        'start': day + ' 07:00:00',
        'end': day + ' 17:00:00',
    }

In [29]:
# Working window (07:00-17:00) of each timesheet date, stored as datetimes.
work_window = jam_operasional['date'].apply(get_work_date)
jam_operasional['actual_start_date'] = pd.to_datetime(work_window.apply(lambda window: window['start']))
jam_operasional['actual_end_date'] = pd.to_datetime(work_window.apply(lambda window: window['end']))

In [30]:
def get_real_actual_work(actual_start, actual_end, engine_start, engine_end):
    """Hours the engine ran inside the working window.

    The overlap runs from the later of the two start times to the earlier of
    the two end times; it is converted with `timedelta_to_hour`. The value is
    negative when the engine interval lies outside the window.

    The previous version built and filtered two one-column DataFrames per call
    just to take a max and a min; a two-element Series gives the same values
    (missing timestamps are skipped in the same way) at a fraction of the cost.
    """
    start = pd.Series([actual_start, engine_start]).max()
    end = pd.Series([actual_end, engine_end]).min()
    return timedelta_to_hour(end - start)

In [31]:
# Engine hours that fall inside the working window, computed row by row.
jam_operasional['hm_engine_in_work_productivity'] = jam_operasional.apply(
    lambda row: get_real_actual_work(
        row['actual_start_date'],
        row['actual_end_date'],
        row['date_engine_start'],
        row['date_engine_end'],
    ),
    axis=1,
)

In [32]:
# Sum engine hours and production per timesheet / date / equipment, reverse the
# row order and rename the timesheet name to spk_project.
jam_operasional = (
    jam_operasional
    .groupby(['company','project/name','production','move_type','name','date','equipment'])[
        ['hm_engine_in_work_productivity','produksi']
    ]
    .sum()
    .reset_index()
    .iloc[::-1]  # same order as sorting the fresh 0..n-1 index descending
    .reset_index(drop=True)
    .rename(columns={'name':'spk_project'})
)
# keep the calendar day only, as text
jam_operasional['date'] = jam_operasional['date'].dt.date.apply(str)

In [33]:
def get_jam_stanby_true(x):
    """Replace a negative value by 0; any other value is returned unchanged."""
    return 0 if x < 0 else x

In [34]:
# Negative overlaps (engine ran outside the working window) count as 0 hours.
jam_operasional['hm_engine_in_work_productivity'] = (
    jam_operasional['hm_engine_in_work_productivity'].map(get_jam_stanby_true)
)

In [35]:
from tqdm import tqdm

# One row per equipment and day. Groups are visited in order of first
# appearance (equipment first, then day). Rows are collected as plain dicts and
# turned into a frame once; the previous loop built a one-row DataFrame per
# group (with a duplicated, unreachable `except` branch) and crashed in
# `pd.concat` when there was nothing to combine.
daily_source = jam_operasional.copy()
daily_source['date'] = pd.to_datetime(daily_source['date']).dt.date

records = []
for _equipment, per_equipment in tqdm(daily_source.groupby('equipment', sort=False)):
    for _day, m in per_equipment.groupby('date', sort=False):
        records.append({
            'company': m['company'].iloc[0],
            'project/name': m['project/name'].iloc[0],
            # str() keeps the join valid when a value is not text
            'production': ';'.join(map(str, m['production'])),
            'spk_project': ';'.join(map(str, m['spk_project'])),
            'move_type': m['move_type'].iloc[0],
            'date': m['date'].iloc[0],
            'equipment': m['equipment'].iloc[0],
            'hm_engine_in_work_productivity': m['hm_engine_in_work_productivity'].sum(),
            'produksi': m['produksi'].sum(),
        })

jam_operasional = pd.DataFrame(
    records,
    columns=['company','project/name','production','spk_project','move_type','date',
             'equipment','hm_engine_in_work_productivity','produksi'],
)

100%|████████████████████████████████████████████████████████████████████████████████| 339/339 [00:14<00:00, 22.84it/s]


In [36]:
# Breakdown view of the maintenance requests; the unit name is the part of the
# equipment code before the first '/'.
jam_breakdown = (
    df[['stage','spk','category_equipment','equipment_name','equipment_code','broken','done']]
    .assign(name=lambda part: part.equipment_code.apply(lambda code: str(code).split('/')[0]))
)
jam_breakdown = jam_breakdown[['stage', 'spk', 'category_equipment', 'equipment_name','equipment_code', 'name', 'broken', 'done']]

In [37]:
# Rename spk, then drop cancelled requests and rows whose equipment code equals False.
jam_breakdown = (
    jam_breakdown
    .rename(columns={'spk':'spk_workshop'})
    .loc[lambda part: (part.stage != 'Cancel') & (part.equipment_code != False)]
)

# fill the done time with the end of the working day when the repair is not finished yet
from datetime import datetime
def get_done_time(x, breakdown):
    """Return the completion time `x`, or a fallback when it is missing.

    When `x` is missing (None / NaN / NaT, or one of their text forms) the
    result is the 'end' string of `get_work_date(breakdown)`, i.e. 17:00:00 on
    the breakdown day; otherwise `x` is returned unchanged.

    The previous test compared `str(x)` with `None` and `np.NaN`, which can
    never be equal to a string (and `np.NaN` no longer exists in NumPy 2.0);
    only the text 'NaT' was actually detected.
    """
    if pd.isna(x) or str(x) in ('NaT', 'nan', 'None'):
        return get_work_date(breakdown)['end']
    return x

In [38]:
# Fill missing completion times, then drop every row that still has a missing value.
jam_breakdown['done'] = jam_breakdown.apply(
    lambda row: get_done_time(row['done'], row['broken']),
    axis=1,
)
jam_breakdown = jam_breakdown.dropna(axis=0)

In [39]:
# Working window (07:00-17:00) of the breakdown day, stored as datetimes.
breakdown_window = jam_breakdown['broken'].apply(get_work_date)
jam_breakdown['actual_start_date'] = pd.to_datetime(breakdown_window.apply(lambda window: window['start']))
jam_breakdown['actual_end_date'] = pd.to_datetime(breakdown_window.apply(lambda window: window['end']))

In [40]:
def get_real_actual_work_breakdown(actual_start, actual_end, breakdown, done, stage):
    """Hours of the working window that a unit spent broken down.

    The interval starts at the later of `actual_start` and `breakdown`. It
    ends at `actual_end` unless `stage` is 'DONE', in which case it ends at
    the earlier of `actual_end` and `done`. The duration is converted with
    `timedelta_to_hour`; a negative result is returned as 0.

    Changes: two-element Series replace the per-call DataFrames, the end
    candidates are only built when needed, and `done` is passed through
    `pd.to_datetime` so a text timestamp can be compared with `actual_end`.
    """
    start = pd.Series([actual_start, breakdown]).max()

    if stage != 'DONE':
        end = actual_end
    else:
        end = pd.Series([actual_end, pd.to_datetime(done)]).min()

    hours = timedelta_to_hour(end - start)
    return 0 if hours < 0 else hours

In [41]:
# Breakdown hours inside the working window, then keep breakdowns from 2023 on.
jam_breakdown['hm_maintenance'] = jam_breakdown.apply(
    lambda row: get_real_actual_work_breakdown(
        row['actual_start_date'],
        row['actual_end_date'],
        row['broken'],
        row['done'],
        row['stage'],
    ),
    axis=1,
)
jam_breakdown = jam_breakdown[jam_breakdown['broken'].dt.year >= 2023]

In [42]:
from tqdm import tqdm

# Collapse the breakdown records to one row per equipment per breakdown date
_ = []
for i in tqdm(jam_breakdown.equipment_code.unique().tolist()):
    n = jam_breakdown[jam_breakdown.equipment_code==i]
    n['date'] = n.broken.dt.date
    for y in n.date.unique().tolist():
        m = n[n['date']==y]

        # Text fields of the day's records are joined with ';'
        stage = ';'.join(m['stage'].tolist())
        spk = ';'.join(m.spk_workshop.tolist())
        # Equipment attributes: first distinct value found in the day's records
        cat_eq = m['category_equipment'].unique().tolist()[0]
        eq_name = m['equipment_name'].unique().tolist()[0]
        eq_code = m['equipment_code'].unique().tolist()[0]
        name = m['name'].unique().tolist()[0]
        # Timestamps are stringified and joined with ';' as well
        broken = ';'.join(m['broken'].apply(lambda x: str(x)).tolist())
        done = ';'.join(m['done'].apply(lambda x: str(x)).tolist())
        # Total breakdown hours of the day
        hm = m.hm_maintenance.sum()
        
        msg = pd.DataFrame({
            'stage':[stage],
            'spk_workshop':[spk],
            'category_equipment':[cat_eq],
            'equipment_name':[eq_name],
            'name':[name],
            'equipment_code':[eq_code],
            'date':[y],
            'broken':[broken],
            'done':[done],
            'hm_maintenance':[hm]
        })
        _.append(msg)

# Stack the one-row frames into the new daily jam_breakdown table
jam_breakdown = pd.concat(_).reset_index(drop=True)

100%|████████████████████████████████████████████████████████████████████████████████| 422/422 [00:18<00:00, 23.39it/s]


In [43]:
def in_scale(x):
    """Cap a value at 10; anything not greater than 10 is returned unchanged."""
    return 10 if x > 10 else x

jam_breakdown['hm_maintenance'] = jam_breakdown['hm_maintenance'].apply(in_scale)

In [44]:
def get_id(jam, equipment):
    """Build the '#<jam>#<equipment>' key from the string forms of both arguments."""
    return '#{}#{}'.format(str(jam), str(equipment))

# Same '#<date>#<equipment>' key on both tables so they can be joined on 'id'
jam_operasional['id'] = jam_operasional.apply(lambda x: get_id(x['date'], x['equipment']), axis=1)
jam_breakdown['id'] = jam_breakdown.apply(lambda x: get_id(x['date'], x['equipment_code']), axis=1)

In [45]:
jam_operasional = jam_operasional.rename(columns={'hm_engine_in_work_productivity':'hm_engine_actual'})

In [46]:
# Outer-join daily operating hours with daily breakdown hours on the '#<date>#<equipment>' key
t2 = pd.merge(jam_operasional[['id','company','project/name','production','move_type','spk_project','hm_engine_actual','produksi']], jam_breakdown[['id','stage','spk_workshop','hm_maintenance']], how='outer', on='id')
# Recover date and equipment code from the key: element 1 is the date, the last element the equipment
t2['date'] = t2['id'].apply(lambda x: str(x).split('#')[1])
t2['equipment_code'] = t2['id'].apply(lambda x: str(x).split('#')[-1])
# Attach equipment attributes. jam_breakdown has one row per equipment per day, so the lookup is
# de-duplicated first; otherwise every t2 row is repeated once per breakdown day of its equipment.
t2 = t2.merge(jam_breakdown[['category_equipment','equipment_name','name','equipment_code']].drop_duplicates(), on='equipment_code', how='left')
# Sort newest first using temporary day/month/year columns
t2['day'] = pd.to_datetime(t2.date).dt.day
t2['month'] = pd.to_datetime(t2.date).dt.month
t2['year'] = pd.to_datetime(t2.date).dt.year
t2 = t2.sort_values(['year','month','day'], ascending=False)
t2 = t2.reset_index(drop=True)
t2 = t2.drop(columns=['day','month','year'])
# Assign the filled column back: `t2[col].fillna(..., inplace=True)` only modifies a temporary
# Series when pandas Copy-on-Write is active.
t2['hm_engine_actual'] = t2['hm_engine_actual'].fillna(0)
t2 = t2.rename(columns={'hm_engine_actual':'jam_produktivitas','hm_maintenance':'jam_breakdown'})
t2['jam_breakdown'] = t2['jam_breakdown'].fillna(0)

In [47]:
def get_jam_stanby_true(x):
    """Floor a value at zero: negative inputs become 0, everything else is returned as-is."""
    return 0 if x < 0 else x

# Standby hours = 10 minus productive and breakdown hours, floored at zero
t2['jam_standby'] = t2.apply(lambda x: get_jam_stanby_true(10 - (x.jam_produktivitas + x.jam_breakdown)), axis=1)
# Fill the gaps left by the outer join in one call. The per-column
# `t2[col].fillna(..., inplace=True)` form modifies a temporary Series under pandas Copy-on-Write.
t2 = t2.fillna({
    'spk_project': '-',
    'move_type': '-',
    'spk_workshop': '-',
    'project/name': '-',
    'company': '-',
    'produksi': 0,
    'production': '-',
    'stage': '-',
    'category_equipment': 'Tidak Diketahui',
    'equipment_name': 'Tidak Diketahui',
    'name': 'Tidak Diketahui',
})
t2['project/name'] = t2['project/name'].apply(lambda x: str(x))
# The join key is no longer needed
t2 = t2.drop(columns='id')

In [48]:
from datetime import datetime, timedelta

# Calendar bounds: 1 January 2023 up to the moment this cell runs
start_date = datetime(2023, 1, 1)
today = datetime.today()

# Whole days elapsed between the two bounds
delta = today - start_date

# One datetime per day, both ends included
date_range = [start_date + timedelta(days=offset) for offset in range(delta.days + 1)]

# Calendar frame with a single 'date' column of 'YYYY-MM-DD' strings
date = pd.DataFrame({'date': [str(day).split()[0] for day in date_range]})

In [49]:
t2 = t2.drop_duplicates()

In [50]:
# Build one frame per equipment that has a row for every calendar date
kpi = []
for i in tqdm(t2.equipment_code.unique().tolist()):
    n = t2[t2.equipment_code==i]

    # First value seen for this equipment; used to label the calendar-only rows below
    cat_eq = n.category_equipment.unique().tolist()[0]
    eq_code = n.equipment_code.unique().tolist()[0]
    eq_name = n.equipment_name.unique().tolist()[0]
    name = n.name.unique().tolist()[0]

    # Left-join onto the calendar so dates without a record still get a row, then fill them.
    # A single DataFrame.fillna replaces the per-column `n[col].fillna(..., inplace=True)` calls,
    # which modify a temporary Series under pandas Copy-on-Write.
    n = date.merge(n, on='date', how='left')
    n = n.fillna({
        'company': '-',
        'project/name': '-',
        'spk_project': '-',
        'spk_workshop': '-',
        'move_type': '-',
        'stage': '-',
        'production': '-',
        'category_equipment': cat_eq,
        'equipment_name': eq_name,
        'name': name,
        'equipment_code': eq_code,
        'jam_produktivitas': 0,
        'jam_breakdown': 0,
        'produksi': 0,
    })
    n['jam_standby'] = n.apply(lambda x: get_jam_stanby_true(10 - (x.jam_produktivitas + x.jam_breakdown)), axis=1)
    kpi.append(n)

# combine the per-equipment frames
kpi = pd.concat(kpi)
kpi['category_equipment'] = kpi['category_equipment'].replace('TidakDiketahui', 'Tidak Diketahui')
kpi = kpi.drop_duplicates()

# merge with the standard travel distance per move type
kpi = kpi.merge(tbl_jarak, on='move_type', how='left').rename(columns={'jarak':'standar_jarak'})
kpi['standar_jarak'] = kpi['standar_jarak'].fillna(0)

# merge with the standard maximum load per equipment model
kpi = kpi.merge(tbl_kapasitas[['equipment_name','kapasitas_maximal']], on='equipment_name', how='left')
kpi['kapasitas_maximal'] = kpi['kapasitas_maximal'].fillna(0)

kpi = kpi[['date','company','project/name','production','move_type','spk_project','spk_workshop','stage','category_equipment','equipment_name','name','equipment_code','standar_jarak','jam_produktivitas','jam_breakdown','jam_standby','kapasitas_maximal','produksi']]

# retase = production / capacity. Capacity was filled with 0 above, so the division can give
# +inf, -inf (negative production) or NaN (0/0); all three end up as 0.
kpi['retase'] = kpi.produksi / kpi.kapasitas_maximal
kpi['retase'] = kpi['retase'].replace([np.inf, -np.inf], 0)
kpi['total_jarak_tempuh'] = kpi.retase * kpi.standar_jarak
kpi = kpi.fillna(0)
kpi = kpi[kpi.equipment_name!='NISSAN CWA 260 X']

# Split so heavy-equipment names can be shortened separately
kpi_ = kpi[kpi.category_equipment!='HEAVY EQUIPMENT']
kpi__ = kpi[kpi.category_equipment=='HEAVY EQUIPMENT']

def rename_heavy_equipment(x):
    """Shorten a heavy-equipment model name.

    Two model names map to fixed labels. Any other name becomes its first word,
    a space, and the first two characters of the remaining words joined by
    single spaces.
    """
    if x == 'SAKAI SAKAI SV 525 D':
        return 'SAKAI'
    if x == 'KOMATSU LOADER WA 380-3':
        return 'KOMATSU LOADER'

    words = x.split()
    remainder = ' '.join(words[1:])
    return words[0] + ' ' + remainder[:2]

# Shorten heavy-equipment names on a fresh frame instead of writing into a filtered slice
kpi__ = kpi__.assign(equipment_name=kpi__['equipment_name'].apply(rename_heavy_equipment))
kpi = pd.concat([kpi_, kpi__])

# Attach the 'waktu' value per move type; unmatched move types get 0.
# The fill is assigned back because `kpi['waktu'].fillna(0, inplace=True)` modifies a
# temporary Series under pandas Copy-on-Write.
kpi = kpi.merge(tbl_waktu, on='move_type', how='left')
kpi['waktu'] = kpi['waktu'].fillna(0)

100%|███████████████████████████████████████████████████████████████████████████████| 432/432 [00:02<00:00, 166.07it/s]


In [51]:
def retext_jam_produktivitas(work, produksi, retase, waktu):
    """Return ``retase * waktu`` when hours are zero but production is not; otherwise ``work``."""
    hours_missing = work == 0 and produksi != 0
    return retase * waktu if hours_missing else work

# Index labels of rows that have production but zero hours; captured before the next line overwrites those zeros
list_index = kpi[(kpi.jam_produktivitas == 0) & (kpi.produksi!=0)].index.tolist()

# Replace the zero hours of those rows with retase * waktu
kpi['jam_produktivitas'] = kpi.apply(lambda x: retext_jam_produktivitas(x.jam_produktivitas, x.produksi, x.retase, x.waktu), axis=1)

def repair_jam(i, work, bd, s):
    """Return ``[work, bd, s]``, re-balanced when row ``i`` is listed in ``list_index``.

    Rows not in the module-level ``list_index`` are returned untouched. For listed
    rows, hours above 10 are replaced by ``10 - bd``, negative hours are raised to 0,
    and standby is recomputed as ``10 - (work + bd)``.
    """
    if i not in list_index:
        return [work, bd, s]

    if work > 10:
        work = 10 - bd
    if work < 0:
        work = 0

    return [work, bd, 10 - (work + bd)]

# Round retase to whole numbers
kpi['retase'] = np.round(kpi['retase'])
# repair_jam returns [work, breakdown, standby]; each line recomputes it per row and keeps one element.
# reset_index() exposes the index label as an 'index' column so it can be looked up in list_index.
kpi['jam_produktivitas'] = kpi.reset_index().apply(lambda x: repair_jam(x['index'], x.jam_produktivitas, x.jam_breakdown, x.jam_standby)[0], axis=1)
kpi['jam_breakdown'] = kpi.reset_index().apply(lambda x: repair_jam(x['index'], x.jam_produktivitas, x.jam_breakdown, x.jam_standby)[1], axis=1)
kpi['jam_standby'] = kpi.reset_index().apply(lambda x: repair_jam(x['index'], x.jam_produktivitas, x.jam_breakdown, x.jam_standby)[2], axis=1)

In [52]:
kpi.to_excel('./Report/task2.xlsx', index=False)

In [53]:
# ukur PA masing-masing alat
# Operating records: the columns of df2 needed for this task
jam_operasional = df2[['project/name','production','move_type','name','date','date_engine_start','date_engine_end','equipment','component_line/in_weight','component_line/out_weight','component_line/net_weight/voll']]
# NOTE(review): no effect here — 'actual_engine' is not among the columns selected above
jam_operasional = jam_operasional.rename(columns={'actual_engine':'hm_engine_actual'})
jam_operasional = jam_operasional.rename(columns={'component_line/net_weight/voll':'produksi'})
# Scale production by 1/1000 (presumably kg -> tonnes — TODO confirm the source unit)
jam_operasional['produksi'] = jam_operasional['produksi'] / 1000

jam_operasional = jam_operasional.rename(columns={
    'component_line/in_weight':'in_weight',
    'component_line/out_weight':'out_weight'
})

# Same 1/1000 scaling for the weigh-in / weigh-out columns
jam_operasional['in_weight'] = jam_operasional['in_weight'] / 1000
jam_operasional['out_weight'] = jam_operasional['out_weight'] / 1000

def get_work_date(x, start_time='07:00:00', end_time='17:00:00'):
    """Return the working window of the day that ``x`` falls on.

    Parameters
    ----------
    x : date-like or str
        Any value whose string form starts with the date, e.g. a Timestamp or
        'YYYY-MM-DD hh:mm:ss'. Only the text before the first whitespace is used.
    start_time, end_time : str, optional
        Clock times appended to the date. The defaults keep the original
        07:00-17:00 window.

    Returns
    -------
    dict
        ``{'start': '<date> <start_time>', 'end': '<date> <end_time>'}`` as strings.
    """
    # The date part was previously re-split three times; once is enough
    day = str(x).split()[0]
    return {'start': day + ' ' + start_time, 'end': day + ' ' + end_time}

# Working window of each record's date, as strings from get_work_date
jam_operasional['actual_start_date'] = jam_operasional.date.apply(lambda x: get_work_date(x)['start'])
jam_operasional['actual_end_date'] = jam_operasional.date.apply(lambda x: get_work_date(x)['end'])

# Parse the window strings into datetimes
jam_operasional['actual_start_date'] = pd.to_datetime(jam_operasional['actual_start_date'])
jam_operasional['actual_end_date'] = pd.to_datetime(jam_operasional['actual_end_date'])

def get_real_actual_work(actual_start, actual_end, engine_start, engine_end):
    """Hours the engine ran inside the working window.

    Parameters
    ----------
    actual_start, actual_end : datetime-like
        Start and end of the working window.
    engine_start, engine_end : datetime-like
        Recorded engine start and stop times.

    Returns
    -------
    ``timedelta_to_hour`` of (earlier end) - (later start). The result is negative
    when the engine interval lies entirely outside the window.
    """
    # Two-element Series replace the per-row DataFrame + filter + tolist();
    # Series.max / Series.min skip missing values.
    start = pd.Series([actual_start, engine_start]).max()
    end = pd.Series([actual_end, engine_end]).min()
    return timedelta_to_hour(end - start)

# Engine hours of each record that fall inside the working window
jam_operasional['hm_engine_in_work_productivity'] = jam_operasional.apply(lambda x: get_real_actual_work(x.actual_start_date, x.actual_end_date, x.date_engine_start, x.date_engine_end), axis=1)

# jam_operasional = df2.groupby(['project/name','name','date','equipment'])[['hm_engine_actual']].sum().reset_index()
# Sum hours and weights per project / production / move type / timesheet / date / equipment
jam_operasional = jam_operasional.groupby(['project/name','production','move_type','name','date','equipment'])[['hm_engine_in_work_productivity','in_weight','out_weight','produksi']].sum().reset_index()
# Reverse the row order of the grouped result
jam_operasional = jam_operasional.reset_index().sort_values('index', ascending=False).drop(columns='index').reset_index(drop=True)
# Dates as 'YYYY-MM-DD' strings
jam_operasional['date'] = jam_operasional['date'].dt.date
jam_operasional['date'] = jam_operasional['date'].apply(lambda x: str(x))
jam_operasional = jam_operasional.rename(columns={'name':'spk_project'})

def get_jam_stanby_true(x):
    """Clamp negative values to 0 and return non-negative values untouched.

    A function with this name is also defined in an earlier cell; running this
    cell rebinds the name.
    """
    if x < 0:
        return 0
    return x

jam_operasional['hm_engine_in_work_productivity'] = jam_operasional['hm_engine_in_work_productivity'].apply(get_jam_stanby_true)

In [54]:
t2_1 = jam_operasional.copy()
# Attach category and model from the breakdown data; 'equipment' is dropped in favour of 'equipment_code'
t2_1 = t2_1.merge(df[['category_equipment','equipment_name','equipment_code']].drop_duplicates(), left_on='equipment', right_on='equipment_code', how='left').drop(columns='equipment')
# Unit label: the part of equipment_code before the first '/'
t2_1['name'] = t2_1.equipment_code.apply(lambda x: str(x).split('/')[0])

In [55]:
t2_1 = t2_1.groupby(['date','spk_project','project/name','production','move_type','category_equipment','equipment_name','name'])[['hm_engine_in_work_productivity','in_weight','out_weight','produksi']].sum().reset_index()

In [56]:
t2_1.to_excel('./Report/task2_1.xlsx', index=False)

## Task 3 : DASHBOARD

Buat perbandingan antara Biaya terhadap hasil produksi

In [57]:
t3 = df2.copy()
# Keep ore-hauling records only
t3 = t3[t3.production=='Hauling Ore']
# Equipment models covered by this task
target = ['HINO ZS','HINO ZY','HONGYAN KINKAN430']
equipment = df[df.equipment_name.isin(target)==True][['category_equipment','equipment_name','equipment_code']].drop_duplicates()
# Unit label: the part of equipment_code before the first '/'
equipment['equipment_detail'] = equipment.equipment_code.apply(lambda x: str(x).split('/')[0])
t3 = t3.merge(equipment, left_on='equipment', right_on='equipment_code', how='left')
t3 = t3[t3.equipment_name.isin(target)]
# Production = weight in minus weight out
t3['produksi'] = t3['component_line/in_weight'] - t3['component_line/out_weight']

t3 = t3[['project/name','project','name','date','move_type','hm_engine_actual','production','employee','category_equipment','equipment_name','equipment_code','equipment_detail','component_line/in_weight','component_line/out_weight','produksi','state']]
# Keep only the text after the last '/' in each column name
t3.columns = [i.split('/')[-1] for i in t3.columns]

# group the daily production data
t3 = t3.groupby(['date','project','move_type','equipment_name','equipment_detail'])[['in_weight','out_weight','produksi']].sum().reset_index()
# Dates as 'YYYY-MM-DD' strings
t3['date'] = t3['date'].dt.date
t3['date'] = t3['date'].astype(str)

# klasifikasi status dari tujuan produksi berdasarkan moving type
def get_produksi_status(x):
    """Classify a move type as 'Deposit' or 'Sell'.

    The text after the last '- ' in ``str(x)`` (or the whole text if there is
    none) is taken as the destination. 'KM 7' gives 'Deposit', anything else 'Sell'.
    """
    destination = str(x).rpartition('- ')[2]
    return 'Deposit' if destination == 'KM 7' else 'Sell'

t3['produksi_status'] = t3['move_type'].apply(lambda x: get_produksi_status(x))

# group the daily cost data
cost_t1 = t1.copy()
cost_t1 = cost_t1[['broken','equipment_name','name','cost']]
# Costs are dated by the breakdown date
cost_t1['broken'] = cost_t1['broken'].dt.date
cost_t1 = cost_t1.groupby(['broken','equipment_name','name'])[['cost']].sum().reset_index()
cost_t1.columns = ['date','equipment_name','equipment_detail','cost']
cost_t1['date'] = cost_t1['date'].astype(str)
# Keep only the equipment models present in the production data
cost_t1 = cost_t1[cost_t1.equipment_name.isin(t3.equipment_name.unique().tolist())]

# merge the two data sets on a '<date>#<model>#<unit>' key
t3['id'] = t3.date +'#'+ t3.equipment_name +'#'+ t3.equipment_detail
cost_t1['id'] = cost_t1.date +'#'+ cost_t1.equipment_name +'#'+ cost_t1.equipment_detail
t3 = t3.merge(cost_t1[['id','cost']], on='id', how='outer')
t3 = t3[['id','project','move_type','produksi_status','in_weight','out_weight','produksi','cost']]
# Rebuild date / model / unit from the key so rows that exist only on the cost side get them too
t3['date'] = t3['id'].apply(lambda x: str(x).split('#')[0])
t3['equipment_name'] = t3['id'].apply(lambda x: str(x).split('#')[1])
t3['equipment_detail'] = t3['id'].apply(lambda x: str(x).split('#')[-1])
t3.drop(columns='id', inplace=True)

t3 = t3[['date','project','move_type','produksi_status','equipment_name','equipment_detail','in_weight','out_weight','produksi','cost']]

# cleaning data: rows that exist only on the cost side have no production fields and are
# labelled 'Breakdown'. One DataFrame.fillna replaces the per-column
# `t3[col].fillna(..., inplace=True)` calls, which modify a temporary Series under
# pandas Copy-on-Write.
t3 = t3.fillna({
    'cost': 0,
    'in_weight': 0,
    'out_weight': 0,
    'produksi': 0,
    'project': 'Breakdown',
    'move_type': 'Breakdown',
    'produksi_status': 'Breakdown',
})

# Drop breakdown-only rows that carry no cost; keep every production row
t3_bd = t3[t3.project=='Breakdown']
t3_bd = t3_bd[t3_bd.cost!=0]
t3_ac = t3[t3.project!='Breakdown']
t3 = pd.concat([t3_bd, t3_ac])
t3 = t3.sort_values('date', ascending=False).reset_index(drop=True)
# Scale production by 1/1000
t3['produksi'] = t3['produksi']/1000
t3.to_excel('./Report/task3.xlsx', index=False)

## Task 4 : DASHBOARD
Kumpulkan trend data produksi dan bandingkan dengan maximal muatan nya 

In [58]:
c2 = kpi.copy()
# Keep rows that have a project and a positive retase
c2 = c2[c2['project/name']!='-']
c2 = c2[c2.retase>0]
# Number of ';'-separated entries in 'production'
c2['count_sheet'] = c2['production'].apply(lambda x: len(str(x).split(';')))
# Production target: one full load per sheet
c2['target_produksi'] = c2.count_sheet * c2.kapasitas_maximal
c2 = c2[c2.target_produksi!=0]
c2 = c2[['date','project/name','production','move_type','spk_project','category_equipment','equipment_name','name','standar_jarak','jam_produktivitas','jam_breakdown','jam_standby','kapasitas_maximal','produksi','target_produksi','retase','total_jarak_tempuh','count_sheet']]

def get_different(retase, c_spk_kerja, produksi, target_produksi):
    """Compare trip count with sheet count, and production with its target.

    Returns a dict of one-element lists:
    - 'condition': whether the rounded ``retase`` equals ``c_spk_kerja``
    - 'diff' / 'p_diff': ``c_spk_kerja - retase``, absolute and relative to ``c_spk_kerja``
    - 'selisih_target_produksi' / 'p_selisih_target_produksi':
      ``target_produksi - produksi``, absolute and relative to ``target_produksi``
    """
    trip_gap = c_spk_kerja - retase
    target_gap = target_produksi - produksi

    return {
        'condition': [np.round(retase) == c_spk_kerja],
        'diff': [trip_gap],
        'p_diff': [trip_gap / c_spk_kerja],
        'selisih_target_produksi': [target_gap],
        'p_selisih_target_produksi': [target_gap / target_produksi],
    }

# First-row values; NOTE(review): not used by the statements below — looks like leftover debugging, confirm before removing
ret = c2.iloc[0]['retase']
count_sheet = c2.iloc[0]['count_sheet']
produksi = c2.iloc[0]['produksi']
target_produksi = c2.iloc[0]['target_produksi']

# One output column per key of get_different; each line re-evaluates the function for every row
c2['retase_berbeda'] = c2.apply(lambda x: get_different(x.retase, x.count_sheet, x.produksi, x.target_produksi)['condition'][0], axis=1)
c2['selisih_retase_berbeda'] = c2.apply(lambda x: get_different(x.retase, x.count_sheet, x.produksi, x.target_produksi)['diff'][0], axis=1)
c2['p_selisih_retase_berbeda'] = c2.apply(lambda x: get_different(x.retase, x.count_sheet, x.produksi, x.target_produksi)['p_diff'][0], axis=1)
c2['selisih_target_produksi'] = c2.apply(lambda x: get_different(x.retase, x.count_sheet, x.produksi, x.target_produksi)['selisih_target_produksi'][0], axis=1)
c2['p_selisih_target_produksi'] = c2.apply(lambda x: get_different(x.retase, x.count_sheet, x.produksi, x.target_produksi)['p_selisih_target_produksi'][0], axis=1)

# eda1 feeds the chart that compares the load actually produced with what could have been achieved.
eda1 = c2[['date','spk_project','move_type','equipment_name','name','kapasitas_maximal','produksi','retase','count_sheet']].rename(columns={
    'kapasitas_maximal':'kapasitas'
})
eda1['retase'] = eda1['retase'].apply(lambda x: int(np.round(x)))

# retase is obtained by dividing production by the dump-truck (DT) capacity.
# If the DT capacity were fully used on every trip, production would be at its maximum; that value is produksi_max_retase.
eda1['produksi_max_retase'] = eda1.retase * eda1.kapasitas

# count_sheet is the number of sheets created by the operator. The number of sheets usually represents the number of trips (retase) made. The maximum output for that many sheets at full load is produksi_max_count_sheet.
eda1['produksi_max_count_sheet'] = eda1.count_sheet * eda1.kapasitas

eda1 = eda1.groupby(['date','spk_project','move_type','equipment_name','name'])[['kapasitas','produksi','retase','count_sheet','produksi_max_retase','produksi_max_count_sheet']].sum().reset_index()

# Measures to unpivot into long format
cols = ['produksi','retase','count_sheet','produksi_max_retase','produksi_max_count_sheet']

# For each measure: one copy of the identifier columns with 'status' = measure name and 'value' = its values
eda2_ = []
for i in tqdm(cols):
    data = eda1.copy()
    values = data[i].tolist()
    data['status'] = i
    data = data[['date','spk_project','move_type','equipment_name','name','kapasitas','status']]
    data['value'] = values
    eda2_.append(data)

eda2 = pd.concat(eda2_)

eda2['produksi_status'] = eda2.move_type.apply(get_produksi_status)
# Long table and wide table are exported separately
eda2.to_excel('./Report/task5_1.xlsx', index=False)
eda1.to_excel('./Report/task5_2.xlsx', index=False)

100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 287.75it/s]


## Task 5 : DASHBOARD
- Dashboard Monitoring Periodical Maintenance Equipment
- Rerata Equipment harus masuk dalam periodical Maintenance

In [59]:
# show table periodical
tbl_periodical.head()

Unnamed: 0,equipment_name,description,oil_consumption,standar_hm
0,KOMATSU HM400-3R,oli engine,64.0,350
1,KOMATSU HM400-3R,oli transmisi,156.0,1000
2,KOMATSU HM400-3R,final driver,32.0,2000
3,KOMATSU HM400-3R,diffrential,106.0,2000
4,KOMATSU HM400-3R,hyraulick,167.0,3000


In [60]:
# kumpul data historical dari setiap alat
# Collect the hour-meter history of every unit from breakdown records (bd7) and timesheets (ts7)
bd7 = df[['spk','broken','category_equipment','equipment_name','equipment_code','hm']]
ts7 = df2[['name','date','equipment','hm_engine_start','hm_engine_end','actual_engine']]

# transform data
# Unit label: the part of equipment_code before the first '/'
bd7['name'] = bd7['equipment_code'].apply(lambda x: str(x).split('/')[0])
# Align column names with the timesheet data: breakdown time -> 'date', 'hm' -> 'hm_engine_end'
bd7 = bd7.rename(columns={'spk':'spk','broken':'date','hm':'hm_engine_end'})
bd7 = bd7.drop(columns=['equipment_code'])
bd7['status'] = 'b/d'
bd7 = bd7[['status','spk','date','category_equipment','equipment_name','name','hm_engine_end']]

ts7 = ts7.rename(columns={'name':'spk','equipment':'name'})
ts7['name'] = ts7['name'].apply(lambda x: str(x).split('/')[0])

# merge data: timesheets take category and model from the breakdown records of the same unit
ts7 = ts7.merge(bd7[['category_equipment','equipment_name','name']].drop_duplicates(), on='name', how='left')
ts7['status'] = 'timesheet'
ts7 = ts7[['status','spk','date','category_equipment','equipment_name','name','hm_engine_end']]

# concatenate the timesheet and breakdown data
t7 = pd.concat([ts7,bd7])
t7['date'] = pd.to_datetime(t7.date).dt.date
# t7 = t7[pd.to_datetime(t7.date).dt.year>2023]

# we assume the data contains no human error
t7 = t7[t7.hm_engine_end!=0]

# Order each unit's readings by date, then by reading, and drop repeated readings
t7 = t7.sort_values(['name','date','hm_engine_end'], ascending=True)
t7 = t7.drop_duplicates(subset=['date','category_equipment','equipment_name','name','hm_engine_end'])

In [61]:
# sample data untuk uji program

# Pick one HINO ZY unit (position 20 in the list of units) as test data
list_unit_hino_zy = t7[t7.equipment_name=='HINO ZY'].name.unique().tolist()
sample = t7[t7.name==list_unit_hino_zy[20]]

# cleaning data
# Keep readings that increased relative to the previous one, but by less than the median relative increase
sample['pct_change'] = sample.hm_engine_end.pct_change()
sample = sample[sample['pct_change']>0]
sample = sample[sample['pct_change']<sample['pct_change'].median()]
sample = sample[pd.to_datetime(sample.date).dt.year>=2024]
sample.drop(columns='pct_change', inplace=True)

def get_increase_hm(list_hm):
    """Return the increase of each hour-meter reading over the previous one.

    Parameters
    ----------
    list_hm : list
        Readings in chronological order.

    Returns
    -------
    list
        Same length as ``list_hm``. The first element is 0 because there is no
        previous reading. An element is None when the two readings cannot be
        subtracted (for example, one of them is None).
        An empty input gives an empty list; it previously gave ``[0]``, which did
        not match the input length.
    """
    if len(list_hm) == 0:
        return []

    increases = [0]
    for previous, current in zip(list_hm, list_hm[1:]):
        try:
            increases.append(current - previous)
        except TypeError:
            # Non-numeric reading: keep the slot so the lengths stay aligned
            increases.append(None)
    return increases

# apply function
sample['increase_hm'] = get_increase_hm(sample.hm_engine_end.tolist())

In [62]:
def cek_periode_ganti(hm_engine_end, eq_name):
    """Flag, for each reading, which periodical maintenance items have fallen due.

    Every item listed for ``eq_name`` in ``tbl_periodical`` has an hour counter
    starting at 0. Each reading adds its increase (from ``get_increase_hm``) to
    all counters. Items whose counter reaches their ``standar_hm`` are reported
    for that reading and their counters are reset to 0.

    Parameters
    ----------
    hm_engine_end : list
        Hour-meter readings of one unit, in chronological order.
    eq_name : str
        Equipment model used to select rows of ``tbl_periodical``.

    Returns
    -------
    list
        One entry per value returned by ``get_increase_hm``: the due item
        descriptions joined with ',', or NaN when nothing is due.
    """
    base = tbl_periodical[tbl_periodical.equipment_name==eq_name].reset_index(drop=True)
    descriptions = base.description.tolist()
    thresholds = base.standar_hm.tolist()
    # Counters live in a plain list: the previous `base['params'][k] = ...` chained
    # assignment does not update the frame under pandas Copy-on-Write.
    counters = [0] * len(base)

    msg = []
    for increase in get_increase_hm(hm_engine_end):
        counters = [hours + increase for hours in counters]
        due = [k for k, hours in enumerate(counters) if hours >= thresholds[k]]
        if due:
            for k in due:
                counters[k] = 0
            msg.append(','.join(descriptions[k] for k in due))
        else:
            # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0
            msg.append(np.nan)

    return msg

In [63]:
# For every unit, tag each reading with the maintenance items returned by cek_periode_ganti
t7_ = []
for i in tqdm(t7.name.unique().tolist()):
    n = t7[t7.name==i]
    # n = n[pd.to_datetime(n.date).dt.year>=2024]
    
    hm_list = n['hm_engine_end'].tolist()
    # First distinct model name of the unit; passed to cek_periode_ganti as eq_name
    eq_name = n['equipment_name'].unique().tolist()[0]
    
    n['periodical_maintenance'] = cek_periode_ganti(hm_list, eq_name)
    t7_.append(n)

100%|████████████████████████████████████████████████████████████████████████████████| 355/355 [00:22<00:00, 16.02it/s]


In [64]:
t7 = pd.concat(t7_)

In [65]:
t7.groupby(['name','periodical_maintenance'])[['periodical_maintenance']].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,periodical_maintenance
name,periodical_maintenance,Unnamed: 2_level_1
D65-02,engine,2
D65-07,engine,1
D65-10,"damper,final driver,transmission",1
D65-10,engine,3
D65-10,"engine,damper,final driver,transmission",1
...,...,...
SK330-11,engine,2
SK330-11,"engine,hyraulick,swing machinery,final driver",2
SK330-11,"engine,swing machinery,final driver",2
SK330-12,"engine,hyraulick,swing machinery,final driver",1


In [66]:
check = t7[t7.name=='D65-10'].periodical_maintenance.value_counts().reset_index().to_dict()

In [67]:
# Make the 'index' column unique: split every comma-joined label and collect the distinct items
# NOTE(review): requires `check` to have an 'index' key, i.e. value_counts().reset_index() must name
# the value column 'index' — this depends on the pandas version; confirm
unique_index = []
for index_list in check['index'].values():
    unique_index.extend(index_list.split(','))

unique_index = sorted(set(unique_index))

# Build the data for a new DataFrame: the same joined item list on every row, next to the original counts
new_data = {
    'index': [', '.join(unique_index)] * len(check['index']),
    'periodical_maintenance': check['periodical_maintenance'].values()
}

In [68]:
pd.DataFrame(new_data)

Unnamed: 0,index,periodical_maintenance
0,"damper, engine, final driver, hyraulick, trans...",3
1,"damper, engine, final driver, hyraulick, trans...",1
2,"damper, engine, final driver, hyraulick, trans...",1
3,"damper, engine, final driver, hyraulick, trans...",1


In [69]:
check = t1[pd.to_datetime(t1.done).dt.strftime('%m/%y').isin(['02/23'])]

In [70]:
rupiah(check[check.requirement_type=='External'].cost.sum())

'Rp 4.475.284.124,0'

In [71]:
check[check.requirement_type=='External'].to_excel('check_external.xlsx', index=False)

## Task 6 : DASHBOARD

In [72]:
t10 = t3.copy()

# KONTRAK KBM : MINING KBM
# KONTRAK BLOK 8 : MINING BDM
# RENTAL BLOK 8 : [RENTAL BDM] [BLOK 5 - PABRIK]
# RENTAL FEEDING KM 7 : [RENTAL BDM] [KM7 - PABRIK]

def get_kontrak(project, move):
    """Map a project (and, for 'Rental BDM', its move type) to a contract label.

    Returns 'Tidak Diketahui' for any combination that is not listed.
    """
    if project == 'Mining KBM':
        return 'Kontrak KBM'
    if project == 'Mining BDM':
        return 'Kontrak Blok 8'
    # Rental BDM is split by route
    if project == 'Rental BDM' and move == 'BLOK 5 - PABRIK':
        return 'Rental Blok 8'
    if project == 'Rental BDM' and move == 'KM7 - PABRIK':
        return 'Feeding KM7'
    return 'Tidak Diketahui'

# APPLY FUNCTION: label every row with its contract and drop the rows that match none
t10['kontrak'] = t10.apply(lambda x: get_kontrak(x.project, x.move_type), axis=1)
t10 = t10[t10.kontrak!='Tidak Diketahui']
t10 = t10[['date','kontrak','move_type','equipment_name','equipment_detail','in_weight','out_weight','produksi','cost']]

# fill in the missing calendar dates for each contract
_ = []
for i in tqdm(t10.kontrak.unique().tolist()):
    n = t10[t10.kontrak == i]

    # First move type seen for the contract; used to label the calendar-only rows
    move = n.move_type.unique().tolist()[0]

    n = date.merge(n, on='date', how='left')
    # One DataFrame.fillna replaces the per-column `n[col].fillna(..., inplace=True)` calls,
    # which modify a temporary Series under pandas Copy-on-Write.
    n = n.fillna({
        'kontrak': i,
        'move_type': move,
        'equipment_name': 'Tidak Diketahui',
        'equipment_detail': 'Tidak Diketahui',
        'in_weight': 0,
        'out_weight': 0,
        'produksi': 0,
        'cost': 0,
    })
    _.append(n)

t10 = pd.concat(_)
t10 = t10.merge(tbl_kapasitas[['equipment_name','kapasitas_maximal']], on='equipment_name', how='left')
# retase is computed before the capacity is filled, so models without a capacity give NaN -> 0
t10['retase'] = t10.produksi / t10.kapasitas_maximal
t10['kapasitas_maximal'] = t10['kapasitas_maximal'].fillna(0)
t10['retase'] = t10['retase'].fillna(0)
t10.to_excel('./Report/dashboard_request_pak_jon_1.xlsx', index=False)

100%|███████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 289.87it/s]


# **CC : Cost Control**

## CC1 : Summary of Working Hours

In [176]:
# Sum the start/end hour-meter readings per date, timesheet, project context and equipment
cc1 = df2.groupby(['date','name','company','project','production','move_type','equipment'])[['hm_engine_start','hm_engine_end']].sum().reset_index()
# Unit label: the part of the equipment code before the first '/'
cc1['equipment'] = cc1.equipment.apply(lambda x: str(x).split('/')[0])
cc1 = cc1.rename(columns={'name':'ts','equipment':'name'})
# Inner join: only units known to the kpi table are kept
cc1 = cc1.merge(kpi[['category_equipment','equipment_name','name']].drop_duplicates(), on='name')
cc1 = cc1[['date','ts','company','project','production','move_type','category_equipment','equipment_name','name','hm_engine_start','hm_engine_end']]
# Store dates as 'YYYY-MM-DD' strings. The calendar frame `date` built earlier in this notebook
# holds strings, so while this column held datetime.date objects the calendar merge below
# matched nothing and every worked day appeared twice (once empty, once with data).
cc1['date'] = pd.to_datetime(cc1.date).dt.date.astype(str)

# reshape the data as needed: one row per date and unit, taken from the first record of the day
_ = []
for i in tqdm(cc1.date.unique().tolist()):
    n = cc1[cc1.date == i]
    for j in n.name.unique().tolist():
        m = n[n.name==j]
        # Number of records of the unit on that date
        count_ret = len(m)
        # Hours: end minus start reading of the first record
        hm = m.head(1).hm_engine_end - m.head(1).hm_engine_start
        msg = m.head(1)
        msg['hm'] = hm
        msg['retase'] = count_ret
        _.append(msg)
cc1 = pd.concat(_)

# fill in the days on which the equipment was not used
_ = []
for i in tqdm(cc1.name.unique().tolist()):
    n = cc1[cc1.name==i]
    cat_eq = n.category_equipment.unique().tolist()[0]
    eq_name = n.equipment_name.unique().tolist()[0]
    name = i
    n = date.merge(n, on='date', how='outer')
    # One DataFrame.fillna replaces the per-column `n[col].fillna(..., inplace=True)` calls,
    # which modify a temporary Series under pandas Copy-on-Write.
    n = n.fillna({
        'ts': '-',
        'company': '-',
        'project': '-',
        'production': '-',
        'move_type': '-',
        'category_equipment': cat_eq,
        'equipment_name': eq_name,
        'name': i,
        'hm_engine_start': 0,
        'hm_engine_end': 0,
        'hm': 0,
        'retase': 0,
    })
    _.append(n)
    
cc1 = pd.concat(_)
# Planned hours per row
cc1['plan_hm'] = 10
cc1 = cc1.groupby(['date','company','project','production','move_type','category_equipment','equipment_name','name'])[['hm','plan_hm']].sum().reset_index()

# split out the actual hours
cc1_actual = cc1.drop(columns='plan_hm')
cc1_actual['status'] = 'Actual'
cc1_actual = cc1_actual.rename(columns={'hm':'values'})

# split out the planned hours
cc1_plan = cc1.drop(columns='hm')
cc1_plan['status'] = 'Plan'
cc1_plan = cc1_plan.rename(columns={'plan_hm':'values'})

# stack both, distinguished by the 'status' column
cc1 = pd.concat([cc1_actual, cc1_plan])

# save data
cc1.to_excel('./Report/cc dashboard 1.xlsx', index=False)

100%|████████████████████████████████████████████████████████████████████████████████| 372/372 [00:16<00:00, 22.29it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 329/329 [00:00<00:00, 360.34it/s]


## CC2 : Owning & Operation Cost

In [209]:
# dapatkan nilai HM total per unit 
# Total productive hours (jam_produktivitas) per unit per date
eq_hm = kpi.copy()
eq_hm = eq_hm.groupby(['date','category_equipment','equipment_name','name'])[['jam_produktivitas']].sum().reset_index()

In [223]:
# dapatkan nilai rm dengan memisahkan semua biaya ganti ban
# Get the rm cost per unit and completion date, leaving out all tyre replacement costs
rm = t1.copy()
rm = rm[['company','done','stage','category_equipment','equipment_name','name','requirement_type','category_maintenance','description_2','cost']]
# Costs are dated by completion date
rm['date'] = pd.to_datetime(rm['done']).dt.date
# description2: text after the last '] '; description1: its first two words
rm['description2'] = rm['description_2'].apply(lambda x: str(x).split('] ')[-1])
rm['description1'] = rm.description2.apply(lambda x: ' '.join(str(x).split()[:2]))
rm.drop(columns=['description_2','done'], inplace=True)
# Finished work orders only
rm = rm[rm.stage=='DONE']
# Exclude items whose description starts with 'BAN LUAR'
rm = rm[rm.description1!='BAN LUAR']
rm = rm[['date','company','stage','category_equipment','equipment_name','name','requirement_type','category_maintenance','description2','cost']]
rm = rm.groupby(['date','company','category_equipment','equipment_name','name'])[['cost']].sum().reset_index()

In [234]:
# dapatkan nilai fuel
url_konsumsi = "https://docs.google.com/spreadsheets/d/1Q9nZYOoRZZL_PB6wUPDyBtzuWJv2qZXpbXYlt7wrGho/export?format.xlsx"
output_filename = 'fuel_comsumption.xlsx'

# Delete the previous download first, so the read below can only see a file written by this run.
# Only a missing file is tolerated; other errors (e.g. the file being locked) are no longer hidden.
try:
    os.remove(output_filename)
except FileNotFoundError:
    pass

# get the data from the spreadsheet; fail here with the HTTP error instead of later with a
# missing-file error, and do not wait forever on a stalled connection
response = requests.get(url_konsumsi, timeout=60)
response.raise_for_status()
with open(output_filename, "wb") as f:
    f.write(response.content)

# read data
fuel = pd.read_excel(output_filename)
# Drop the first data row and the 'CODE USER' column
fuel = fuel[fuel.index>0].reset_index(drop=True).drop(columns='CODE USER')
# Column names: lower case, whitespace replaced by '_'
fuel.columns = ['_'.join(i.lower().split()) for i in fuel.columns]
# NOTE(review): the renamed frame is only displayed, not assigned — `fuel` keeps its 'id_unit' column
fuel.rename(columns={'id_unit':'name'})

Unnamed: 0,timestamps,id,date,project,production,name,konsumsi,user
0,2024-05-22 13:21:25.920,0.0,2024-05-16 00:00:00,Rental KBM,Quarry Mined,DT-22,15,DICKY 1


In [235]:
fuel

Unnamed: 0,timestamps,id,date,project,production,id_unit,konsumsi,user
0,2024-05-22 13:21:25.920,0.0,2024-05-16 00:00:00,Rental KBM,Quarry Mined,DT-22,15,DICKY 1
