In [111]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os
import requests
from datetime import datetime
import re
from collections import Counter
import math
from tqdm import tqdm

# pandas display: show every column when a frame is rendered
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

# seaborn theme, configured in ONE call. sns.set() is an alias of sns.set_theme()
# and resets context, style and palette to its own defaults, so separate
# set_palette / set_context / set_style calls issued before it are discarded.
sns.set(
    context='notebook',
    style='whitegrid',
    palette='Spectral',
    font_scale=1,
    rc={"axes.facecolor":"#FFF9ED","figure.facecolor":"#FFF9ED"},
)

# Silence all library warnings for the rest of the notebook.
warnings.filterwarnings('ignore')

# **Cleaning data**

In [112]:
# Folder that holds the raw fuel-usage workbooks.
path = 'data kotor pemakaian fuel'
# List the folder, leaving out the '.DS_Store' entry when it is present.
files = [entry for entry in os.listdir(path) if entry != '.DS_Store']

In [113]:
# Worksheets read from every workbook.
# Order matters: the loading loop treats the first four entries as hour-meter
# sheets and the fifth entry as the hauling (tonnage / retase) sheet.
sheets_name = [
    'REPORT unit OB',
    'REPORT unit QUARRY',
    'REPORT unit DEVELOP',
    'REPORT unit ORE GETTING',
    'REPORT unit DT HAUL'
]

In [114]:
# Indonesian month names, in calendar order
bulan = ["Januari", "Februari", "Maret", "April", "Mei", "Juni", "Juli", "Agustus", "September", "Oktober", "November", "Desember"]

# Matching month numbers 1..12
nomor_bulan = list(range(1, 13))

# Lookup table: month number <-> month name
df_bulan = pd.DataFrame({
    'Nomor Bulan': nomor_bulan,
    'Nama Bulan': bulan
})

# The report file names spell the month in lower case, so store the names lower-cased too.
df_bulan['Nama Bulan'] = df_bulan['Nama Bulan'].str.lower()

In [115]:
# Show the file names found in the raw-data folder.
files

['06_cost production report juni SA - BDM 2023.xlsx',
 '05_cost production report mei SA - BDM 2023.xlsx',
 '02_cost production report februari SA - BDM 2023.xlsx',
 '03_cost production report maret SA - BDM 2023.xlsx',
 '03_cost production report maret SA - BDM 2024.xlsx',
 '04_cost production report april SA - BDM 2023.xlsx',
 '01_cost production report januari SA - BDM 2023.xlsx',
 '04_cost production report april SA - BDM 2024.xlsx',
 '01_cost production report januari SA - BDM 2024.xlsx',
 '07_cost production report juli SA - BDM 2023.xlsx']

In [116]:
# Column order of the combined table
OUTPUT_COLUMNS = ['date','company','project','production','equipment','hm','produksi','retase','konsumsi']

# Company codes that appear in the file names and the full names they stand for
COMPANY_NAMES = {'SA': 'CV. SENTOSA ABADI', 'APP': 'CV. Adil Prima Perkasa'}


def _parse_report_filename(file_name, month_numbers):
    """Extract ``(company, date, project)`` from a report file name.

    The name is split on whitespace and read positionally, e.g. for
    ``'06_cost production report juni SA - BDM 2023.xlsx'``: token 3 is the
    month name, token 4 the company code, the second-to-last token the project
    and the last token the year followed by the extension.

    Parameters
    ----------
    file_name : str
        Workbook file name (no directory part).
    month_numbers : dict
        Maps lower-case Indonesian month names to their month number.

    Returns
    -------
    tuple of str
        ``(company, date, project)`` where ``date`` is the string
        ``'1/<month>/<year>'`` (kept as text, exactly as before).

    Raises
    ------
    ValueError
        If the month token is not a known month name.
    """
    tokens = file_name.split()

    # Unknown company codes are passed through unchanged.
    company = COMPANY_NAMES.get(tokens[4], tokens[4])

    month_name = tokens[3]
    if month_name not in month_numbers:
        raise ValueError(
            'Cannot find a month name in file name {!r} (got {!r})'.format(file_name, month_name)
        )
    year = tokens[-1].split('.')[0]
    date = '1/' + str(month_numbers[month_name]) + '/' + year

    project = tokens[-2]
    return company, date, project


def _read_report_sheet(path_file, sheet, is_hauling):
    """Load one report sheet and reduce it to the fuel-usage columns.

    The column titles are spread over rows 3, 4 and 5 of the frame returned by
    ``read_excel``; the three cells of each column are joined with ``'#'`` to
    form one column name, and the data starts at row 6.

    Parameters
    ----------
    path_file : str
        Path of the workbook.
    sheet : str
        Worksheet name; also stored in the ``production`` column.
    is_hauling : bool
        True for the hauling sheet (tonnage / retase, no hour meter),
        False for the hour-meter sheets.

    Returns
    -------
    pandas.DataFrame
        Columns ``equipment, hm, produksi, retase, konsumsi, production``;
        measures the sheet does not carry are filled with 0.
    """
    raw = pd.read_excel(path_file, sheet_name=sheet).drop(columns=['Unnamed: 0','Unnamed: 1'])

    # Rows 3-5 hold the three header levels; empty cells become ''.
    header_rows = [raw.loc[[row]].fillna('').loc[row].tolist() for row in (3, 4, 5)]
    columns = ['#'.join(levels) for levels in zip(*header_rows)]

    body = raw[raw.index >= 6].copy()
    body.columns = columns
    # Columns whose three header cells are all empty get the name '##'.
    body = body.drop(columns='##')
    if is_hauling:
        # The 'WEEK##' column is not present in every workbook.
        body = body.drop(columns='WEEK##', errors='ignore')
    # Keep only fully populated rows.
    body = body.dropna(axis=0)

    if is_hauling:
        source_columns = ['UNIT##','TONNAGE#Weight Bridge#Tonnage','##Retase','FUEL CONSUMPTION#Fuel#(liter)']
        target_columns = ['equipment','produksi','retase','konsumsi']
        missing_measures = {'hm': 0}
    else:
        source_columns = ['UNIT##','HOURS METER#HM#(hour)','FUEL CONSUMPTION#Fuel#(liter)']
        target_columns = ['equipment','hm','konsumsi']
        missing_measures = {'retase': 0, 'produksi': 0}

    body = body[source_columns]
    body.columns = target_columns
    return body.assign(production=sheet, **missing_measures)


month_numbers = dict(zip(df_bulan['Nama Bulan'], df_bulan['Nomor Bulan']))

# Names carrying a '~$' prefix (presumably Excel lock files of open workbooks)
# resolve to the same workbook as the plain name. De-duplicate, keeping order,
# so that a workbook is never loaded twice and its rows duplicated.
report_files = list(dict.fromkeys(name.split('~$')[-1] for name in files))

report_frames = []
for file_name in report_files:
    path_file = './' + path + '/' + file_name
    company, date, project = _parse_report_filename(file_name, month_numbers)

    # Read every sheet with the layout that applies to it.
    sheet_frames = []
    for index, sheet in enumerate(sheets_name):
        sheet_data = _read_report_sheet(path_file, sheet, is_hauling=index >= 4)
        sheet_frames.append(sheet_data.assign(company=company, date=date, project=project))

    report_frames.append(pd.concat(sheet_frames)[OUTPUT_COLUMNS])

data = pd.concat(report_frames)

In [117]:
# Distinct report dates loaded for CV. SENTOSA ABADI.
# NOTE: 'date' holds 'D/M/YYYY' strings, so sort_values orders them
# lexicographically (e.g. '1/1/2024' before '1/2/2023'), not chronologically.
data[data.company=='CV. SENTOSA ABADI'].sort_values('date', ascending=True).date.unique().tolist()

['1/1/2023',
 '1/1/2024',
 '1/2/2023',
 '1/3/2023',
 '1/3/2024',
 '1/4/2023',
 '1/4/2024',
 '1/5/2023',
 '1/6/2023',
 '1/7/2023']

In [16]:
# Upper-case every equipment label and split it on whitespace once; the
# normalised label and its derived parts are all built from these tokens.
equipment_tokens = [str(label).upper().split() for label in data['equipment']]
# Label with runs of whitespace collapsed to single spaces
data['equipment'] = [' '.join(tokens) for tokens in equipment_tokens]
# First two tokens
data['eq1'] = [' '.join(tokens[:2]) for tokens in equipment_tokens]
# Last token
data['eq2'] = [tokens[-1] for tokens in equipment_tokens]

def get_eq3(x):
    """Return the third whitespace-separated token of ``x``.

    When ``x`` has fewer than three tokens, its last token is returned instead.
    """
    tokens = x.split()
    return tokens[2] if len(tokens) > 2 else tokens[-1]
# Third token of each label (or its last token when the label has fewer than three).
data['eq3'] = data.equipment.apply(get_eq3)

def retext_hongyan(x):
    """Map any label whose first token is 'HONGYANG' to 'HONGYAN 430'.

    Every other label is returned unchanged.
    """
    first_token = x.split()[0]
    return 'HONGYAN 430' if first_token == 'HONGYANG' else x

# Rewrite every 'HONGYANG ...' prefix to 'HONGYAN 430', the spelling get_name matches on.
data['eq1'] = data['eq1'].apply(retext_hongyan)

In [17]:
def get_name(eq1, eq2, eq3):
    """Build the unit code and the model name for one equipment label.

    Parameters
    ----------
    eq1 : str
        Label prefix that selects the naming rule (e.g. ``'KOMATSU PC'``).
    eq2 : str
        Unit number; appended to the unit code.
    eq3 : str
        Model token; used by the KOMATSU / KOBELCO rules.

    Returns
    -------
    dict
        ``{'equipment_name': [model_name], 'name': [unit_code]}``.

    Raises
    ------
    ValueError
        If ``eq1`` matches no naming rule, or if a HINO unit number ``eq2``
        cannot be converted to an integer.
    """
    if eq1 in ('KOMATSU PC', 'KOMATSU HM'):
        # 'PC' / 'HM' is the second word of the prefix.
        series = eq1.split()[1]
        msg = series + eq3 + '-' + eq2
        msg_ = eq1 + eq3

    elif eq1 == 'KOMATSU DOZER':
        # Only the first three characters of the model token are kept.
        msg = eq3[:3] + '-' + eq2
        msg_ = 'KOMATSU ' + eq3[:3]

    elif eq1 == 'SAKAI -':
        msg = ''.join(eq1.split()) + eq2
        msg_ = 'SAKAI SV'

    elif eq1 in ('KOBELCO SK', 'KOMATSU SK'):
        # 'KOMATSU SK' labels are reported under the KOBELCO model name as well.
        msg = 'SK' + eq3 + '-' + eq2
        msg_ = 'KOBELCO SK' + eq3

    elif eq1 in ('DT HINO', 'HINO 700'):
        msg = 'DT-' + eq2
        # Unit numbers below 310 are named ZS, the others ZY.
        msg_ = 'HINO ZS' if int(eq2) < 310 else 'HINO ZY'

    elif eq1 == 'GRADER GD535':
        msg = 'GD-' + eq2
        msg_ = 'KOMATSU GD'

    elif eq1 in ('LOADER WA380', 'LOADER -'):
        msg = 'LOADER-' + eq2
        msg_ = 'KOMATSU LOADER'

    elif eq1 == 'HONGYAN 430':
        msg = 'DT-' + eq2
        msg_ = 'HONGYAN KINKAN430'

    else:
        # Previously this fell through and failed on unbound local variables;
        # say which label could not be handled instead.
        raise ValueError(
            'Unrecognised equipment prefix {!r} (eq2={!r}, eq3={!r})'.format(eq1, eq2, eq3)
        )

    return {'equipment_name': [msg_], 'name': [msg]}

In [18]:
# Resolve each row once and reuse the result for both columns
# (get_name used to be evaluated twice per row).
resolved_names = [
    get_name(eq1, eq2, eq3)
    for eq1, eq2, eq3 in zip(data['eq1'], data['eq2'], data['eq3'])
]
data['name'] = [resolved['name'][0] for resolved in resolved_names]
data['equipment_name'] = [resolved['equipment_name'][0] for resolved in resolved_names]
# Keep the final columns only; the raw label and its eq1/eq2/eq3 parts are dropped.
data = data[['date','company','project','production','equipment_name','name','hm','produksi','retase','konsumsi']]

In [19]:
def retext_production(x):
    """Translate a report sheet name into its activity label.

    Any value that is not one of the four listed sheet names is labelled
    'Development'.
    """
    labels = {
        'REPORT unit OB': 'Stripping Overburden',
        'REPORT unit QUARRY': 'Quarry Mined',
        'REPORT unit ORE GETTING': 'Ore Getting',
        'REPORT unit DT HAUL': 'Hauling Ore',
    }
    return labels.get(x, 'Development')

In [20]:
# Replace the raw sheet names with readable activity labels.
# NOTE: not idempotent. Re-running this cell sends the already-translated labels
# through retext_production's fallback branch, which turns them all into 'Development'.
data['production'] = data.production.apply(retext_production)

In [21]:
# Write the cleaned table to an Excel workbook, without the index column.
data.to_excel('data pemakaian fuel.xlsx', index=False)

In [22]:
# Companies that have rows dated '1/1/2024' (date is compared as a string).
data[data.date=='1/1/2024'].company.unique()

array(['CV. Adil Prima Perkasa'], dtype=object)

In [24]:
# Distinct report dates present for CV. SENTOSA ABADI after cleaning.
# NOTE: 'date' holds 'D/M/YYYY' strings, so this ordering is lexicographic,
# not chronological.
data[data.company=='CV. SENTOSA ABADI'].sort_values('date', ascending=True).date.unique().tolist()

['1/4/2023', '1/5/2023', '1/6/2023', '1/7/2023']