In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os
import requests
from datetime import datetime
import re
from collections import Counter
import math
from tqdm import tqdm

# pandas display: never truncate the column list when a frame is rendered
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

# seaborn theme, configured in ONE call.
# sns.set(...) re-applies seaborn's default context/style/palette for every
# argument that is not passed, so calling it with only `rc` after
# set_palette / set_context / set_style silently discards those settings.
# `rc` is applied last and therefore still overrides the style's face colours.
sns.set(
    context='notebook',
    style='whitegrid',
    palette='Spectral',
    font_scale=1,
    rc={"axes.facecolor":"#FFF9ED","figure.facecolor":"#FFF9ED"},
)

# NOTE(review): this silences every warning for the whole notebook, including
# pandas deprecation warnings; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Data Anna 

In [2]:
path = 'data kak anna'

# Collect the workbooks to load.
# - A missing folder now fails right here with the error raised by os.listdir
#   instead of being swallowed and resurfacing later as an undefined `files`.
# - Hidden files ('.DS_Store', ...) and Excel lock files ('~$...') are skipped.
# - Sorting makes the concatenation order independent of the file system.
# NOTE(review): the commented-out export near the end of this section targets
# this same folder; if it is re-enabled, the exported file will be listed here too.
files = sorted(
    file_name for file_name in os.listdir(path)
    if not file_name.startswith(('.', '~$'))
)

# Read the 'Database' sheet of every workbook and stack them into one frame.
frames = []
for file_name in files:
    workbook_path = os.path.join(path, file_name)
    frames.append(pd.read_excel(workbook_path, sheet_name='Database'))

df = pd.concat(frames)
# Normalise headers to lower snake_case; str() keeps non-string headers from crashing.
df.columns = ['_'.join(str(col).lower().split()) for col in df.columns]

In [3]:
def get_id_truck(x):
    """Return the first whitespace-separated token of ``x``.

    ``x`` is converted with ``str()`` first, so non-string values are
    accepted. When the text holds no token at all (empty or whitespace-only),
    ``x`` itself is returned unchanged.
    """
    tokens = str(x).split()
    # Explicit empty check instead of a bare `except:` that would also hide
    # unrelated errors (and KeyboardInterrupt).
    return tokens[0] if tokens else x

# Derive the leading token of each truck id into its own column.
df['truck_id2'] = df['truck_id'].apply(get_id_truck)

In [4]:
# code G stands for Mining KBM
# code C stands for Rental BDM / Mining BDM

def get_kontrak(x):
    """Translate a truck-id code into its project name.

    'G' maps to 'Mining KBM' and 'C' maps to 'Mining BDM'; any other value
    is returned unchanged.
    """
    if x == 'G':
        return 'Mining KBM'
    if x == 'C':
        return 'Mining BDM'
    return x

# Map the truck-id code to the project it belongs to.
df['project'] = df['truck_id2'].apply(get_kontrak)

In [5]:
def get_move_type(x):
    """Expand a raw move-type code into its readable route label.

    Returns 'Unknown' for any value that is not one of the known codes.
    """
    # (accepted raw codes, readable label), checked in order.
    routes = (
        (('KBMP',), 'KBM - PABRIK'),
        (('KM7P',), 'KM 7 - PABRIK'),
        (('BLOK8P', 'BLOK8 - PABRIK'), 'BLOK 8 - PABRIK'),
        (('BLOK8KM7',), 'BLOK 8 - KM 7'),
    )
    for codes, label in routes:
        if x in codes:
            return label
    return 'Unknown'

# Translate the raw move-type codes into readable route labels.
df['move_type'] = df['movetype'].apply(get_move_type)

In [6]:
# Keep only the listed columns.
# NOTE(review): seven labels are listed here, yet the next cell assigns eight
# column names. That only lines up if this selection returns two columns
# labelled 'lokasi' (e.g. duplicate headers after lower-casing) — TODO confirm
# against the source workbooks.
df = df[['date_in','lokasi','production','project','move_type','truck_id','fuel']]

In [7]:
# NOTE(review): eight names are assigned although the preceding selection lists
# seven labels; this presumably relies on that selection returning two 'lokasi'
# columns (the second one becomes 'lokasi1') — TODO confirm.
df.columns = ['date','lokasi','lokasi1','production','project','move_type','name','fuel']

# Fill missing values by assigning the result back to the frame.
# `df[col].fillna(..., inplace=True)` operates on a temporary Series; it is
# deprecated in pandas 2.x and has no effect on `df` under copy-on-write.
df['fuel'] = df['fuel'].fillna(0)
df['name'] = df['name'].fillna('Unknown')
# Upper-case the location text so later exact-string comparisons are case-insensitive.
df['lokasi'] = df['lokasi'].fillna('Unknown').apply(lambda x: str(x).upper())

df['production'] = df['production'].fillna('Unknown')
df['date'] = df['date'].fillna('Unknown')

# Drop the second location column and renumber the rows 0..n-1.
df = df.drop(columns='lokasi1').reset_index(drop=True)

In [8]:
def retext_lokasi(x):
    """Classify a raw location label.

    Returns a dict with two keys:
      'mt'      -- the normalised route label
      'project' -- the project the route belongs to
    Both are 'Unknown' when ``x`` matches none of the known labels.
    Matching is exact string membership.
    """
    # (accepted raw labels, route label, project), checked in order.
    # NOTE(review): 'KBM KM7-PABRIK HINO 700+' is classified as 'KM 7 - PABRIK'
    # while 'KBM KM7-PABRIK HINO 700' (no '+') is classified as 'KBM - KM 7';
    # confirm this asymmetry is intended.
    rules = (
        (('KBM', 'KBM - PABRIK HONGYAN', 'KBM - PABRIK H700'),
         'KBM - PABRIK', 'Mining KBM'),
        (('KBM KM7-PABRIK HINO 700+',),
         'KM 7 - PABRIK', 'Mining KBM'),
        (('KBM - KM7 HINO 700+', 'KBM - KM7 HONGYAN', 'KBM KM7-PABRIK HINO 700'),
         'KBM - KM 7', 'Mining KBM'),
        (('BLOK 8 - KM7 H700', 'BLOK 8 - KM7', 'BLOK 8 - KM7 HONGYAN'),
         'BLOK 8 - KM 7', 'Mining BDM'),
        (('BLOK 8 KONTRAK', 'BLOK 8 - PABRIK'),
         'BLOK 8 - PABRIK', 'Mining BDM'),
        (('BLOK 5 - PABRIK', 'BLOK 5 - PABRIK H700+'),
         'BLOK 5 - PABRIK', 'Mining BDM'),
        (('BLOK5 - SOCKPILEKM7 700', 'BLOK5 - SOCKPILEKM7 700+'),
         'BLOK 5 - KM 7', 'Mining BDM'),
    )

    for labels, route, owner in rules:
        if x in labels:
            return {'mt': route, 'project': owner}

    return {'mt': 'Unknown', 'project': 'Unknown'}

# Classify every raw location once, then split the result into its two fields.
# 'project2' is filled before 'lokasi' is overwritten with the normalised route.
lokasi_info = df['lokasi'].apply(retext_lokasi)
df['project2'] = lokasi_info.apply(lambda info: info['project'])
df['lokasi'] = lokasi_info.apply(lambda info: info['mt'])

In [9]:
def retext_name(x):
    """Build a 'DT-<token>' unit name from the last whitespace-separated token of ``x``.

    Returns 'DT-Unknown' when ``x`` is not a string or contains no token
    (empty / whitespace-only). The text 'Unknown' itself also yields
    'DT-Unknown', because its last token is 'Unknown'.
    """
    # Explicit guards replace a bare `except:` that would also hide
    # unrelated errors (and KeyboardInterrupt).
    if not isinstance(x, str):
        return 'DT-Unknown'
    tokens = x.split()
    if not tokens:
        return 'DT-Unknown'
    return 'DT-' + tokens[-1]

# Normalise the unit names, then discard rows whose unit could not be identified.
df['name'] = df['name'].apply(retext_name)
df = df.loc[df['name'] != 'DT-Unknown']

In [10]:
def retext_date(x):
    """Reformat a 'YYYY-MM-DD[ time]' value as 'DD/MM/YYYY'.

    ``x`` is converted with ``str()``; only the part before the first
    whitespace is used. Raises IndexError when that part is missing or
    contains no '-' separator.
    """
    date_part = str(x).split()[0]
    pieces = date_part.split('-')
    # day is the last piece, month the second, year the first
    return '/'.join([pieces[-1], pieces[1], pieces[0]])
    
# df['date'] = df['date'].apply(retext_date)

In [11]:
# Fix the column order of the working frame.
df = df.loc[:, ['date','project','project2','production','move_type','lokasi','name','fuel']]

In [12]:
# Every row in this frame is labelled as the same production type.
df = df.assign(production='Hauling Ore')

In [13]:
# Discard rows whose fuel value equals 0.
df = df.loc[df['fuel'] != 0]

In [14]:
def retext_fuel(x):
    """Convert a raw fuel value to ``float``.

    Returns 0 when ``x`` cannot be converted (wrong type, non-numeric text,
    integer too large for a float) or converts to NaN, so that callers can
    discard unusable rows with a single ``!= 0`` filter.
    """
    try:
        value = float(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are treated as "no fuel"; anything else propagates.
        return 0
    # float('nan') converts without error but is not a usable quantity, and
    # NaN != 0 would let it slip through a zero filter.
    if math.isnan(value):
        return 0
    return value

In [15]:
# Coerce fuel to numbers; values that cannot be converted come back as 0 ...
df['fuel'] = df['fuel'].apply(retext_fuel)
# ... and are removed together with genuine zeros.
df = df.loc[df['fuel'] != 0]

In [16]:
# Keep the final column set; columns not listed here are dropped.
df = df[['date','project','production','lokasi','name','fuel']]
# The normalised location label is exposed as the frame's move type.
df = df.rename(columns={'lokasi':'move_type'})

In [17]:
# Missing dates were replaced with the text 'Unknown' earlier in this section,
# which makes a strict pd.to_datetime raise. Coerce unparseable values to NaT instead.
parsed_dates = pd.to_datetime(df['date'], errors='coerce')
has_date = parsed_dates.notna()
# Rows without a usable date cannot form a valid date#unit key, so drop them here.
df = df[has_date].copy()
# Store the calendar date as an ISO 'YYYY-MM-DD' string.
df['date'] = parsed_dates[has_date].dt.date.apply(str)

In [18]:
# Folder holding the production timesheets. The default is the original
# machine-specific location; set the TIMESHEET_DIR environment variable to run
# the notebook elsewhere without editing this cell.
path_ts = os.environ.get(
    'TIMESHEET_DIR',
    '/Users/dickyaryanto/Documents/PT Sentosa Abadi Mining/Data/production timesheet/',
)

# Skip hidden files ('.DS_Store', ...) and Excel lock files ('~$...');
# sorting makes the concatenation order independent of the file system.
ts_files = sorted(
    file_name for file_name in os.listdir(path_ts)
    if not file_name.startswith(('.', '~$'))
)

# Read every timesheet workbook (first sheet) and stack them.
frames = []
for file_name in ts_files:
    frames.append(pd.read_excel(os.path.join(path_ts, file_name)))
ts = pd.concat(frames)

# Keep the columns used for matching and give them the same names as the fuel frame.
ts = ts[['Date','Project/Name','Move Type','Production','Equipment','HM Engine Start']]
ts.columns = ['date','project','move_type','production','name','hm_start']
# The unit name is the part of 'Equipment' before the first '/'.
ts['name'] = ts['name'].apply(lambda x: str(x).split('/')[0])

In [19]:
# Restrict the timesheet to ore hauling on the two mining projects.
is_hauling = ts['production'].isin(['Hauling Ore'])
is_mining = ts['project'].isin(['Mining KBM','Mining BDM'])
ts = ts[is_hauling & is_mining]
# Reduce timestamps to their calendar date and store it as text.
ts['date'] = pd.to_datetime(ts['date']).dt.date.apply(lambda d: str(d))

In [20]:
# Join key shared by both frames: '<date>#<unit name>'.
ts['id'] = ts['date'] + '#' + ts['name']
df['id'] = df['date'] + '#' + df['name']

In [21]:
# Attach hm_start to each fuel record via the shared key.
# Left join: fuel records without a timesheet match are kept with hm_start = NaN.
fuel_records = df.drop_duplicates()
hm_lookup = ts[['id','hm_start']].drop_duplicates()
df = fuel_records.merge(hm_lookup, on='id', how='left')

In [22]:
# Remove every row that has a missing value in any column.
df = df.dropna(axis='index', how='any')

In [23]:
# The join key is no longer needed once the merge is done.
df = df.drop(columns=['id'])

In [24]:
# Keep the first row for each combination of the listed columns;
# columns outside the subset do not take part in the comparison.
df = df.drop_duplicates(
    subset=['date','project','production','move_type','name','fuel'],
    keep='first',
)

In [25]:
# Month/year bucket in 'MM/YY' form.
df['my'] = pd.to_datetime(df['date']).dt.strftime('%m/%y')

In [26]:
# Rows whose month/year bucket is one of the listed 'MM/YY' values.
# .copy() makes fc_all an independent frame, so assigning columns on it later
# neither depends on chained-assignment behaviour nor can affect `df`.
fc_all = df[df['my'].isin(['08/23','09/23','10/23','11/23','12/23'])].copy()

In [27]:
# Rewrite each date with retext_date.
fc_all = fc_all.assign(date=fc_all['date'].apply(retext_date))

In [28]:
# df.to_excel('./data kak anna/data clean kak ann.xlsx', index=False)

# Data Putri