In [1]:
import os
import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [2]:
def read_excel(path):
    """Load the Excel workbook at ``path`` into a pandas DataFrame.

    Thin wrapper around ``pandas.read_excel``; only ``path`` is forwarded,
    every other option is left at the pandas default.
    """
    frame = pd.read_excel(path)
    return frame

def perhitungan_jumlah_smp(df, faktor_motor=0.8, faktor_truk=2.5):
    """Add the weighted (SMP) and raw vehicle totals to ``df``.

    The frame is modified in place and also returned, so both
    ``df = perhitungan_jumlah_smp(df)`` and a bare call work.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the count columns ``MOTOR``, ``MOBIL`` and ``TRUK/BUS``.
    faktor_motor : float, default 0.8
        Weight applied to ``MOTOR`` to obtain ``SMP_MTR``.
    faktor_truk : float, default 2.5
        Weight applied to ``TRUK/BUS`` to obtain ``SMP_TRK``.

    Returns
    -------
    pandas.DataFrame
        The same object, with these columns written:
        ``SMP_MTR``, ``SMP_MBL`` (a copy of ``MOBIL``), ``SMP_TRK``,
        ``SMP`` (sum of the three weighted columns, rounded to 0 decimals)
        and ``JUMLAH`` (unweighted sum of the three count columns).

    Raises
    ------
    KeyError
        If any of the three count columns is missing. The check happens
        before anything is written, so a failing call leaves ``df`` untouched.
    """
    kolom_wajib = ('MOTOR', 'MOBIL', 'TRUK/BUS')
    kolom_hilang = [kolom for kolom in kolom_wajib if kolom not in df.columns]
    if kolom_hilang:
        # Fail before the first assignment so the caller's frame is never
        # left with only some of the derived columns.
        raise KeyError(f"missing required column(s): {kolom_hilang}")

    df['SMP_MTR'] = df['MOTOR'] * faktor_motor
    df['SMP_MBL'] = df['MOBIL']
    df['SMP_TRK'] = df['TRUK/BUS'] * faktor_truk
    df['SMP'] = (df['SMP_MTR'] + df['SMP_MBL'] + df['SMP_TRK']).round()
    df['JUMLAH'] = df['MOTOR'] + df['MOBIL'] + df['TRUK/BUS']
    return df

def _jam_to_minutes(value):
    """Convert an ``'HH:MM'`` string to minutes since midnight.

    Numeric values are returned unchanged so that running the conversion a
    second time on an already-converted column does not fail.
    """
    if isinstance(value, (int, float, np.integer, np.floating)):
        return value
    parts = value.split(':')
    return int(parts[0]) * 60 + int(parts[1])


def _load_pickled_model(path):
    """Unpickle and return the model stored at ``path``."""
    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file. Only point this at model files you produced or otherwise trust.
    with open(path, 'rb') as file:
        return pickle.load(file)


def _align_features(X, model):
    """Return ``X`` with exactly the columns ``model`` recorded at fit time.

    Estimators fitted on a DataFrame expose ``feature_names_in_``. When it is
    present, one-hot ``HARI_*`` columns absent from ``X`` (a weekday that does
    not occur in this batch) are added as zeros and the columns are put in the
    recorded order. Any other mismatch raises ``ValueError`` instead of being
    silently papered over. Models without ``feature_names_in_`` get ``X``
    unchanged.
    """
    expected = getattr(model, 'feature_names_in_', None)
    if expected is None:
        return X
    expected = list(expected)
    unexpected = [c for c in X.columns if c not in expected]
    absent = [c for c in expected if c not in X.columns]
    not_fillable = [c for c in absent if not str(c).startswith('HARI_')]
    if unexpected or not_fillable:
        raise ValueError(
            "feature mismatch with fitted model: "
            f"unexpected={unexpected}, missing={not_fillable}"
        )
    return X.reindex(columns=expected, fill_value=0.0)


def preprocessing(df, model_dir='models'):
    """Build the feature matrix from ``df`` and predict the three vehicle counts.

    Features are the one-hot encoded ``HARI`` column plus the standardized
    ``JAM`` column (minutes since midnight). Three pickled models are loaded
    from ``model_dir`` and each predicts on the same feature matrix.

    Side effect: ``df['JAM']`` is overwritten in place with minutes since
    midnight; use ``minutes_to_time`` to turn it back into ``'HH:MM'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``HARI`` and ``JAM`` (``'HH:MM'`` strings, or minutes
        if the frame was already passed through this function).
    model_dir : str, default 'models'
        Directory holding the three ``model_rf_*_bolanggandu_2023.pkl`` files.

    Returns
    -------
    tuple
        ``(motor_predictions, mobil_predictions, bus_predictions)``, each the
        return value of the corresponding model's ``predict``.

    Raises
    ------
    FileNotFoundError
        If a model file does not exist in ``model_dir``.
    ValueError
        If the built features cannot be matched to a model's recorded
        feature names.
    """
    encoder = OneHotEncoder(sparse_output=False)
    scaler = StandardScaler()

    # NOTE(review): the encoder and the scaler are both fit on this batch,
    # not on the data the models were trained on. Column alignment below
    # covers a weekday missing from the batch, but the JAM scaling still
    # depends on this batch's mean/std -- presumably the fitted scaler from
    # training should be persisted and loaded here instead; TODO confirm.
    encoded_days = encoder.fit_transform(df[['HARI']])

    # Convert 'JAM' to total minutes since midnight (in place on df).
    df['JAM'] = df['JAM'].apply(_jam_to_minutes)

    # Combine the encoded 'HARI' columns with the scaled 'JAM' feature.
    X_missing = pd.DataFrame(encoded_days, columns=encoder.get_feature_names_out(['HARI']))
    X_missing['JAM'] = scaler.fit_transform(df[['JAM']])[:, 0]

    model_files = (
        'model_rf_mtr_bolanggandu_2023.pkl',
        'model_rf_mbl_bolanggandu_2023.pkl',
        'model_rf_bus_bolanggandu_2023.pkl',
    )

    # Predict MOTOR, MOBIL and TRUK/BUS counts, one model per column.
    predictions = []
    for file_name in model_files:
        model = _load_pickled_model(os.path.join(model_dir, file_name))
        predictions.append(model.predict(_align_features(X_missing, model)))

    motor_predictions, mobil_predictions, bus_predictions = predictions
    return motor_predictions, mobil_predictions, bus_predictions
    

def minutes_to_time(minutes):
    """Format a count of minutes since midnight as a zero-padded ``'HH:MM'`` string."""
    whole_hours, leftover = divmod(minutes, 60)
    return f'{int(whole_hours):02d}:{int(leftover):02d}'

def menggabungkan_data(df, missing_times):
    """Append ``missing_times`` to ``df`` and return the rows in sorted order.

    Rows are ordered by ``TAHUN``, ``BULAN``, ``TANGGAL``, ``HARI`` and
    ``JAM`` (in that priority) and the result gets a fresh 0..n-1 index.
    Neither input frame is modified.
    """
    urutan_kolom = ['TAHUN','BULAN','TANGGAL','HARI','JAM']
    gabungan = (
        pd.concat([df, missing_times], ignore_index=True)
        .sort_values(by=urutan_kolom)
        .reset_index(drop=True)
    )
    return gabungan


In [3]:
# path_1: the main input workbook; path_2: the workbook of time slots whose
# vehicle-count columns are still empty and get filled in by the cells below.
path_1 = '../Data/Preprocessing/Data Balonggandu/2023/1_input_balonggandu_2023.xlsx'
path_2 = '../Data/Preprocessing/Data Balonggandu/2023/1_missing_times_balonggandu_2023.xlsx'

df = read_excel(path_1)
df_missing = read_excel(path_2)

# Row counts of both frames as a quick sanity check.
print(len(df),len(df_missing))

25470 9570


In [4]:
# Preview the missing-slot frame before any prediction has been written into it.
df_missing.head()

Unnamed: 0,WAKTU,TAHUN,BULAN,PEKAN,TANGGAL,HARI,JAM,MOTOR,MOBIL,TRUK/BUS,JUMLAH,SMP_MTR,SMP_MBL,SMP_TRK,SMP
0,2023-02-10 12:45:00,2023,2,2,10,Friday,12:45,,,,,,,,
1,2023-02-10 13:00:00,2023,2,2,10,Friday,13:00,,,,,,,,
2,2023-02-14 16:00:00,2023,2,2,14,Tuesday,16:00,,,,,,,,
3,2023-02-14 16:15:00,2023,2,2,14,Tuesday,16:15,,,,,,,,
4,2023-02-14 16:30:00,2023,2,2,14,Tuesday,16:30,,,,,,,,


In [5]:
# Predict the three vehicle counts for every row of df_missing.
# Side effect: preprocessing() overwrites df_missing['JAM'] with minutes since midnight.
motor_predictions, mobil_predictions, bus_predictions = preprocessing(df_missing)

In [6]:
# Menambahkan prediksi ke dalam dataframe
df_missing['MOTOR'] = motor_predictions.round()
df_missing['MOBIL'] = mobil_predictions.round()
df_missing['TRUK/BUS'] = bus_predictions.round()

In [7]:
# Derive the SMP_* / SMP / JUMLAH columns from the filled-in counts.
df_missing = perhitungan_jumlah_smp(df_missing)
# JAM holds minutes since midnight after preprocessing(); convert it back to 'HH:MM'.
df_missing['JAM'] = df_missing['JAM'].apply(minutes_to_time)

In [8]:
# Preview the missing-slot frame again, now with predicted counts and derived totals.
df_missing.head()

Unnamed: 0,WAKTU,TAHUN,BULAN,PEKAN,TANGGAL,HARI,JAM,MOTOR,MOBIL,TRUK/BUS,JUMLAH,SMP_MTR,SMP_MBL,SMP_TRK,SMP
0,2023-02-10 12:45:00,2023,2,2,10,Friday,12:45,172.0,74.0,90.0,336.0,137.6,74.0,225.0,437.0
1,2023-02-10 13:00:00,2023,2,2,10,Friday,13:00,237.0,91.0,79.0,407.0,189.6,91.0,197.5,478.0
2,2023-02-14 16:00:00,2023,2,2,14,Tuesday,16:00,287.0,101.0,80.0,468.0,229.6,101.0,200.0,531.0
3,2023-02-14 16:15:00,2023,2,2,14,Tuesday,16:15,348.0,104.0,70.0,522.0,278.4,104.0,175.0,557.0
4,2023-02-14 16:30:00,2023,2,2,14,Tuesday,16:30,328.0,96.0,61.0,485.0,262.4,96.0,152.5,511.0


In [9]:
# Append the filled-in rows to the main frame and sort the combined result.
df_compiled = menggabungkan_data(df, df_missing)
df_compiled.head()

Unnamed: 0,WAKTU,TAHUN,BULAN,PEKAN,TANGGAL,HARI,JAM,MOTOR,MOBIL,TRUK/BUS,JUMLAH,SMP_MTR,SMP_MBL,SMP_TRK,SMP
0,2023-01-01 00:00:00,2023,1,1,1,Sunday,00:00,103.0,25.0,14.0,142.0,82.4,25.0,35.0,142.0
1,2023-01-01 00:15:00,2023,1,1,1,Sunday,00:15,111.0,16.0,12.0,139.0,88.8,16.0,30.0,135.0
2,2023-01-01 00:30:00,2023,1,1,1,Sunday,00:30,114.0,26.0,6.0,146.0,91.2,26.0,15.0,132.0
3,2023-01-01 00:45:00,2023,1,1,1,Sunday,00:45,76.0,24.0,11.0,111.0,60.8,24.0,27.5,112.0
4,2023-01-01 01:00:00,2023,1,1,1,Sunday,01:00,73.0,24.0,7.0,104.0,58.4,24.0,17.5,100.0


In [10]:
# Save the filled-in missing slots and the combined frame as the step-2 workbooks;
# index=False leaves the DataFrame index out of the written sheets.
df_missing.to_excel('../Data/Preprocessing/Data Balonggandu/2023/2_missing_times_balonggandu_2023.xlsx', index=False)
df_compiled.to_excel('../Data/Preprocessing/Data Balonggandu/2023/2_input_balonggandu_2023.xlsx', index=False)