In [27]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [22]:
# Read the raw tender history from the Excel workbook next to the notebook.
data_df = pd.read_excel('./open_tender_history.xlsx')

# Columns kept for the project: categorical descriptors, the monetary values,
# the total score and the winner name (used as the target further down).
project_columns = [
    'Kategori',
    'Sumber Dana',
    'Nama KLPD',
    'LPSE',
    'Satker',
    'HPS',
    'PAGU',
    'Kontrak',
    'Skor Total',
    'Nama Pemenang',
]

# Select those columns, then drop rows that are exact duplicates across them.
selected_df = data_df[project_columns]
df = selected_df.drop_duplicates()

In [23]:
# Preview the first five rows (5 is the default row count of `head`).
df.head()

Unnamed: 0,Kategori,Sumber Dana,Nama KLPD,LPSE,Satker,HPS,PAGU,Kontrak,Skor Total,Nama Pemenang
0,Pengadaan Barang,APBN,Kementerian Pertanian,LPSE Kementerian Pertanian,BALAI BESAR UJI STANDAR KARANTINA PERTANIAN,1721075000.0,1750000000.0,1551181000.0,50.0,-
1,Jasa Lainnya,APBN,Kementerian Desa Pembangunan Daerah Tertinggal...,"LPSE Kementerian Desa, Pembangunan Daerah Tert...",SEKRETARIAT JENDERAL KEMENTERIAN DESA PDT DAN ...,1210000000.0,1210000000.0,1207580000.0,60.71,-
2,Jasa Konsultansi Badan Usaha Non Konstruksi,APBN,Kementerian Perhubungan,LPSE Kementerian Perhubungan,BANDAR UDARA REMBELE TAKENGON,127215000.0,127256000.0,123000000.0,32.14,3D STATION CONSULTANT
3,Jasa Konsultansi Badan Usaha Non Konstruksi,APBN,Kementerian Komunikasi Dan Informatika,LPSE Kementerian Komunikasi dan Informatika,DIREKTORAT JENDERAL APLIKASI INFORMATIKA,9554658000.0,9900000000.0,9331282000.0,,A.T. Kearney
4,Jasa Konsultansi Badan Usaha Non Konstruksi,APBN,Kementerian Komunikasi Dan Informatika,LPSE Kementerian Komunikasi dan Informatika,SEKRETARIAT JENDERAL KOMINFO,9934500000.0,10000000000.0,9657000000.0,,A.T. Kearney


### Data Preparation

In [24]:
# Handle missing values: numeric columns are filled with their median, every
# other column with its most frequent value.
# `pd.api.types.is_numeric_dtype` replaces the former `dtype == np.number`
# comparison, which depended on NumPy's deprecated coercion of the abstract
# `np.number` class to a concrete dtype (float64): it emitted a warning per
# column and never matched integer columns.
for column in df.columns:
    if pd.api.types.is_numeric_dtype(df[column]):
        fill_value = df[column].median()
    else:
        modes = df[column].mode()
        # A column with no non-missing values has no mode; skip it instead of
        # failing with an IndexError on `mode()[0]`.
        if modes.empty:
            continue
        fill_value = modes.iloc[0]
    df[column] = df[column].fillna(fill_value)

# Label encoding for categorical variables
# `le` is not used in this cell; it is kept so that any other cell that
# references it keeps working.
le = LabelEncoder()
categorical_columns = ['Kategori', 'Sumber Dana', 'Satker', 'Nama KLPD',
                       'Nama Pemenang', 'LPSE', ]

# One fitted encoder per column is stored so the integer codes can be mapped
# back to the original labels later (e.g. via `inverse_transform`).
label_encoders = {}
for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])

# Split the dataset: the encoded winner is the target, everything else is a feature.
X = df.drop('Nama Pemenang', axis=1)
y = df['Nama Pemenang']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# `train_test_split` returns row selections of `X`; take explicit copies so the
# column assignments below modify independent frames and do not trigger
# pandas' SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Normalize numeric columns. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
# NOTE(review): 'Skor Total' and the label-encoded columns are left unscaled -
# confirm that this is intended.
scaler = MinMaxScaler()
numeric_columns = ['HPS', 'PAGU', 'Kontrak']
X_train[numeric_columns] = scaler.fit_transform(X_train[numeric_columns])
X_test[numeric_columns] = scaler.transform(X_test[numeric_columns])

  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


In [25]:
# Show the dtype of every feature column after encoding and scaling.
feature_dtypes = X_train.dtypes
print(feature_dtypes)

Kategori         int32
Sumber Dana      int32
Nama KLPD        int32
LPSE             int32
Satker           int32
HPS            float64
PAGU           float64
Kontrak        float64
Skor Total     float64
dtype: object


### Model Training

In [None]:
# Network dimensions are derived from the prepared data.
n_features = X_train.shape[1]
n_classes = len(df['Nama Pemenang'].unique())  # one output unit per distinct encoded winner

# Fully connected classifier: three ReLU hidden layers followed by a softmax
# layer over the winner classes.
model = Sequential([
    Dense(128, activation='relu', input_shape=[n_features]),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(n_classes, activation='softmax'),
])

# Adam with an explicit learning rate; the sparse cross-entropy loss is fed the
# integer class labels produced by the label encoder.
adam = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=adam,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop once val_loss has not improved for 10 epochs, and write the model to
# 'best_model.h5' only when val_loss improves.
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
model_checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True)

# Train the model.
# NOTE(review): the test split is also used as the validation set here, so it
# influences early stopping and checkpoint selection - consider a separate
# validation split.
history = model.fit(
    X_train,
    y_train,
    epochs=15,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping, model_checkpoint],
)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100