In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [None]:
# Read the raw tender history from the Excel workbook
data_df = pd.read_excel('./open_tender_history.xlsx')

# Keep only the columns used in this project and drop fully duplicated rows
df = data_df.loc[:, [
    'Kategori', 'Sumber Dana', 'Nama KLPD', 'LPSE', 'Satker',
    'HPS', 'PAGU', 'Kontrak', 'Skor Total', 'Nama Pemenang',
]].drop_duplicates()

In [None]:
# Preview the first five rows (five is the default row count of head())
df.head()

### Data Preparation

In [None]:
# Columns that hold labels/identifiers rather than quantities. They are imputed
# with the most frequent value and label-encoded, even if pandas happened to
# parse them as numbers.
categorical_columns = ['Kategori', 'Sumber Dana', 'Satker', 'Nama KLPD',
                       'Nama Pemenang', 'LPSE']

# Handle missing values: median for numeric features, mode for everything else.
# The dtype test uses pandas' helper because comparing a concrete dtype with the
# abstract `np.number` class does not reliably identify numeric columns.
for column in df.columns:
    if df[column].isna().all():
        # Neither a median nor a mode exists; fail with a clear message instead
        # of an opaque lookup error on an empty mode() result.
        raise ValueError(f"Column {column!r} has no non-missing values to impute from")
    is_numeric_feature = (column not in categorical_columns
                          and pd.api.types.is_numeric_dtype(df[column]))
    if is_numeric_feature:
        fill_value = df[column].median()
    else:
        fill_value = df[column].mode()[0]
    df[column] = df[column].fillna(fill_value)

# Label encoding for categorical variables
# `le` is not used in this cell; it is kept in case another cell refers to it.
le = LabelEncoder()

# One fitted encoder per column, so codes can be mapped back to the original
# values later with label_encoders[column].inverse_transform(...)
label_encoders = {}
for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])

# Split the dataset: 'Nama Pemenang' is the target, every other column is a feature
X = df.drop('Nama Pemenang', axis=1)
y = df['Nama Pemenang']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Work on explicit copies so the column assignments below do not trigger
# pandas' SettingWithCopyWarning on the frames returned by the split.
X_train = X_train.copy()
X_test = X_test.copy()

# Normalize numeric columns. The scaler is fitted on the training split only and
# then applied to the test split, so no test-set statistics leak into training.
# NOTE(review): 'Skor Total' and the label-encoded columns are left unscaled -
# confirm this is intended.
scaler = MinMaxScaler()
numeric_columns = ['HPS', 'PAGU', 'Kontrak']
X_train[numeric_columns] = scaler.fit_transform(X_train[numeric_columns])
X_test[numeric_columns] = scaler.transform(X_test[numeric_columns])

In [None]:
# Show the dtype of every feature column in the training split
feature_dtypes = X_train.dtypes
print(feature_dtypes)

### Model Training

In [None]:
# Seed TensorFlow so weight initialisation and batch shuffling are repeatable
tf.random.set_seed(42)

# Carve a validation set out of the training data. The test split must not be
# used for early stopping / checkpoint selection, otherwise it is no longer an
# unbiased hold-out for the final evaluation.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42)

num_features = X_train.shape[1]
# 'Nama Pemenang' holds the label-encoded target, so the number of distinct
# values is the number of classes (i.e., unique vendors)
num_classes = df['Nama Pemenang'].nunique()

# Define the model architecture: three hidden ReLU layers and a softmax output
# with one unit per class
model = Sequential([
    tf.keras.Input(shape=(num_features,)),
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

# Compile the model. The targets are integer class ids, hence the sparse
# variant of categorical cross-entropy.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Early stopping and model checkpointing, both driven by the validation loss.
# restore_best_weights asks Keras to put the best validation-loss weights back
# into `model`, in line with what the checkpoint file keeps.
# NOTE(review): with epochs=15 and patience=10, early stopping can only trigger
# in the last few epochs - consider a smaller patience or more epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True)

# Train the model
history = model.fit(X_fit, y_fit,
                    epochs=15,
                    validation_data=(X_val, y_val),
                    callbacks=[early_stopping, model_checkpoint])
