In [5]:
# Install TensorFlow (if not already installed)
!pip install tensorflow

import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder




In [6]:
# Load the dataset
file_path = '/content/df_cleaned_v2.csv'  # Adjust the path as necessary after uploading your file
df = pd.read_csv(file_path)

In [7]:
# Drop rows with missing values
df = df.dropna()

In [8]:
# Encode categorical features
categorical_features = ['brand', 'Kondisi', 'Dirakit', 'InjeksiLangsung', 'Aspirasi']
label_encoders = {}

for col in categorical_features:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [9]:
# Split the data into features and targets
X = df.drop(columns=['price'])
y = df['price']

In [10]:
# Normalize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [11]:
# Split the data into training and testing sets for both classification and regression
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
# Create a categorical target for classification by binning the price
y_classification = pd.qcut(y, q=3, labels=False)  # Bin the prices into 3 categories
y_train_cls, y_test_cls = train_test_split(y_classification, test_size=0.2, random_state=42)


In [13]:
# Check shapes
(X_train.shape, X_test.shape, y_train.shape, y_test.shape, y_train_cls.shape, y_test_cls.shape)


((10174, 14), (2544, 14), (10174,), (2544,), (10174,), (2544,))

In [14]:
# Define the classification model
classification_model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, input_dim=X_train.shape[1], activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax')  # 3 output classes
])

classification_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [15]:
# Train the classification model
classification_model.fit(X_train, y_train_cls, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x78798da77070>

In [16]:
# Evaluate the classification model
classification_loss, classification_accuracy = classification_model.evaluate(X_test, y_test_cls)
print(f"Classification Accuracy: {classification_accuracy}")

Classification Accuracy: 0.9375


In [17]:
# Define the regression model
regression_model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, input_dim=X_train.shape[1], activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)  # Single output for regression
])

regression_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [18]:
# Train the regression model
regression_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x78797d250880>

In [19]:
# Evaluate the regression model
regression_loss, regression_mae = regression_model.evaluate(X_test, y_test)
print(f"Regression MAE: {regression_mae}")

Regression MAE: 414486816.0
