In [1]:
import pandas as pd

# Read the tomato sales CSV (path is relative to the notebook's working
# directory) into `df`, then leave `df` as the last expression so the
# notebook renders it as the cell output.
csv_path = "dataset_tomat.csv"
df = pd.read_csv(csv_path)
df

Unnamed: 0,Harga,Hari,Cuaca,Promo,Total_Penjualan
0,8000,Senin,Cerah,Tidak,92
1,7500,Senin,Berawan,Tidak,88
2,12000,Selasa,Hujan,Ya,105
3,9500,Selasa,Mendung,Tidak,70
4,7000,Rabu,Cerah,Tidak,98
...,...,...,...,...,...
90,7000,Jumat,Mendung,Tidak,80
91,6500,Sabtu,Cerah,Ya,178
92,11200,Sabtu,Cerah,Tidak,130
93,11500,Minggu,Cerah,Tidak,138


In [7]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# --- Features and target -------------------------------------------------
# "Harga" is the only column sent to the scaler; "Hari", "Cuaca" and "Promo"
# are one-hot encoded. The same two lists drive both the feature selection
# and the ColumnTransformer so they cannot drift apart.
numeric_column = ["Harga"]
categori_column = ["Hari", "Cuaca", "Promo"]

X = df[numeric_column + categori_column]
y = df["Total_Penjualan"]

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Preprocessing + model -----------------------------------------------
preprocessing = ColumnTransformer(
    transformers=[
        ("scaler", StandardScaler(), numeric_column),
        # handle_unknown="ignore": a category that never appeared in the
        # training split is encoded as all zeros instead of raising at
        # predict time (the default, "error", would abort the prediction).
        ("ohe", OneHotEncoder(handle_unknown="ignore"), categori_column),
    ]
)

# Wrapping preprocessing and the regressor in one Pipeline means the scaler
# and encoder are fitted on the training rows only and then re-applied
# unchanged to anything passed to model.predict().
model = Pipeline(
    steps=[
        ("preprocessing", preprocessing),
        # NOTE(review): max_depth=60 is unlikely to constrain the tree on a
        # dataset of this size; consider tuning it if overfitting matters.
        ("model", DecisionTreeRegressor(random_state=42, max_depth=60)),
    ]
)

# --- Fit and evaluate on the held-out rows -------------------------------
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("R2 Score : ", r2_score(y_test, y_pred))
print("MAE : ", mean_absolute_error(y_test, y_pred))
print("MSE : ", mean_squared_error(y_test, y_pred))

R2 Score :  0.814296665389038
MAE :  8.947368421052632
MSE :  214.73684210526315


In [5]:
# Build a single-row frame with the same feature columns the pipeline was
# trained on, run it through the fitted `model`, and report the prediction.
kolom_fitur = ["Harga", "Hari", "Cuaca", "Promo"]
nilai_baru = [6000, "Senin", "Mendung", "Ya"]
data_baru = pd.DataFrame([nilai_baru], columns=kolom_fitur)

# predict() returns one value per input row; there is exactly one row here.
hasil_prediksi = model.predict(data_baru)
prediksi = hasil_prediksi[0]
print(f"Model memprediksi total penjualan tomat {prediksi}")

Model memprediksi total penjualan tomat 137.5
