In [3]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

file_path = 'datasets.csv'
data = pd.read_csv(file_path)

In [4]:
features = ['Purchase_Frequency', 'Average_Order_Value', 'Churn_Probability', 'Time_Between_Purchases']
target = 'Lifetime_Value'

# Mengisi nilai kosong dengan nilai rata-rata pada setiap kolom/fitur yang dipilih
for feature in features:
    data[feature] = data[feature].fillna(data[feature].mean())

# Melakukan handling Outlier menggunakan metode IQR
for feature in features:
    Q1 = data[feature].quantile(0.25)
    Q3 = data[feature].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    data[feature] = data[feature].clip(lower_bound, upper_bound)

X = data[features]
y = data[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
model = GradientBoostingRegressor(random_state=1, n_estimators=2000, learning_rate=0.1, max_depth=4)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('Mean Absolute Error (MAE):', mae)
print('R-squared (R2):', r2)

Mean Absolute Error (MAE): 393.4388063459676
R-squared (R2): 0.9856085470529579


In [6]:
import pickle

with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [7]:
with open('model.pkl', 'rb') as f:
    loaded_model = pickle. load(f)

predictions = loaded_model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Absolute Error (MAE):", mae)
print("R-squared (R2):", r2)

Mean Absolute Error (MAE): 393.4388063459676
R-squared (R2): 0.9856085470529579
