In [1]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Read data_bersih.csv (path relative to the working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='data_bersih.csv')
# Show the first five rows as this cell's output.
df.head(n=5)

Unnamed: 0,item,random_price
0,metal,7838
1,metal,7229
2,metal,7889
3,metal,7688
4,metal,7449


In [3]:
# List the distinct values found in the 'item' column, in order of first appearance.
print(df.loc[:, 'item'].unique())

['metal' 'battery' 'shoes' 'glass' 'cardboard' 'paper' 'plastic' 'clothes'
 'styrofoam']


In [4]:
# One-hot encode the 'item' column: each category becomes its own
# item_<category> indicator column and the original 'item' column is dropped.
# NOTE: this rebinds `df`, so re-running the cell without reloading the data
# fails because 'item' no longer exists.
df = pd.get_dummies(data=df, columns=['item'])

In [5]:
# Preview the first five rows of the encoded frame.
df.head(n=5)

Unnamed: 0,random_price,item_battery,item_cardboard,item_clothes,item_glass,item_metal,item_paper,item_plastic,item_shoes,item_styrofoam
0,7838,False,False,False,False,True,False,False,False,False
1,7229,False,False,False,False,True,False,False,False,False
2,7889,False,False,False,False,True,False,False,False,False
3,7688,False,False,False,False,True,False,False,False,False
4,7449,False,False,False,False,True,False,False,False,False


In [6]:
# Target: the 'random_price' column.
y = df['random_price']
# Features: every other column of the encoded frame.
X = df.drop('random_price', axis=1)

In [7]:
# Print the feature matrix followed by the target series, one after the other.
print(X, y, sep='\n')

     item_battery  item_cardboard  item_clothes  item_glass  item_metal  \
0           False           False         False       False        True   
1           False           False         False       False        True   
2           False           False         False       False        True   
3           False           False         False       False        True   
4           False           False         False       False        True   
..            ...             ...           ...         ...         ...   
130         False           False         False       False       False   
131         False           False         False       False       False   
132         False           False         False       False       False   
133         False           False         False       False       False   
134         False           False         False       False       False   

     item_paper  item_plastic  item_shoes  item_styrofoam  
0         False         False       Fal

In [8]:
# Split the dataset into training and test sets (80% train, 20% test).
# random_state is fixed so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Create the linear regression model (unfitted, library-default settings).
model = LinearRegression()

In [10]:
# Train the model on the training split, then predict prices for the test
# split. `fit` returns the fitted estimator itself, so the two steps chain;
# `model` is fitted in place exactly as with a separate fit call.
y_pred = model.fit(X_train, y_train).predict(X_test)

In [11]:
# Evaluasi model
# Score the test-set predictions: mean absolute error and R².
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
# Report both metrics, one per line, rounded to two decimals.
print(
    f"Mean Absolute Error: {mae:.2f}",
    f"R² Score: {r2:.2f}",
    sep="\n",
)

Mean Absolute Error: 117.04
R² Score: 0.99


In [12]:
# Save the fitted model to linear_model.pkl in the working directory.
# joblib is imported in the notebook's top import cell rather than here, so
# every dependency is declared in one place. The call's return value (the list
# of written filenames) is shown as the cell output.
joblib.dump(model, 'linear_model.pkl')

['linear_model.pkl']

In [13]:
# Record the feature column names of the training DataFrame, in order, and
# save them alongside the model.
# NOTE(review): presumably inference code uses this list to align its input
# columns with the ones the model was trained on — confirm with the consumer.
features = X.columns.tolist()
joblib.dump(features, 'features.pkl')

['features.pkl']