In [None]:
from pathlib import Path

import numpy as np
import pandas as pd

# Where the synthetic dataset is written (relative to the working directory).
DATA_PATH = Path("regression/data/mahasiswa.csv")

# Fixed seed so that every run regenerates exactly the same dataset.
np.random.seed(42)
n = 200  # number of synthetic rows

# Feature columns. The order of the entries also fixes the order of the random
# draws, so reordering them would change the generated values.
data = pd.DataFrame({
    "ipk_1": np.random.uniform(2.0, 4.0, n),
    "ipk_2": np.random.uniform(2.0, 4.0, n),
    "ipk_3": np.random.uniform(2.0, 4.0, n),
    "ipk_4": np.random.uniform(2.0, 4.0, n),
    "sks": np.random.randint(80, 150, n),        # integers in [80, 150)
    "hadir": np.random.uniform(60, 100, n),      # floats in [60, 100)
    "organisasi": np.random.randint(0, 2, n),    # 0 or 1
    "mengulang": np.random.randint(0, 6, n),     # integers in [0, 6)
})

# Target column to be predicted. It is a deterministic formula of the mean of
# the four "ipk_*" columns, "hadir" and "mengulang", rounded to one decimal.
# "sks" and "organisasi" do not enter the formula and no noise is added.
data["lama_studi"] = (
    10
    - (data[["ipk_1", "ipk_2", "ipk_3", "ipk_4"]].mean(axis=1) - 2) * 2
    - (data["hadir"] / 100)
    + (data["mengulang"] * 0.5)
).round(1)

# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists before writing (otherwise a fresh checkout fails here).
DATA_PATH.parent.mkdir(parents=True, exist_ok=True)
data.to_csv(DATA_PATH, index=False)
data.head()

In [None]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Input dataset and output folder (relative to the working directory).
DATA_PATH = Path("regression/data/mahasiswa.csv")
MODEL_DIR = Path("regression/model")

# Load the dataset.
data = pd.read_csv(DATA_PATH)

# Every column except the target is used as a feature.
x = data.drop("lama_studi", axis=1)
y = data["lama_studi"]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Feature scaling: fit on the training split only, then apply the same
# transformation to the test split.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Models used in this project. The linear model is trained on the scaled
# features; the random forest is trained on the unscaled features.
lr = LinearRegression()
lr.fit(x_train_scaled, y_train)

rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(x_train, y_train)

# Evaluate both models on the held-out test split.
pred_lr = lr.predict(x_test_scaled)
pred_rf = rf.predict(x_test)

print("Linear Regression MAE:", mean_absolute_error(y_test, pred_lr))
print("Random Forest MAE:", mean_absolute_error(y_test, pred_rf))

# Save the linear model and the scaler to
# regression/model/regression_model.pkl and regression/model/regression_scaler.pkl.
# The random forest is only evaluated above and is not saved.
# joblib.dump does not create missing directories, so create the folder first.
MODEL_DIR.mkdir(parents=True, exist_ok=True)
joblib.dump(lr, MODEL_DIR / "regression_model.pkl")
joblib.dump(scaler, MODEL_DIR / "regression_scaler.pkl")