In [2]:
# Upload the dataset workbook through the Colab file-upload helper.
# The recorded output shows the file being saved as "ENB2012_data.xlsx",
# which is the path the data-loading cell reads.
from google.colab import files
# NOTE(review): `uploaded` presumably maps each uploaded filename to its
# contents -- confirm against the Colab docs; it is not used again in the
# cells visible here.
uploaded = files.upload()


Saving ENB2012_data.xlsx to ENB2012_data.xlsx


**Data Understanding**

In [3]:
import pandas as pd

# Load the Excel workbook into a DataFrame
df = pd.read_excel("ENB2012_data.xlsx")

# Preview: first five rows
print("Sample data:")
preview_rows = df.head(5)
display(preview_rows)

# Column overview: names, non-null counts and dtypes
print("\nInfo data:")
df.info()

# Summary statistics for every numeric column
print("\nDeskripsi statistik:")
summary_stats = df.describe()
display(summary_stats)


Sample data:


Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y1,Y2
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,20.84,28.28



Info data:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   X1      768 non-null    float64
 1   X2      768 non-null    float64
 2   X3      768 non-null    float64
 3   X4      768 non-null    float64
 4   X5      768 non-null    float64
 5   X6      768 non-null    int64  
 6   X7      768 non-null    float64
 7   X8      768 non-null    int64  
 8   Y1      768 non-null    float64
 9   Y2      768 non-null    float64
dtypes: float64(8), int64(2)
memory usage: 60.1 KB

Deskripsi statistik:


Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y1,Y2
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,0.764167,671.708333,318.5,176.604167,5.25,3.5,0.234375,2.8125,22.307195,24.58776
std,0.105777,88.086116,43.626481,45.16595,1.75114,1.118763,0.133221,1.55096,10.090204,9.513306
min,0.62,514.5,245.0,110.25,3.5,2.0,0.0,0.0,6.01,10.9
25%,0.6825,606.375,294.0,140.875,3.5,2.75,0.1,1.75,12.9925,15.62
50%,0.75,673.75,318.5,183.75,5.25,3.5,0.25,3.0,18.95,22.08
75%,0.83,741.125,343.0,220.5,7.0,4.25,0.4,4.0,31.6675,33.1325
max,0.98,808.5,416.5,220.5,7.0,5.0,0.4,5.0,43.1,48.03


**Preprocessing**

In [4]:
from sklearn.preprocessing import StandardScaler

# Report how many missing values each column contains
print("Jumlah nilai kosong per kolom:")
missing_per_column = df.isna().sum()
print(missing_per_column)

# Features are the first eight columns; the two targets follow them
X = df.iloc[:, :8]
y1 = df.iloc[:, 8]  # Heating Load
y2 = df.iloc[:, 9]  # Cooling Load

# Standardize the features; the fitted scaler is kept because later cells
# persist it and reuse it to transform new samples
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)


Jumlah nilai kosong per kolom:
X1    0
X2    0
X3    0
X4    0
X5    0
X6    0
X7    0
X8    0
Y1    0
Y2    0
dtype: int64


**Modeling dan Evaluasi**

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# Split the RAW features (target: y1, Heating Load) before any scaling.
# Splitting the already-scaled matrix would leak test-set statistics into the
# preprocessing step, because that scaler was fitted on every row.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y1, test_size=0.2, random_state=42
)

# Fit a dedicated scaler on the training rows only, then apply it to both
# partitions. The global `scaler` (fitted on the full dataset) is deliberately
# left untouched: it is the one persisted for deployment alongside the final
# model trained on all rows.
eval_scaler = StandardScaler().fit(X_train_raw)
X_train = eval_scaler.transform(X_train_raw)
X_test = eval_scaler.transform(X_test_raw)

# Candidate regressors to compare
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(random_state=42)
}

# Fit each model on the training split and score it on the held-out split
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    mse = mean_squared_error(y_test, preds)
    r2 = r2_score(y_test, preds)
    results[name] = {"MSE": mse, "R2": r2}

# Print one summary line per model
print("\nEvaluasi Model (Target: Heating Load - Y1):")
for name, metrics in results.items():
    print(f"{name} - MSE: {metrics['MSE']:.2f}, R2: {metrics['R2']:.2f}")



Evaluasi Model (Target: Heating Load - Y1):
Linear Regression - MSE: 9.15, R2: 0.91
Random Forest - MSE: 0.24, R2: 1.00
Gradient Boosting - MSE: 0.27, R2: 1.00


**Deploy Simulation**


In [6]:
import joblib

# Gradient Boosting is taken as the example "best" model; it is refit here on
# the complete scaled feature matrix with Heating Load as the target.
best_model = GradientBoostingRegressor(random_state=42)
best_model = best_model.fit(X_scaled, y1)

# Persist the model and the scaler as separate artifacts
artifacts = (
    (best_model, "model_heating.pkl"),
    (scaler, "scaler.pkl"),
)
for artifact, filename in artifacts:
    joblib.dump(artifact, filename)

print("Model dan scaler berhasil disimpan.")


Model dan scaler berhasil disimpan.


**Testing**

In [7]:
import numpy as np

# Sanity-check the fitted scaler + model on a single known row.
# NOTE: `best_model` was trained on every row of the dataset, including this
# one, so a close match here confirms the scale -> predict path works; it is
# not an estimate of accuracy on unseen data.
sample_row = 10

# Select with a list indexer so the sample stays a one-row DataFrame carrying
# the original column names. The scaler was fitted on the DataFrame `X`;
# handing it a bare NumPy array (`.values.reshape(1, -1)`) drops those names
# and triggers scikit-learn's feature-name mismatch warning.
sample = X.iloc[[sample_row]]
sample_scaled = scaler.transform(sample)
prediction = best_model.predict(sample_scaled)

print("Prediksi Heating Load:", prediction[0])
print("Nilai Asli:", y1.iloc[sample_row])


Prediksi Heating Load: 19.538794149119518
Nilai Asli: 19.34


