In [None]:
print("Hello World")

Hello World


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import cross_val_score, KFold
import matplotlib.pyplot as plt


## **1. Akses Dataset**

Akses dataset serangan hama dengan 7 jenis hama berbeda yang disertai kategori :

(T) -> jumlah area yang terkena serangan

(P) -> jumlah area yang mengalami kerusakan lebih dari 75%

In [None]:
dataset_dir = "/content/penerapan-pengelolaan-hama-terpadu-tanaman-pangan (1) (1).xlsx"

In [None]:
df = pd.ExcelFile(dataset_dir)  # Load multi-level headers
df.sheet_names


['padi',
 'jagung',
 'kedelai',
 'kacang tanah',
 'kacang hijau',
 'ubi kayu',
 'ubi jalar']

In [None]:
# Load the data from the "padi" sheet to inspect its structure and contents
padi_data = df.parse('padi')

# Display the first few rows of the "padi" dataset to understand its structure
padi_data.head(178)


Unnamed: 0,TAHUN,NO PROV,PROV,PENGGEREK BATANG PADI,Unnamed: 4,WBC,Unnamed: 6,TIKUS,Unnamed: 8,BLAS,Unnamed: 10,KRESEK,Unnamed: 12,TUNGRO,Unnamed: 14,KR/KH,Unnamed: 16,TOTAL OPUT,Unnamed: 18
0,,,,T,P,T,P,T,P,T,P,T,P,T,P,T,P,T,P
1,2018,1.0,Aceh,2818.7,0,2663.48,95.25,2722.45,0,1409,0,2738,0,0,0,0,0,12351.63,95.25
2,2018,2.0,Sumatera Utara,1799.0,0,478.56,3.18,2030.15,22.5,3042.95,0,1831.5,0,35.1,0,68.4,0,9285.66,25.68
3,2018,3.0,Sumatera Barat,116.15,0,430.7,11.6,2044.21,136.25,465.01,14.05,31.25,0.5,104.85,5.25,57.45,14.5,3249.62,182.15
4,2018,4.0,Riau,952.9,0,435.35,0,658.25,1,485.65,0,117.55,0,25,0,,,2674.7,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171,2022,31.0,Maluku,1422.75,0,203.5,2.5,295,0,48,0,26.75,0,5.5,0,,,2001.5,2.5
172,2022,32.0,Maluku Utara,282.8,0,9,0,151.5,0,8.5,0,0,0,13.4,0,,,465.2,0
173,2022,33.0,Papua Barat,645.7,0,229.8,0,34.4,0,48.5,0,61,0,,,,,1019.4,0
174,2022,34.0,Papua,259.4,0,9,0,18.25,0,333.9,0,626.5,0,1179.25,0,,,2426.3,0


## **2. Preprocessing Data**

Membersihkan dataset dan mempersiapkan untuk dilakukan regresi linear :

- Perbaiki header

- Hapus baris yang tidak diperlukan

- Isi kolom -, null, nan, dengan nilai 0

In [None]:
# Load the dataset (adjust the file path as needed)
data = pd.read_excel(dataset_dir, header=[0, 1])

# Flatten the multi-level header into a single header and clean column names
data.columns = [" ".join(col).strip() for col in data.columns.values]

# Further clean column names to remove spaces or unusual characters
data.columns = data.columns.str.strip()
data.columns = data.columns.str.replace(" ", "_")
data.columns = data.columns.str.replace("/", "_")
data.columns = data.columns.str.replace("-", "_")

# Example for renaming specific columns
data.rename(columns={
    "TAHUN_Unnamed:_0_level_1": "TAHUN",
    "NO_PROV_Unnamed:_1_level_1": "KODE_PROVINSI",
    "PROV_Unnamed:_2_level_1": "PROVINSI",
}, inplace=True)

data.fillna(0, inplace=True)
rows_to_drop = [34, 69, 104, 139, 174]  # Raplace unwanted columns
data = data.drop(index=rows_to_drop)
data = data.drop(columns=["PROVINSI"])

data.head(39)

Unnamed: 0,TAHUN,KODE_PROVINSI,PENGGEREK_BATANG_PADI_T,PENGGEREK_BATANG_PADI_P,WBC_T,WBC_P,TIKUS_T,TIKUS_P,BLAS_T,BLAS_P,KRESEK_T,KRESEK_P,TUNGRO_T,TUNGRO_P,KR_KH_T,KR_KH_P,TOTAL_OPUT_T,TOTAL_OPUT_P
0,2018,1.0,2818.7,0.0,2663.48,95.25,2722.45,0.0,1409.0,0.0,2738.0,0.0,0.0,0.0,0.0,0.0,12351.63,95.25
1,2018,2.0,1799.0,0.0,478.56,3.18,2030.15,22.5,3042.95,0.0,1831.5,0.0,35.1,0.0,68.4,0.0,9285.66,25.68
2,2018,3.0,116.15,0.0,430.7,11.6,2044.21,136.25,465.01,14.05,31.25,0.5,104.85,5.25,57.45,14.5,3249.62,182.15
3,2018,4.0,952.9,0.0,435.35,0.0,658.25,1.0,485.65,0.0,117.55,0.0,25.0,0.0,0.0,0.0,2674.7,1.0
4,2018,5.0,345.98,0.02,70.4,4.5,448.36,29.96,169.23,3.25,61.49,1.0,4.41,0.5,0.0,0.0,1099.87,39.23
5,2018,6.0,4681.48,13.5,1728.57,126.66,8608.06,581.0,3076.3,11.0,2313.05,2.0,83.27,0.0,135.9,86.85,20626.63,821.01
6,2018,7.0,837.95,0.0,60.25,1.25,796.24,10.0,614.1,0.0,285.75,1.5,34.5,0.0,0.5,0.0,2629.29,12.75
7,2018,8.0,4717.0,0.0,1842.0,69.0,6765.0,150.0,3071.0,0.0,2885.0,0.0,1.0,0.0,26.0,0.0,19307.0,219.0
8,2018,9.0,17.42,0.0,424.1,0.0,214.95,27.0,175.7,0.0,40.43,0.0,0.0,0.0,0.0,0.0,872.6,27.0
9,2018,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Filter hanya kolom yang relevan (kode provinsi, tahun, dan kolom _T)
columns_to_keep = ['KODE_PROVINSI'] + [col for col in data.columns if '_T' in col]
data_T = data[columns_to_keep]

# Mengelompokkan berdasarkan kode provinsi, lalu menjumlahkan data dalam 5 tahun
data_T = data_T.groupby('KODE_PROVINSI').sum().reset_index()

data_T.head(40)

# # Menambahkan kolom Provinsi berdasarkan kode (jika ada referensi ke nama provinsi)
# # Contoh mapping kode ke nama provinsi
# provinsi_mapping = {
#     1: "Provinsi A",
#     2: "Provinsi B",
#     # Tambahkan semua 34 provinsi sesuai kode
# }

# # Menambahkan kolom Provinsi
# grouped_df['Provinsi'] = grouped_df['KODE_PROVINSI'].map(provinsi_mapping)

# # Menyusun ulang kolom
# final_df = grouped_df[['Provinsi', 'KODE_PROVINSI'] + [col for col in grouped_df.columns if '_T' in col]]

# # Menyimpan atau menampilkan hasil
# print(final_df)
# # final_df.to_excel("output.xlsx", index=False)  # Simpan ke Excel


Unnamed: 0,KODE_PROVINSI,PENGGEREK_BATANG_PADI_T,WBC_T,TIKUS_T,BLAS_T,KRESEK_T,TUNGRO_T,KR_KH_T,TOTAL_OPUT_T
0,1.0,14862.8,6480.21,12005.84,6582.85,11568.29,11.25,0.0,51511.24
1,2.0,9210.12,3167.51,8017.13,22837.48,16576.65,94.9,163.9,60067.69
2,3.0,1033.85,3836.66,8177.15,1776.77,274.8,552.82,128.65,15780.7
3,4.0,5297.481,2927.9,3236.9,2804.37,671.43,26.0,0.0,14964.081
4,5.0,2314.74,541.64,3304.04,1574.82,719.88,222.13,0.0,8677.25
5,6.0,18632.835,10267.38,30994.953083,13635.435,10670.095,128.72,694.65,85024.068083
6,7.0,3391.05,3308.4,4065.74,3462.75,1421.95,229.25,0.5,15879.64
7,8.0,28428.77,20087.695,31846.199,16314.12,15140.7,159.25,1939.25,113915.984
8,9.0,588.09,1326.54,1116.22,2146.02,122.22,1.0,0.0,5300.09
9,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


##**3. Filtering Data**

Filtering bagian data yang akan digunakan dalam model regresi linear.

In [None]:
# Independent variables
X_T = data_T[["PENGGEREK_BATANG_PADI_T",	"WBC_T",	"TIKUS_T",	"BLAS_T",	"KRESEK_T",	"TUNGRO_T",	"KR_KH_T"]]
#X_P = data[["PENGGEREK_BATANG_PADI_P",  "WBC_P",	"TIKUS_P",	"BLAS_P",	"KRESEK_P",	"TUNGRO_P",	"KR_KH_P"]]

# Target variables (TOTAL_OPUT_T, TOTAL_OPUT_P)
y_total_t = data_T["TOTAL_OPUT_T"]
#y_total_p = data["TOTAL_OPUT_P"]


In [None]:
X_T.head()
print(X_T.shape)

(34, 7)


In [None]:
# X_P.head()
# print(X_P.shape)

In [None]:
y_total_t.head()
print(y_total_t.shape)

(34,)


In [None]:
# y_total_p.head()
# print(y_total_p.shape)

## **4. UJICOBA DENGAN LIBRARY**

**4.1. Train Test Split**

Pemisahan data latih dan data uji, dataset dipisah untuk kategori prediksi T dan P dengan perbandingan 80% data latih dan 20% data uji

In [None]:
# Train Test Split To Predict TOTAL T
X_train_T, X_test_T, y_train_t, y_test_t = train_test_split(X_T, y_total_t, test_size=0.2, random_state=42)

# Train Test Split To Predict TOTAL P
# X_train_P, X_test_P, y_train_p, y_test_p = train_test_split(X_P, y_total_p, test_size=0.2, random_state=42)


In [None]:
print("Shape of X_train(T):", X_train_T.shape)
print("Shape of X_test(T):", X_test_T.shape)

# print("Shape of X_train(P):", X_train_P.shape)
# print("Shape of X_test(P):", X_test_P.shape)

Shape of X_train(T): (27, 7)
Shape of X_test(T): (7, 7)


**4.2. Model Regresi Linear**

Fit data latih ke model regresi linear. Model menggunakan library scikit-learn. Dibentuk 2 model regresi untuk masing - masing perhitungan prediksi T dan P

In [None]:
# Train model TOTAL_T
model_t = LinearRegression()
model_t.fit(X_train_T, y_train_t)

# Train model for TOTAL_P
# model_p = LinearRegression()
# model_p.fit(X_train_P, y_train_p)

Mengambil nilai intercept dan koefisien dari model yang menggunakan library

In [None]:
# Nilai intercept untuk model TOTAL_T
intercept_t = model_t.intercept_
coefficients_t = model_t.coef_

# Nilai intercept untuk TOTAL_P
# intercept_p = model_p.intercept_
# coefficients_p = model_p.coef_

print("Intercept untuk TOTAL_T:", intercept_t)
print("Koefisien untuk TOTAL_T:", coefficients_t)


Intercept untuk TOTAL_T: -5.820766091346741e-11
Koefisien untuk TOTAL_T: [1. 1. 1. 1. 1. 1. 1.]


**4.3. Pengujian & Evaluasi Model**

In [None]:
y_pred_t = model_t.predict(X_test_T)
# y_pred_p = model_p.predict(X_test_P)

# Define the K-Fold Cross-Validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Evaluate for Total_T
print("== Model Performance for TOTAL_OPUT_T ==")
print(f"MSE: {mean_squared_error(y_test_t, y_pred_t):.2f}, \nR2: {r2_score(y_test_t, y_pred_t):.2f}")
scores_t = cross_val_score(model_t, X_T, y_total_t, cv=kf, scoring='r2')
print(f"K-Fold R² Scores for TOTAL_OPUT_T: {scores_t}")
print(f"Mean R² Score for TOTAL_OPUT_T: {scores_t.mean():.2f}")

# print("\n=====================================\n")

# # Evaluate for Total_P
# print("== Model Performance for TOTAL_OPUT_P ==")
# print(f"MSE: {mean_squared_error(y_test_p, y_pred_p):.2f}, \nR2: {r2_score(y_test_p, y_pred_p):.2f}")
# scores_p = cross_val_score(model_p, X_P, y_total_p, cv=5, scoring='r2')
# print(f"K-Fold R² Scores for TOTAL_OPUT_P: {scores_p}")
# print(f"Mean R² Score for TOTAL_OPUT_P: {scores_p.mean():.2f}")


== Model Performance for TOTAL_OPUT_T ==
MSE: 0.00, 
R2: 1.00
K-Fold R² Scores for TOTAL_OPUT_T: [1. 1. 1. 1. 1.]
Mean R² Score for TOTAL_OPUT_T: 1.00


##**5. UJICOBA PERHITUNGAN MANUAL**

**5.1. Akses Data Sebelumnya**

In [None]:
# df = pd.DataFrame(data)

# X_manual = df[["PENGGEREK_BATANG_PADI_T",	"PENGGEREK_BATANG_PADI_P",	"WBC_T",	"WBC_P",	"TIKUS_T",	"TIKUS_P",	"BLAS_T",	"BLAS_P",	"KRESEK_T",	"KRESEK_P",	"TUNGRO_T",	"TUNGRO_P",	"KR_KH_T",	"KR_KH_P"]].values
# Y_T = df['TOTAL_OPUT_T'].values.reshape(-1, 1)  # Vektor target (Y_T)
# Y_P = df['TOTAL_OPUT_P'].values.reshape(-1, 1)  # Vektor target (Y_P)

XT_manual = X_T.values
# XP_manual = X_P.values
Y_T = y_total_t.values.reshape(-1, 1)  # Vektor target (Y_T)
# Y_P = y_total_p.values.reshape(-1, 1) # Vektor target (Y_P)


In [None]:
print(XT_manual)
print(XT_manual.shape)

[[1.48628000e+04 6.48021000e+03 1.20058400e+04 6.58285000e+03
  1.15682900e+04 1.12500000e+01 0.00000000e+00]
 [9.21012000e+03 3.16751000e+03 8.01713000e+03 2.28374800e+04
  1.65766500e+04 9.49000000e+01 1.63900000e+02]
 [1.03385000e+03 3.83666000e+03 8.17715000e+03 1.77677000e+03
  2.74800000e+02 5.52820000e+02 1.28650000e+02]
 [5.29748100e+03 2.92790000e+03 3.23690000e+03 2.80437000e+03
  6.71430000e+02 2.60000000e+01 0.00000000e+00]
 [2.31474000e+03 5.41640000e+02 3.30404000e+03 1.57482000e+03
  7.19880000e+02 2.22130000e+02 0.00000000e+00]
 [1.86328350e+04 1.02673800e+04 3.09949531e+04 1.36354350e+04
  1.06700950e+04 1.28720000e+02 6.94650000e+02]
 [3.39105000e+03 3.30840000e+03 4.06574000e+03 3.46275000e+03
  1.42195000e+03 2.29250000e+02 5.00000000e-01]
 [2.84287700e+04 2.00876950e+04 3.18461990e+04 1.63141200e+04
  1.51407000e+04 1.59250000e+02 1.93925000e+03]
 [5.88090000e+02 1.32654000e+03 1.11622000e+03 2.14602000e+03
  1.22220000e+02 1.00000000e+00 0.00000000e+00]
 [0.000000

In [None]:
#print(Y_T)
print(Y_T.shape)

(34, 1)


In [None]:
# #print(XP_manual)
# print(XP_manual.shape)

In [None]:
# #print(Y_P)
# print(Y_P.shape)

**5.2. Ujicoba OUTPUT_T**

5.2.1. Train Test Split

In [None]:
# Train Test Split To Predict TOTAL T
X_train_T, X_test_T, Y_train_T, Y_test_T = train_test_split(XT_manual, Y_T, test_size=0.2, random_state=42)

In [None]:
print(f"Shape data latih sumbu X (Variabel Independen) : {X_train_T.shape}")
print(f"Shape data latih sumbu Y (Variabel dependen) : {Y_train_T.shape}")
print(f"Shape data uji sumbu X (Variabel Independen) : {X_test_T.shape}")
print(f"Shape data uji sumbu Y (Variabel dependen) : {Y_test_T.shape}")

Shape data latih sumbu X (Variabel Independen) : (27, 7)
Shape data latih sumbu Y (Variabel dependen) : (27, 1)
Shape data uji sumbu X (Variabel Independen) : (7, 7)
Shape data uji sumbu Y (Variabel dependen) : (7, 1)


5.2.2. Perhitungan Regresi Linear

Tambahkan variabel untuk nilai intercept/konstanta

In [None]:
X_train_T_intercept = np.hstack((np.ones((X_train_T.shape[0], 1)), X_train_T))
X_test_T_intercept = np.hstack((np.ones((X_test_T.shape[0], 1)), X_test_T))

In [None]:
print(X_train_T_intercept)
print(X_train_T_intercept.shape)

[[1.00000000e+00 6.74783900e+04 4.78992990e+04 5.53823000e+04
  2.80100710e+04 4.39178160e+04 8.11360000e+02 4.69750000e+02]
 [1.00000000e+00 4.59990000e+03 1.74960000e+03 1.07900000e+02
  2.15300000e+02 4.28100000e+02 1.70000000e+01 3.00000000e+01]
 [1.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]
 [1.00000000e+00 1.48628000e+04 6.48021000e+03 1.20058400e+04
  6.58285000e+03 1.15682900e+04 1.12500000e+01 0.00000000e+00]
 [1.00000000e+00 2.31474000e+03 5.41640000e+02 3.30404000e+03
  1.57482000e+03 7.19880000e+02 2.22130000e+02 0.00000000e+00]
 [1.00000000e+00 4.96123000e+03 1.56422000e+03 4.82072000e+03
  4.41123000e+03 2.89430500e+03 1.37176000e+03 0.00000000e+00]
 [1.00000000e+00 1.09109500e+04 1.26750000e+03 2.42212000e+03
  6.24846000e+03 7.49773000e+03 6.99490000e+02 4.00000000e+00]
 [1.00000000e+00 1.86328350e+04 1.02673800e+04 3.09949531e+04
  1.36354350e+04 1.06700950e+04 1.28720000e+02 6.94650000e+02]


Hitung Transpose dari Matriks X

In [None]:
X_transpose = X_train_T_intercept.T

In [None]:
print(X_transpose)
print(X_transpose.shape)

[[1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
  1.00000000e+00 1.00000000e+00 1.00000000e+00]
 [6.74783900e+04 4.59990000e+03 0.00000000e+00 1.48628000e+04
  2.31474000e+03 4.96123000e+03 1.09109500e+04 1.86328350e+04
  1.05255900e+04 8.06730000e+04 9.21012000e+03 1.03385000e+03
  1.00269200e+04 5.29748100e+03 1.96995700e+04 2.90190000e+02
  2.67796000e+03 1.61802300e+04 1.02380920e+04 3.67798300e+04
  3.39105000e+03 2.86890000e+03 2.09264000e+03 2.84287700e+04
  2.46360000e+02 2.99427940e+04 7.51587250e+03]
 [4.78992990e+04 1.74960000e+03 0.00000000e+00 6.48021000e+03
  5.41640000e+02 1.56422000e+03 1.26750000e+03 1.02673800e+04
  9.21464583e+02 6.43291009e+04 3.16

Kalikan Matriks Transpose X dengan Matriks X itu sendiri kemudian cari nilai inversnya

In [None]:
multiplication_X = X_transpose @ X_train_T_intercept
print(multiplication_X)
print(multiplication_X.shape)

[[2.70000000e+01 4.00880065e+05 1.95997495e+05 3.32277509e+05
  1.97805348e+05 2.18189177e+05 2.09898733e+04 6.56251000e+03]
 [4.00880065e+05 1.60251436e+10 1.00225538e+10 1.31217352e+10
  7.84812962e+09 9.38275358e+09 3.99267905e+08 2.93381825e+08]
 [1.95997495e+05 1.00225538e+10 7.33084629e+09 8.63002620e+09
  5.46499736e+09 6.47567350e+09 2.65668965e+08 2.15085906e+08]
 [3.32277509e+05 1.31217352e+10 8.63002620e+09 1.23591185e+10
  7.15688114e+09 8.52129882e+09 3.26154795e+08 2.82406203e+08]
 [1.97805348e+05 7.84812962e+09 5.46499736e+09 7.15688114e+09
  4.83566539e+09 5.40313605e+09 2.13908671e+08 1.77451577e+08]
 [2.18189177e+05 9.38275358e+09 6.47567350e+09 8.52129882e+09
  5.40313605e+09 6.46736132e+09 2.31833208e+08 1.92244315e+08]
 [2.09898733e+04 3.99267905e+08 2.65668965e+08 3.26154795e+08
  2.13908671e+08 2.31833208e+08 9.62796013e+07 7.39415660e+06]
 [6.56251000e+03 2.93381825e+08 2.15085906e+08 2.82406203e+08
  1.77451577e+08 1.92244315e+08 7.39415660e+06 9.45475630e+06]]

In [None]:
inversed_multiplication_X = np.linalg.inv(multiplication_X)
print(inversed_multiplication_X)
print(inversed_multiplication_X.shape)

[[ 8.95405787e-02 -4.03900817e-06  5.98772235e-06 -1.26952581e-06
  -4.95180482e-06  2.72572286e-06 -1.14250014e-05  1.13368359e-05]
 [-4.03900817e-06  9.10705643e-10 -8.93716034e-10 -5.31403256e-10
   5.71232235e-11  1.77010236e-10  3.72692490e-10  5.78516046e-09]
 [ 5.98772235e-06 -8.93716034e-10  2.41493324e-09  5.99256023e-10
   2.07607054e-10 -1.74509412e-09 -1.27106397e-09 -1.66798056e-08]
 [-1.26952581e-06 -5.31403256e-10  5.99256023e-10  1.67405766e-09
   7.15489441e-10 -2.05731990e-09 -1.15020684e-10 -1.77716535e-08]
 [-4.95180482e-06  5.71232235e-11  2.07607054e-10  7.15489441e-10
   4.73277599e-09 -4.16546939e-09 -4.74018812e-10 -2.81890333e-08]
 [ 2.72572286e-06  1.77010236e-10 -1.74509412e-09 -2.05731990e-09
  -4.16546939e-09  6.62489949e-09  9.56214245e-10  3.64922501e-08]
 [-1.14250014e-05  3.72692490e-10 -1.27106397e-09 -1.15020684e-10
  -4.74018812e-10  9.56214245e-10  1.33878914e-08  7.70008759e-09]
 [ 1.13368359e-05  5.78516046e-09 -1.66798056e-08 -1.77716535e-08
  -

Kalikan Matriks Transpose X dengan Matriks Y

In [None]:
multiplication_Y = X_transpose @ Y_train_T
print(multiplication_Y)
print(multiplication_Y.shape)

[[1.37270198e+06]
 [5.70929656e+10]
 [3.84048520e+10]
 [5.03976209e+10]
 [3.11001698e+10]
 [3.66743008e+10]
 [1.54050730e+09]
 [1.17741874e+09]]
(8, 1)


Kalikan Hasil Invers dengan Hasil Transpose X * Y untuk Memperoleh Nilai Koefisien Tiap Variabel

In [None]:
coefficient = inversed_multiplication_X @ multiplication_Y

In [None]:
print(coefficient)
print(coefficient.shape)

[[-2.50292942e-09]
 [ 1.00000000e+00]
 [ 1.00000000e+00]
 [ 1.00000000e+00]
 [ 1.00000000e+00]
 [ 1.00000000e+00]
 [ 1.00000000e+00]
 [ 1.00000000e+00]]
(8, 1)


Uji Koefisien dengan Data Uji

In [None]:
Y_pred = X_test_T_intercept @ coefficient

Evaluasi Model

In [None]:
mse = mean_squared_error(Y_test_T, Y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(Y_test_T, Y_pred)

# kf = KFold(n_splits=5, shuffle=True, random_state=42)
# scores_p = cross_val_score(model_p, XT_manual, Y_T, cv=5, scoring='r2')

In [None]:
print("\nEvaluasi Model:")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-Squared (R²): {r2}")
# print(f"K-Fold R² Scores for TOTAL_OPUT_T: {scores_p}")

# Menampilkan hasil prediksi dan nilai aktual
print("\nPerbandingan Y aktual vs Y prediksi:")
for actual, pred in zip(Y_test_T, Y_pred):
    print(f"Y aktual: {actual[0]}, Y prediksi: {pred[0]}")


Evaluasi Model:
Mean Squared Error (MSE): 2.806114747809726e-18
Root Mean Squared Error (RMSE): 1.6751461869967427e-09
R-Squared (R²): 1.0

Perbandingan Y aktual vs Y prediksi:
Y aktual: 38402.765, Y prediksi: 38402.76499999756
Y aktual: 30975.59730555555, Y prediksi: 30975.597305554078
Y aktual: 93867.5, Y prediksi: 93867.50000000013
Y aktual: 76671.45000000001, Y prediksi: 76671.44999999975
Y aktual: 5300.089999999999, Y prediksi: 5300.0899999974
Y aktual: 10696.115, Y prediksi: 10696.114999998406
Y aktual: 11043.166999999998, Y prediksi: 11043.166999998532


**5.3. Ujicoba OUTPUT_P**

5.3.1. Train Test Split

In [None]:
# Train Test Split To Predict TOTAL P
X_train_P, X_test_P, Y_train_P, Y_test_P = train_test_split(XP_manual, Y_P, test_size=0.2, random_state=42)

NameError: name 'XP_manual' is not defined

In [None]:
print(f"Shape data latih sumbu X (Variabel Independen) : {X_train_P.shape}")
print(f"Shape data latih sumbu Y (Variabel dependen) : {Y_train_P.shape}")
print(f"Shape data uji sumbu X (Variabel Independen) : {X_test_P.shape}")
print(f"Shape data uji sumbu Y (Variabel dependen) : {Y_test_P.shape}")

5.3.2. Perhitungan Regresi Linear

Tambahkan variabel untuk nilai intercept/konstanta

In [None]:
X_train_P_intercept = np.hstack((np.ones((X_train_P.shape[0], 1)), X_train_P))
X_test_P_intercept = np.hstack((np.ones((X_test_P.shape[0], 1)), X_test_P))

In [None]:
# print(X_train_intercept[135])
print(X_train_P_intercept.shape)

Hitung Transpose dari Matriks X

In [None]:
X_transpose = X_train_P_intercept.T

In [None]:
print(X_transpose)
print(X_transpose.shape)

Kalikan Matriks Transpose X dengan Matriks X itu sendiri kemudian cari nilai inversnya

In [None]:
multiplication_X = X_transpose @ X_train_P_intercept
print(multiplication_X)

In [None]:
inversed_multiplication_X = np.linalg.inv(multiplication_X)
print(inversed_multiplication_X)
print(inversed_multiplication_X.shape)

Kalikan Matriks Transpose X dengan Matriks Y

In [None]:
multiplication_Y = X_transpose @ Y_train_P
print(multiplication_Y)
print(multiplication_Y.shape)

Kalikan Hasil Invers dengan Hasil Transpose X * Y untuk Memperoleh Nilai Koefisien Tiap Variabel

In [None]:
coefficient = inversed_multiplication_X @ multiplication_Y

In [None]:
print(coefficient)
print(coefficient.shape)

Uji Koefisien dengan Data Uji

In [None]:
Y_pred = X_test_P_intercept @ coefficient

Evaluasi Model

In [None]:
mse = mean_squared_error(Y_test_P, Y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(Y_test_P, Y_pred)

kf = KFold(n_splits=5, shuffle=True, random_state=42)
scores_p = cross_val_score(model_p, XP_manual, Y_P, cv=5, scoring='r2')

In [None]:

print("\nEvaluasi Model:")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-Squared (R²): {r2:.4f}")
print(f"K-Fold R² Scores for TOTAL_OPUT_P: {scores_p}")

# Menampilkan hasil prediksi dan nilai aktual
print("\nPerbandingan Y aktual vs Y prediksi:")
for actual, pred in zip(Y_test_P, Y_pred):
    print(f"Y aktual: {actual[0]:.4f}, Y prediksi: {pred[0]:.4f}")

##**FUNCTION REGRESI LINEAR**

In [None]:
def linear_regression(X, Y):
  # Transpose matriks X yang berisikan variabel independen + konstanta
  X_transpose = X.T

  # Kalikan Hasil Transpose Sebelumnya degnan Matriks X (variabel independen)
  multiplication_X = X_transpose @ X

  # Cari Invers dari Perkalian Transpose X dengan Matriks X
  inversed_multiplication_X = np.linalg.inv(multiplication_X)

  #Kalikan Hasil Transpose Sebelumnya degnan Matriks Y (variabel dependen)
  multiplication_Y = X_transpose @ Y_train_T

  # Kalikan Invers dengan Hasil Kali Transpose X & Y
  coefficient = inversed_multiplication_X @ multiplication_Y
  return coefficient


In [None]:
regression = linear_regression(X_train_T_intercept, Y_train_T)
Y_T_pred = X_test_T_intercept @ regression

In [None]:
mse = mean_squared_error(Y_test_T, Y_T_pred)
rmse = np.sqrt(mse)
r2 = r2_score(Y_test_T, Y_T_pred)

kf = KFold(n_splits=5, shuffle=True, random_state=42)
scores_p = cross_val_score(model_p, XT_manual, Y_T, cv=5, scoring='r2')

In [None]:
print("\nEvaluasi Model:")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-Squared (R²): {r2}")
print(f"K-Fold R² Scores for TOTAL_OPUT_P: {scores_p}")

for actual, pred in zip(Y_test_T, Y_T_pred):
    print(f"Y aktual: {actual[0]}, Y prediksi: {pred[0]}")

In [None]:
import matplotlib.pyplot as plt

plt.hist(Y_P, bins=20)
plt.xlabel('Total P')
plt.ylabel('Frequency')
plt.title('Distribusi Total P')
plt.show()

In [None]:
print(y_train_p.describe())
print(y_test_p.describe())