In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
data_2017 = pd.read_excel('./dataset/2017.xlsx')
data_2018 = pd.read_excel('./dataset/2018.xlsx')
data_2019 = pd.read_excel('./dataset/2019.xlsx')
data_2020 = pd.read_excel('./dataset/2020.xlsx')
data_2021 = pd.read_excel('./dataset/2021.xlsx')

# Gabungkan data dari semua tahun
data = pd.concat([data_2017, data_2018, data_2019, data_2020, data_2021])
data = data.replace('-', np.nan)
data = data.fillna(0)

In [3]:
data_encoded = pd.get_dummies(data, columns=['Provinsi'])

In [4]:
# Pisahkan fitur (X) dan target (y)
X = data_encoded.drop(['Cakalang', 'Tongkol', 'Tuna', 'Udang '], axis=1)  # Hapus spasi di nama kolom 'Udang '
y_cakalang = data_encoded['Cakalang']
y_tongkol = data_encoded['Tongkol']
y_tuna = data_encoded['Tuna']
y_udang = data_encoded['Udang ']  # Hapus spasi di nama kolom 'Udang '


In [5]:
# Split data menjadi training set dan testing set
X_train, X_test, y_train_cakalang, y_test_cakalang = train_test_split(X, y_cakalang, test_size=0.2, random_state=42)
X_train, X_test, y_train_tongkol, y_test_tongkol = train_test_split(X, y_tongkol, test_size=0.2, random_state=42)
X_train, X_test, y_train_tuna, y_test_tuna = train_test_split(X, y_tuna, test_size=0.2, random_state=42)
X_train, X_test, y_train_udang, y_test_udang = train_test_split(X, y_udang, test_size=0.2, random_state=42)

In [6]:

# Scaling Fitur
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
# Membuat model Random Forest
rf_cakalang = RandomForestRegressor(n_estimators=100, random_state=42)
rf_tongkol = RandomForestRegressor(n_estimators=100, random_state=42)
rf_tuna = RandomForestRegressor(n_estimators=100, random_state=42)
rf_udang = RandomForestRegressor(n_estimators=100, random_state=42)

In [8]:
# Membuat model SVM
svm_cakalang = SVR(kernel='rbf')  
svm_tongkol = SVR(kernel='rbf')
svm_tuna = SVR(kernel='rbf')
svm_udang = SVR(kernel='rbf')

In [9]:
# Train model Random Forest
rf_cakalang.fit(X_train, y_train_cakalang)
rf_tongkol.fit(X_train, y_train_tongkol)
rf_tuna.fit(X_train, y_train_tuna)
rf_udang.fit(X_train, y_train_udang)

In [10]:
# Train model SVM
svm_cakalang.fit(X_train_scaled, y_train_cakalang)
svm_tongkol.fit(X_train_scaled, y_train_tongkol)
svm_tuna.fit(X_train_scaled, y_train_tuna)
svm_udang.fit(X_train_scaled, y_train_udang)

In [11]:
# Prediksi dengan Random Forest
y_pred_rf_cakalang = rf_cakalang.predict(X_test)
y_pred_rf_tongkol = rf_tongkol.predict(X_test)
y_pred_rf_tuna = rf_tuna.predict(X_test)
y_pred_rf_udang = rf_udang.predict(X_test)

In [12]:
# Prediksi dengan SVM
y_pred_svm_cakalang = svm_cakalang.predict(X_test_scaled)
y_pred_svm_tongkol = svm_tongkol.predict(X_test_scaled)
y_pred_svm_tuna = svm_tuna.predict(X_test_scaled)
y_pred_svm_udang = svm_udang.predict(X_test_scaled)

In [13]:
# List of models and their predictions
models = ["Random Forest", "SVM"]
y_preds = [y_pred_rf_cakalang, y_pred_rf_tongkol, y_pred_rf_tuna, y_pred_rf_udang,
           y_pred_svm_cakalang, y_pred_svm_tongkol, y_pred_svm_tuna, y_pred_svm_udang]
y_tests = [y_test_cakalang, y_test_tongkol, y_test_tuna, y_test_udang] * 2  # Repeat for both models

# List of fish names
fish_names = ["Cakalang", "Tongkol", "Tuna", "Udang"]

for model, y_pred in zip(models, y_preds):
    print(f"{model} Evaluation:")
    for fish_name, y_test in zip(fish_names, y_tests):
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_test, y_pred)
        print(f"{fish_name}:")
        print("  MSE:", mse)
        print("  RMSE:", rmse)
        print("  R-squared:", r2)
    print()

Random Forest Evaluation:
Cakalang:
  MSE: 375047432.82572
  RMSE: 19366.141402605735
  R-squared: 0.940213435491508
Tongkol:
  MSE: 772442170.74572
  RMSE: 27792.843876539875
  R-squared: 0.9152060215516854
Tuna:
  MSE: 1197289457.7651482
  RMSE: 34601.870726380504
  R-squared: 0.5354179395936043
Udang:
  MSE: 2601699753.560005
  RMSE: 51006.859867668834
  R-squared: -1.1790601335853195

SVM Evaluation:
Cakalang:
  MSE: 436519007.7703142
  RMSE: 20893.037303616587
  R-squared: 0.9304142102223901
Tongkol:
  MSE: 294158009.13145715
  RMSE: 17151.035220401627
  R-squared: 0.9677091323708907
Tuna:
  MSE: 1380256646.1994565
  RMSE: 37151.8054231481
  R-squared: 0.4644215127577389
Udang:
  MSE: 2927264289.9337435
  RMSE: 54104.198450154894
  R-squared: -1.451737525029304



In [15]:
def get_future_dates(num_days=1):
    base = datetime.today()
    date_list = [base + timedelta(days=x) for x in range(num_days)]
    return [(d.strftime("%Y-%m-%d"), d.strftime("%A")) for d in date_list]

# Data prediksi 14 hari ke depan
future_dates = get_future_dates()

# Membuat DataFrame untuk menyimpan prediksi
predictions = pd.DataFrame(columns=['Tanggal', 'Hari', 'Cakalang_RF', 'Tongkol_RF', 'Tuna_RF', 'Udang_RF',
                                   'Cakalang_SVM', 'Tongkol_SVM', 'Tuna_SVM', 'Udang_SVM'])

# Melakukan prediksi untuk setiap tanggal
for date, day in future_dates:
    # Membuat data input untuk prediksi (misalnya, menggunakan rata-rata fitur dari data historis)
    input_data = X.mean().values.reshape(1, -1) 

    # Prediksi dengan Random Forest
    rf_cakalang_pred = rf_cakalang.predict(input_data)[0]
    rf_tongkol_pred = rf_tongkol.predict(input_data)[0]
    rf_tuna_pred = rf_tuna.predict(input_data)[0]
    rf_udang_pred = rf_udang.predict(input_data)[0]

    # Prediksi dengan SVM
    svm_cakalang_pred = svm_cakalang.predict(input_data)[0]
    svm_tongkol_pred = svm_tongkol.predict(input_data)[0]
    svm_tuna_pred = svm_tuna.predict(input_data)[0]
    svm_udang_pred = svm_udang.predict(input_data)[0]

    # Menambahkan prediksi ke DataFrame
    predictions = pd.concat([predictions, pd.DataFrame({
        'Tanggal': [date],
        'Hari': [day],
        'Cakalang_RF': [rf_cakalang_pred],
        'Tongkol_RF': [rf_tongkol_pred],
        'Tuna_RF': [rf_tuna_pred],
        'Udang_RF': [rf_udang_pred],
        'Cakalang_SVM': [svm_cakalang_pred],
        'Tongkol_SVM': [svm_tongkol_pred],
        'Tuna_SVM': [svm_tuna_pred],
        'Udang_SVM': [svm_udang_pred]
    })], ignore_index=True)

print(predictions)

      Tanggal     Hari  Cakalang_RF  Tongkol_RF  Tuna_RF  Udang_RF   
0  2024-05-28  Tuesday      15302.2    19074.09  4400.73   4099.38  \

   Cakalang_SVM   Tongkol_SVM     Tuna_SVM    Udang_SVM  
0   9352.080293  11640.166576  4621.824373  4016.382012  


