In [50]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [51]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [52]:
path_data_lama = "/content/drive/My Drive/skripsi/dataset/bisbul_ihsan.csv"
data_lama = pd.read_csv(path_data_lama)

In [53]:
def map_e(y_true, y_pred):
  y_true, y_pred = np.array(y_true), np.array(y_pred)
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def eval(y_actual, y_pred):
  rmse = np.sqrt(metrics.mean_squared_error(y_actual, y_pred))
  mape = map_e(y_actual, y_pred)
  r2 = metrics.r2_score(y_actual, y_pred)
  print(f"rmse : {rmse}\nmape : {mape}\nr2 : {r2}")
  return rmse,mape,r2

In [54]:
# function for creating a feature importance dataframe
def imp_df(column_names, importances):
    df = pd.DataFrame({'feature': column_names,
                       'feature_importance': importances}) \
           .sort_values('feature_importance', ascending = False) \
           .reset_index(drop = True)
    return df

# plotting a feature importance dataframe (horizontal barchart)
def var_imp_plot(imp_df, title):
    imp_df.columns = ['feature', 'feature_importance']
    sns.barplot(x = 'feature_importance', y = 'feature', data = imp_df, orient = 'h', color = 'royalblue') \
       .set_title(title, fontsize = 20) 


# rf biasa

In [55]:
y = data_lama["Carotenoid"]
x = data_lama.drop("Carotenoid", axis=1)

In [56]:
x_train, x_test, y_train, y_test = train_test_split(x, 
                                                    y, 
                                                    test_size = 0.25, 
                                                    random_state=7)

In [57]:
x_train.shape

(553, 224)

In [58]:
import lightgbm as lgb

In [59]:
rf = lgb.LGBMRegressor(random_state=123)

In [60]:
start_rf = time.time()
rf.fit(x_train,y_train)
stop_rf = time.time()
waktu = stop_rf - start_rf
waktu

1.1300556659698486

In [61]:
rf_y_pred_test = rf.predict(x_test)
rf_y_pred_train = rf.predict(x_train)

In [62]:
start_rf = time.time()
rf.predict(x_test)
stop_rf = time.time()
waktu = stop_rf - start_rf
waktu

0.007826566696166992

In [63]:
rf_test_rmse, rf_test_mape, rf_test_r2 = eval(y_test,rf_y_pred_test)

rmse : 35.64142526664665
mape : 6.2868238016677
r2 : 0.9578027457259833


In [64]:
a = {'nama' : ["rf default"],
    'rmse':[rf_test_rmse], 
        'r2': [rf_test_r2], 
        'waktu':[waktu]} 
  
df = pd.DataFrame(a)

In [65]:
base_imp = imp_df(x_train.columns, rf.feature_importances_)

In [66]:
base_imp_d = base_imp.copy()

In [67]:
y = data_lama["Carotenoid"]
x = data_lama.drop("Carotenoid", axis=1)

In [68]:
w = x[base_imp.feature[:22]]

In [69]:
x_train, x_test, y_train, y_test = train_test_split(w, 
                                                    y, 
                                                    test_size = 0.25, 
                                                    random_state=7)

In [70]:
x_train.shape

(553, 22)

In [71]:
rf = lgb.LGBMRegressor(random_state=123)

In [72]:
start_rf = time.time()
rf.fit(x_train,y_train)
stop_rf = time.time()
waktu = stop_rf - start_rf

In [73]:
rf_y_pred_test = rf.predict(x_test)
rf_y_pred_train = rf.predict(x_train)

In [74]:
rf_test_rmse, rf_test_mape, rf_test_r2 = eval(y_test,rf_y_pred_test)

rmse : 37.53840525310667
mape : 5.804339633424245
r2 : 0.9531913943513837


In [75]:
a = {'nama' : ["rf default 22"],
    'rmse':[rf_test_rmse], 
        'r2': [rf_test_r2], 
        'waktu':[waktu]} 
  
df2 = pd.DataFrame(a)

# rf tunning

In [76]:
y = data_lama["Carotenoid"]
x = data_lama.drop("Carotenoid", axis=1)

In [77]:
x_train, x_test, y_train, y_test = train_test_split(x, 
                                                    y, 
                                                    test_size = 0.25, 
                                                    random_state=8)

In [78]:
x_train.shape

(553, 224)

In [79]:
best_params_ = {'bagging_fraction': 0.8,
 'feature_fraction': 0.3,
 'learning_rate': 0.06,
 'max_bin': 40,
 'max_depth': 20,
 'min_data_in_leaf': 20,
 'min_sum_hessian_in_leaf': 15,
 'n_estimators': 400,
 'num_leaves': 70,
 'subsample': 0.61}


In [80]:
rf = lgb.LGBMRegressor(**best_params_, random_state=123)

In [81]:
start_rf = time.time()
rf.fit(x_train,y_train)
stop_rf = time.time()
waktu = stop_rf - start_rf

rf_y_pred_test = rf.predict(x_test)
rf_y_pred_train = rf.predict(x_train)

In [82]:
rf_test_rmse, rf_test_mape, rf_test_r2 = eval(y_test,rf_y_pred_test)

rmse : 28.967212561472728
mape : 6.846208856203403
r2 : 0.9743212400601643


In [83]:
a = {'nama' : ["rf tuning"],
    'rmse':[rf_test_rmse], 
        'r2': [rf_test_r2], 
        'waktu':[waktu]} 
  
df5 = pd.DataFrame(a)

In [84]:
base_imp = imp_df(x_train.columns, rf.feature_importances_)

In [85]:
base_imp_t = base_imp.copy()

In [86]:
y = data_lama["Carotenoid"]
x = data_lama.drop("Carotenoid", axis=1)

In [87]:
w = x[base_imp.feature[:50]]

In [88]:
x_train, x_test, y_train, y_test = train_test_split(w, 
                                                    y, 
                                                    test_size = 0.25, 
                                                    random_state=8)

In [89]:
x_train.shape

(553, 50)

In [90]:
rf = lgb.LGBMRegressor(**best_params_, random_state=123)

In [91]:
start_rf = time.time()
rf.fit(x_train,y_train)
stop_rf = time.time()
waktu = stop_rf - start_rf

In [92]:
rf_y_pred_test = rf.predict(x_test)
rf_y_pred_train = rf.predict(x_train)

In [93]:
rf_test_rmse, rf_test_mape, rf_test_r2 = eval(y_test,rf_y_pred_test)

rmse : 28.939025583709405
mape : 6.972570857885256
r2 : 0.9743711899459853


In [94]:
a = {'nama' : ["rf tuning 50"],
    'rmse':[rf_test_rmse], 
        'r2': [rf_test_r2], 
        'waktu':[waktu]} 
  
df6 = pd.DataFrame(a)

In [95]:
df_total = pd.concat([df, df2, df5, df6])
df_total

Unnamed: 0,nama,rmse,r2,waktu
0,rf default,35.641425,0.957803,0.007827
0,rf default 22,37.538405,0.953191,0.199693
0,rf tuning,28.967213,0.974321,0.582922
0,rf tuning 50,28.939026,0.974371,0.246342


In [96]:
df_total.to_excel('/content/drive/My Drive/skripsi/perbandingan/perbandingan lgbm.xlsx')

In [97]:
df_base_imp = pd.concat([base_imp_d, base_imp_t])

In [98]:
df_base_imp.to_excel('/content/drive/My Drive/skripsi/perbandingan/base imp lgbm.xlsx')