In [70]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import joblib
from tkinter import *
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [71]:
# Load the fish price dataset; the path is relative to the notebook's working directory.
data = pd.read_csv('dataset/fish_data.csv')

In [72]:
# Preview the first rows of the raw (still string-encoded) data.
data.head()

Unnamed: 0,Fish Species,Price,Grade,Catching Method,Sustainability,Actual Price
0,Bawal Hitam,36000,B,Farmed,Moderately sustainable,30000
1,Bawal Putih,72000,B,Farmed,Moderately sustainable,60000
2,Cakalang,30000,B,Farmed,Moderately sustainable,25000
3,Kakap Merah,66000,B,Farmed,Moderately sustainable,55000
4,Kembung,30000,B,Farmed,Moderately sustainable,25000


In [73]:
# (rows, columns) of the loaded frame.
data.shape

(1000, 6)

In [74]:
# Count missing values per column.
data.isnull().sum()

Fish Species       0
Price              0
Grade              0
Catching Method    0
Sustainability     0
Actual Price       0
dtype: int64

In [75]:
# Summary statistics of the numeric columns ('Price' and 'Actual Price').
data.describe()

Unnamed: 0,Price,Actual Price
count,1000.0,1000.0
mean,35555.479,47964.664
std,36934.953043,42335.285309
min,63.0,63.0
25%,14477.5,25000.0
50%,27968.5,41543.5
75%,45612.75,60047.5
max,403200.0,280000.0


In [76]:
# List every 'Price' value in descending order to eyeball the largest values.
# NOTE(review): this displays the whole column; something like
# data['Price'].nlargest(20) would show the top values with far less output.
sorted(data['Price'],reverse=True)

[403200,
 336000,
 336000,
 322560,
 280000,
 268800,
 268800,
 268800,
 224000,
 224000,
 215040,
 201600,
 179200,
 168000,
 168000,
 161280,
 140000,
 134400,
 134400,
 115200,
 113330,
 112000,
 112000,
 109906,
 101694,
 100800,
 100667,
 100539,
 98176,
 96000,
 96000,
 95466,
 92160,
 89600,
 89538,
 87465,
 86896,
 86795,
 86400,
 86400,
 85281,
 84574,
 84000,
 84000,
 84000,
 83680,
 82535,
 80640,
 80083,
 80000,
 79936,
 79917,
 79778,
 79644,
 79521,
 79200,
 78826,
 77922,
 76800,
 76800,
 76800,
 76537,
 76316,
 75739,
 75516,
 75468,
 75331,
 75138,
 74751,
 74087,
 73922,
 73893,
 73314,
 72894,
 72825,
 72695,
 72452,
 72187,
 72000,
 72000,
 72000,
 72000,
 71802,
 71672,
 71040,
 70885,
 70683,
 70032,
 70000,
 69968,
 69911,
 69333,
 69240,
 69180,
 69140,
 69120,
 69120,
 68909,
 68685,
 68628,
 68544,
 68187,
 68111,
 68088,
 67795,
 67694,
 67431,
 67200,
 67200,
 67200,
 67200,
 66936,
 66816,
 66702,
 66639,
 66278,
 66000,
 66000,
 65960,
 65774,
 65546,
 652

# Data mapping: encode the categorical columns as integers

In [77]:
# Integer-encode the fish species names (codes 0-10).
# Series.map leaves NaN for any value that is not a key of the mapping, so
# running this cell a second time on the already-encoded column would wipe it.
data['Fish Species'].unique()  # NOTE(review): result is discarded (not the cell's last expression)
data['Fish Species'] = data['Fish Species'].map({
    'Bawal Hitam': 0,
    'Bawal Putih': 1,
    'Cakalang': 2,
    'Kakap Merah': 3,
    'Kembung': 4,
    'Kerapu': 5,
    'Kuwe': 6,
    'Makerel': 7,
    'Salmon': 8,
    'Tenggiri': 9,
    'Tongkol': 10,
})

In [78]:
# Ordinal encoding of the quality grade: C -> 0, B -> 1, A -> 2.
data['Grade'].unique()  # NOTE(review): result is discarded (not the cell's last expression)
data['Grade'] = data['Grade'].map({
    'A': 2,
    'B': 1,
    'C': 0,
})

In [79]:
# Binary encoding of sustainability: 'Unsustainable' -> 0, 'Moderately sustainable' -> 1.
data['Sustainability'].unique()  # NOTE(review): result is discarded (not the cell's last expression)
data['Sustainability'] = data['Sustainability'].map({
    'Unsustainable': 0,
    'Moderately sustainable': 1,
})


In [80]:
# Integer-encode the catching method (codes 0-3).
data['Catching Method'].unique()  # NOTE(review): result is discarded (not the cell's last expression)
data['Catching Method'] = data['Catching Method'].map({
    'Spearfishing': 0,
    'Fishing Hook': 1,
    'Netting': 2,
    'Farmed': 3,
})

# Data normalization: min-max scale the price columns

In [81]:
from sklearn.preprocessing import MinMaxScaler
import joblib

# Two independent min-max scalers (output range 0-1) so that each price column
# can be inverse-transformed on its own later:
#   scaler  -> 'Actual Price' (used as an input feature of the models)
#   scaler2 -> 'Price'        (the prediction target)
# NOTE(review): both scalers are fitted on the full dataset, i.e. before the
# train/test split performed later in the notebook, so the test rows'
# min/max values influence the scaling applied to the training rows.
scaler = MinMaxScaler(feature_range=(0,1))
scaler2 = MinMaxScaler(feature_range=(0,1))
data['Actual Price'] = scaler.fit_transform(data[['Actual Price']])
data['Price'] = scaler2.fit_transform(data[['Price']])

In [82]:
# Persist both fitted scalers next to the notebook so the same scaling can be
# reapplied / inverted outside this session:
#   actual_price_scaler.pkl -> scaler for the 'Actual Price' feature
#   price_scaler.pkl        -> scaler for the 'Price' target
joblib.dump(scaler, 'actual_price_scaler.pkl')
joblib.dump(scaler2, 'price_scaler.pkl')

['price_scaler.pkl']

In [83]:
# Preview the frame after integer encoding and min-max scaling.
data.head()

Unnamed: 0,Fish Species,Price,Grade,Catching Method,Sustainability,Actual Price
0,0,0.089143,1,3,1,0.106942
1,1,0.178443,1,3,1,0.214109
2,2,0.07426,1,3,1,0.089081
3,3,0.16356,1,3,1,0.196248
4,4,0.07426,1,3,1,0.089081


In [84]:
# Sanity check: inverting the scaling should give back the original 'Actual Price' values.
# NOTE(review): this prints one row per record; slicing the result (e.g. [:5])
# would be enough to confirm the round trip.
print(scaler.inverse_transform(data[['Actual Price']]))

[[3.0000e+04]
 [6.0000e+04]
 [2.5000e+04]
 [5.5000e+04]
 [2.5000e+04]
 [8.0000e+04]
 [7.0000e+04]
 [3.0000e+04]
 [2.8000e+05]
 [6.0000e+04]
 [3.8000e+04]
 [3.0000e+04]
 [6.0000e+04]
 [2.5000e+04]
 [5.5000e+04]
 [2.5000e+04]
 [8.0000e+04]
 [7.0000e+04]
 [3.0000e+04]
 [2.8000e+05]
 [6.0000e+04]
 [3.8000e+04]
 [3.0000e+04]
 [6.0000e+04]
 [2.5000e+04]
 [5.5000e+04]
 [2.5000e+04]
 [8.0000e+04]
 [7.0000e+04]
 [3.0000e+04]
 [2.8000e+05]
 [6.0000e+04]
 [3.8000e+04]
 [3.0000e+04]
 [6.0000e+04]
 [2.5000e+04]
 [5.5000e+04]
 [2.5000e+04]
 [8.0000e+04]
 [7.0000e+04]
 [3.0000e+04]
 [2.8000e+05]
 [6.0000e+04]
 [3.8000e+04]
 [3.0000e+04]
 [6.0000e+04]
 [2.5000e+04]
 [5.5000e+04]
 [2.5000e+04]
 [8.0000e+04]
 [7.0000e+04]
 [3.0000e+04]
 [2.8000e+05]
 [6.0000e+04]
 [3.8000e+04]
 [3.0000e+04]
 [6.0000e+04]
 [2.5000e+04]
 [5.5000e+04]
 [2.5000e+04]
 [8.0000e+04]
 [7.0000e+04]
 [3.0000e+04]
 [2.8000e+05]
 [6.0000e+04]
 [3.8000e+04]
 [3.0000e+04]
 [6.0000e+04]
 [2.5000e+04]
 [5.5000e+04]
 [2.5000e+04]
 [8.00

In [85]:
# Display the fully preprocessed frame (pandas truncates the middle rows).
data

Unnamed: 0,Fish Species,Price,Grade,Catching Method,Sustainability,Actual Price
0,0,0.089143,1,3,1,0.106942
1,1,0.178443,1,3,1,0.214109
2,2,0.074260,1,3,1,0.089081
3,3,0.163560,1,3,1,0.196248
4,4,0.074260,1,3,1,0.089081
...,...,...,...,...,...,...
995,5,0.014397,1,0,0,0.043438
996,6,0.017103,1,0,0,0.051562
997,7,0.014119,1,0,0,0.042606
998,8,0.035779,1,0,0,0.107592


# Feature and label split

In [86]:
# Target: the scaled 'Price' column.
y = data['Price']
# Inputs: every remaining column except the target and 'Fish Species',
# which leaves Grade, Catching Method, Sustainability and Actual Price.
X = data.drop(columns=['Price', 'Fish Species'])
print(X.head())
print(y.head())

   Grade  Catching Method  Sustainability  Actual Price
0      1                3               1      0.106942
1      1                3               1      0.214109
2      1                3               1      0.089081
3      1                3               1      0.196248
4      1                3               1      0.089081
0    0.089143
1    0.178443
2    0.074260
3    0.163560
4    0.074260
Name: Price, dtype: float64


# Train test split 

In [87]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the split repeatable.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [88]:
import tensorflow as tf

# Deep Learning Model

In [89]:
# Regression network built layer by layer:
#   4 input features -> Dense(20, relu) -> Dense(25, relu)
#   -> the 25 activations are reshaped to 5x5 so the bidirectional LSTM can
#      read them as a 5-step sequence of 5 values
#   -> Bidirectional LSTM with 8 units per direction
#   -> a single linear output unit (the min-max scaled price).
model_using_huber = tf.keras.models.Sequential()
model_using_huber.add(tf.keras.layers.Dense(20, activation='relu', input_shape=[4]))
model_using_huber.add(tf.keras.layers.Dense(25, activation='relu'))
model_using_huber.add(tf.keras.layers.Reshape((5, 5)))
model_using_huber.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(8)))
model_using_huber.add(tf.keras.layers.Dense(1))

In [90]:
# Train with the Huber loss and the Adam optimizer; MSE is tracked as an
# additional metric for monitoring only.
model_using_huber.compile(
    loss=tf.keras.losses.Huber(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['mse'],
)

In [91]:
# Fit for 100 epochs and keep the per-epoch history.
# validation_data is passed as the (x_val, y_val) tuple that the Keras
# Model.fit API documents, rather than as a list.
# NOTE(review): the held-out test split doubles as the validation set here, so
# the validation curves and the later test score are computed on the same rows.
history4 = model_using_huber.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

# XGB Regressor Model

In [92]:
from xgboost import XGBRegressor
from sklearn import metrics

In [93]:
# Baseline gradient-boosted regressor with default hyperparameters,
# trained on the same training split as the neural network.
xg = XGBRegressor()
xg.fit(X_train,y_train)

In [94]:
# R^2 of the Keras model on the test split (targets are still min-max scaled).
y_pred1 = model_using_huber.predict(X_test)
score1 = metrics.r2_score(y_test,y_pred1)

# R^2 of the XGBoost model on the same split.
y_pred2 = xg.predict(X_test)
score2 = metrics.r2_score(y_test,y_pred2)



In [95]:
# model1 = Keras network (model_using_huber), model2 = XGBoost regressor (xg).
print('model1 : ',score1)
print('model2 : ',score2)

model1 :  0.997893441689192
model2 :  0.9962971232663108


In [96]:
# Shapes of the full feature matrix and target vector (before splitting).
print(X.shape)
print(y.shape)

(1000, 4)
(1000,)


In [97]:
# Undo the min-max scaling of 'Actual Price' and flatten the single-column
# 2-D result into a plain Python list (one value per row).
arr = scaler.inverse_transform(data[['Actual Price']])
arr2 = [row[0] for row in arr]

print(arr2)

[30000.0, 60000.0, 25000.0, 55000.0, 25000.0, 80000.0, 70000.0, 30000.0, 280000.0, 60000.0, 38000.0, 30000.0, 60000.0, 25000.0, 55000.0, 25000.0, 80000.0, 70000.0, 30000.0, 280000.0, 60000.0, 38000.0, 30000.0, 60000.0, 25000.0, 55000.0, 25000.0, 80000.0, 70000.0, 30000.0, 280000.0, 60000.0, 38000.0, 30000.0, 60000.0, 25000.0, 55000.0, 25000.0, 80000.0, 70000.0, 30000.0, 280000.0, 60000.0, 38000.0, 30000.0, 60000.0, 25000.0, 55000.0, 25000.0, 80000.0, 70000.0, 30000.0, 280000.0, 60000.0, 38000.0, 30000.0, 60000.0, 25000.0, 55000.0, 25000.0, 80000.0, 70000.0, 30000.0, 280000.0, 60000.0, 38000.0, 30000.0, 60000.0, 25000.0, 55000.0, 25000.0, 80000.0, 70000.0, 30000.0, 280000.0, 60000.0, 38000.0, 30000.0, 60000.0, 25000.0, 55000.0, 25000.0, 80000.0, 70000.0, 30000.0, 280000.0, 60000.0, 38000.0, 30000.0, 60000.0, 25000.0, 55000.0, 25000.0, 80000.0, 70000.0, 30000.0, 280000.0, 60000.0, 38000.0, 30000.0, 60000.0, 25000.0, 55000.0, 25000.0, 80000.0, 70000.0, 30000.0, 280000.0, 60000.0, 38000.0,

In [98]:
# Undo the min-max scaling of the 'Price' target and flatten the
# single-column 2-D result into a plain Python list (one value per row).
arr_price = scaler2.inverse_transform(data[['Price']])
arr_price2 = [row[0] for row in arr_price]
print(arr_price2)

[36000.0, 72000.0, 30000.0, 66000.0, 30000.0, 96000.0, 84000.0, 36000.0, 336000.0, 72000.0, 45600.0, 43200.0, 86400.0, 36000.0, 79200.0, 36000.0, 115200.0, 100800.0, 43200.0, 403200.0, 86400.0, 54720.00000000001, 18000.0, 36000.0, 15000.0, 33000.0, 15000.0, 48000.0, 42000.0, 18000.0, 168000.0, 36000.0, 22800.0, 30000.0, 60000.0, 25000.0, 55000.00000000001, 25000.0, 80000.0, 70000.0, 30000.0, 280000.0, 60000.0, 38000.0, 36000.0, 72000.0, 30000.0, 66000.0, 30000.0, 96000.0, 84000.0, 36000.0, 336000.0, 72000.0, 45600.0, 15000.0, 30000.0, 12500.0, 27500.000000000004, 12500.0, 40000.0, 35000.0, 15000.0, 140000.0, 30000.0, 19000.0, 18000.0, 36000.0, 15000.0, 33000.0, 15000.0, 48000.0, 42000.0, 18000.0, 168000.0, 36000.0, 22800.0, 21600.0, 43200.0, 18000.0, 39600.0, 18000.0, 57600.0, 50400.0, 21600.0, 201600.0, 43200.0, 27360.000000000004, 9000.0, 18000.0, 7500.0, 16500.0, 7500.0, 24000.0, 21000.0, 9000.0, 84000.0, 18000.0, 11400.0, 24000.0, 48000.0, 20000.0, 44000.0, 20000.0, 64000.0, 56000.

In [100]:
# Refit a fresh XGBoost regressor on the complete dataset (train and test rows
# together); `model_xgb` is the model used by the prediction cell below.
# NOTE(review): presumably `fit` returns the estimator itself, which would make
# `model_xgb` and `xgb` the same object - confirm against the XGBoost docs.
xgb = XGBRegressor()
model_xgb = xgb.fit(X, y)

In [99]:
# model_using_huber.save('price_predictor_complex_lstm_huber.h5')

In [None]:
# joblib.dump(model_xgb,'price_predictor_xgb.h5')

In [101]:
import numpy as np
import math

In [102]:
def round_to_multiple(number, multiple):
    """Round ``number`` to the nearest multiple of ``multiple``.

    Works for both integers and floats. An exact tie (``number`` lying
    half-way between two multiples) resolves to the floor-side multiple,
    e.g. ``round_to_multiple(42500, 1000) == 42000``.

    Args:
        number: Value to round (int or float).
        multiple: Step size; must be non-zero.

    Returns:
        The multiple of ``multiple`` closest to ``number``. The result is a
        float whenever either argument is a float.

    Raises:
        ZeroDivisionError: If ``multiple`` is 0.
    """
    downwards = multiple * (number // multiple)
    # Derive the ceiling-side multiple from the floor-side one. The integer
    # trick ``(number + multiple - 1) // multiple`` is only a ceiling for
    # integer inputs: for 2.7 with a step of 1 it yields 2 rather than 3.
    if number % multiple == 0:
        towards = downwards
    else:
        towards = downwards + multiple

    # Strict comparison keeps ties on the floor side.
    if abs(number - towards) < abs(number - downwards):
        return towards
    return downwards

In [111]:
def predict_price(data, model):
    """Predict a single price and return it in original units, rounded to 1000.

    Args:
        data: Feature rows accepted by ``model.predict``. The result must
            contain exactly one value, because ``.item()`` is used to
            extract it.
        model: Any fitted model exposing ``predict`` and returning a NumPy
            array (the notebook passes both the Keras and the XGBoost model).

    Returns:
        The prediction mapped back through ``scaler2`` (the 'Price' scaler),
        rounded to a whole number and then to the nearest multiple of 1000.
    """
    # Force an (n, 1) column so both 1-D and 2-D model outputs suit the scaler.
    scaled_column = model.predict(data).reshape(-1, 1)
    unscaled = scaler2.inverse_transform(scaled_column)
    whole_price = round(unscaled.item(), 0)
    return round_to_multiple(whole_price, 1000)

In [115]:

# Example input row: features are already integer-encoded, while
# 'Actual Price' is still in original (unscaled) units.
data_new = pd.DataFrame({
    'Grade': 2,
    'Catching Method': 3,
    'Sustainability': 1,
    'Actual Price': 30000,
}, index=[0])

if data_new['Grade'].iloc[0] == 0 or data_new['Actual Price'].iloc[0] == 0:
    # Grade code 0 (grade 'C' in the encoding above) or a zero actual price
    # short-circuits to a price of 0 without consulting the models.
    result_array = np.array([0])
    price = float(result_array[0])
    # Assign average_price on this branch as well so the final print
    # does not fail with a NameError.
    average_price = price
else:
    # The models were trained on the min-max scaled 'Actual Price', so apply
    # the same fitted scaler before predicting.
    data_new['Actual Price'] = scaler.transform(data_new[['Actual Price']])
    price1 = predict_price(data_new, model_using_huber)
    price2 = predict_price(data_new, model_xgb)

    # Simple ensemble: mean of both predictions, snapped to a multiple of 1000.
    average_price = round_to_multiple((price1 + price2) / 2, 1000)
    result_array = np.array([price1, price2], dtype=int)

print(result_array)
print(average_price)

[43000 42000]
42000.0
