In [56]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import joblib
from tkinter import *
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor

In [57]:
# Load the fish price table from the CSV file under dataset/ into a DataFrame.
data = pd.read_csv(filepath_or_buffer="dataset/fish_data.csv")

## 1. Display Top 5 Rows of The Dataset

In [58]:
# Preview the first five rows (5 is the default for head()).
data.head(n=5)

Unnamed: 0,Fish Species,Price,Grade,Catching Method,Sustainability,Actual Price
0,Bawal Hitam,36000,B,Farmed,Moderately sustainable,30000
1,Bawal Putih,72000,B,Farmed,Moderately sustainable,60000
2,Cakalang,30000,B,Farmed,Moderately sustainable,25000
3,Kakap Merah,66000,B,Farmed,Moderately sustainable,55000
4,Kembung,30000,B,Farmed,Moderately sustainable,25000


## 2. Find the Shape of the Fish Dataset (Number of Rows and Number of Columns)

In [59]:
# Tuple of (number of rows, number of columns) for the loaded frame.
data.shape

(1000, 6)

## 3. Check Null Values In The Dataset

In [60]:
# Count missing values per column (isna is the same check as isnull).
data.isna().sum()

Fish Species       0
Price              0
Grade              0
Catching Method    0
Sustainability     0
Actual Price       0
dtype: int64

In [61]:
# Summary statistics for the numeric columns, with the default quartiles spelled out.
data.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Price,Actual Price
count,1000.0,1000.0
mean,35555.479,47964.664
std,36934.953043,42335.285309
min,63.0,63.0
25%,14477.5,25000.0
50%,27968.5,41543.5
75%,45612.75,60047.5
max,403200.0,280000.0


In [62]:
# Every 'Price' value in ascending order (ascending is sorted()'s default).
sorted(data['Price'])

[63,
 118,
 139,
 229,
 242,
 343,
 498,
 585,
 618,
 707,
 744,
 780,
 808,
 854,
 1360,
 1366,
 1380,
 1411,
 1426,
 1576,
 1660,
 1683,
 1692,
 1732,
 1762,
 1831,
 1905,
 1921,
 1974,
 2231,
 2309,
 2384,
 2456,
 2632,
 2655,
 2659,
 2662,
 2712,
 2762,
 2979,
 3055,
 3217,
 3287,
 3340,
 3405,
 3492,
 3560,
 3626,
 3644,
 3645,
 3687,
 3761,
 3829,
 3860,
 3931,
 3941,
 3950,
 3983,
 4030,
 4150,
 4419,
 4439,
 4487,
 4581,
 4652,
 4724,
 4816,
 5124,
 5276,
 5371,
 5389,
 5470,
 5480,
 5496,
 5566,
 5596,
 5637,
 5662,
 5715,
 5743,
 5755,
 5760,
 5776,
 5853,
 5867,
 5935,
 5964,
 5990,
 6000,
 6000,
 6037,
 6136,
 6172,
 6198,
 6251,
 6264,
 6337,
 6339,
 6374,
 6404,
 6478,
 6509,
 6687,
 6713,
 6790,
 6849,
 6958,
 6995,
 7091,
 7200,
 7200,
 7212,
 7276,
 7358,
 7393,
 7500,
 7500,
 7638,
 7674,
 7684,
 7725,
 7761,
 7837,
 7879,
 8000,
 8000,
 8119,
 8243,
 8345,
 8406,
 8410,
 8464,
 8516,
 8621,
 8671,
 8711,
 8730,
 8832,
 8842,
 8863,
 8952,
 9000,
 9000,
 9027,
 9043,


## 4. Encoding the Categorical Columns

In [63]:
# Encode Grade as integers: C -> 0, B -> 1, A -> 2.
grade_codes = {'C': 0, 'B': 1, 'A': 2}

# Only encode while the column still holds the raw labels. Re-running this
# cell on an already-encoded column would otherwise map every integer to NaN.
if not pd.api.types.is_numeric_dtype(data['Grade']):
    grade_encoded = data['Grade'].map(grade_codes)
    # Series.map yields NaN for labels missing from the dict; fail loudly
    # instead of passing NaN on to the models.
    if grade_encoded.isna().any():
        unmapped = data.loc[grade_encoded.isna(), 'Grade'].unique().tolist()
        raise ValueError(f"Unmapped Grade values: {unmapped}")
    data['Grade'] = grade_encoded

# Last expression so the encoded values are actually displayed.
data['Grade'].unique()

In [64]:
# Encode Sustainability as integers: Unsustainable -> 0, Moderately sustainable -> 1.
sustainability_codes = {'Moderately sustainable': 1, 'Unsustainable': 0}

# Only encode while the column still holds the raw labels. Re-running this
# cell on an already-encoded column would otherwise map every integer to NaN.
if not pd.api.types.is_numeric_dtype(data['Sustainability']):
    sustainability_encoded = data['Sustainability'].map(sustainability_codes)
    # Series.map yields NaN for labels missing from the dict; fail loudly
    # instead of passing NaN on to the models.
    if sustainability_encoded.isna().any():
        unmapped = data.loc[sustainability_encoded.isna(), 'Sustainability'].unique().tolist()
        raise ValueError(f"Unmapped Sustainability values: {unmapped}")
    data['Sustainability'] = sustainability_encoded

# Last expression so the encoded values are actually displayed.
data['Sustainability'].unique()


In [65]:
# Encode Catching Method as integers:
# Spearfishing -> 0, Fishing Hook -> 1, Netting -> 2, Farmed -> 3.
catching_method_codes = {'Farmed': 3, 'Netting': 2, 'Spearfishing': 0, 'Fishing Hook': 1}

# Only encode while the column still holds the raw labels. Re-running this
# cell on an already-encoded column would otherwise map every integer to NaN.
if not pd.api.types.is_numeric_dtype(data['Catching Method']):
    catching_method_encoded = data['Catching Method'].map(catching_method_codes)
    # Series.map yields NaN for labels missing from the dict; fail loudly
    # instead of passing NaN on to the models.
    if catching_method_encoded.isna().any():
        unmapped = data.loc[catching_method_encoded.isna(), 'Catching Method'].unique().tolist()
        raise ValueError(f"Unmapped Catching Method values: {unmapped}")
    data['Catching Method'] = catching_method_encoded

# Last expression so the encoded values are actually displayed.
data['Catching Method'].unique()

## 5. Inspect the Encoded Data (the 'Actual Price' and 'Price' columns are left unscaled; no MinMaxScaler is applied)

In [66]:
# First five rows again, now with the categorical columns shown as integer codes.
data.head(n=5)

Unnamed: 0,Fish Species,Price,Grade,Catching Method,Sustainability,Actual Price
0,Bawal Hitam,36000,1,3,1,30000
1,Bawal Putih,72000,1,3,1,60000
2,Cakalang,30000,1,3,1,25000
3,Kakap Merah,66000,1,3,1,55000
4,Kembung,30000,1,3,1,25000


In [67]:
# Bare expression: the notebook renders the frame itself
# (long frames are displayed truncated, as in the output below).
data

Unnamed: 0,Fish Species,Price,Grade,Catching Method,Sustainability,Actual Price
0,Bawal Hitam,36000,1,3,1,30000
1,Bawal Putih,72000,1,3,1,60000
2,Cakalang,30000,1,3,1,25000
3,Kakap Merah,66000,1,3,1,55000
4,Kembung,30000,1,3,1,25000
...,...,...,...,...,...,...
995,Kerapu,5867,1,0,0,12223
996,Kuwe,6958,1,0,0,14497
997,Makerel,5755,1,0,0,11990
998,Salmon,14487,1,0,0,30182


## 6. Store Feature Matrix In X and Response(Target) In Vector y

In [68]:
# Feature matrix: every column except the species label and the target.
X = data.drop(columns=['Fish Species', 'Price'])
# Target vector: the 'Price' column.
y = data['Price']
# Text preview of the first rows of both.
print(X.head(), y.head(), sep='\n')

   Grade  Catching Method  Sustainability  Actual Price
0      1                3               1         30000
1      1                3               1         60000
2      1                3               1         25000
3      1                3               1         55000
4      1                3               1         25000
0    36000
1    72000
2    30000
3    66000
4    30000
Name: Price, dtype: int64


## 7. Train/Test Split

In [69]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## 8. Regressor Test 

In [70]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor

In [71]:
# Fit the four candidate regressors on the training split.
# The stochastic estimators get a fixed seed (the same value the train/test
# split uses) so the scores computed from them are reproducible.
lr = LinearRegression()
lr.fit(X_train, y_train)

rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)

# NOTE: despite its name, `xgb` holds the scikit-learn GradientBoostingRegressor
# in this cell; the name is kept because the prediction cell refers to it.
xgb = GradientBoostingRegressor(random_state=42)
xgb.fit(X_train, y_train)

# `xg` is the actual XGBoost model.
xg = XGBRegressor(random_state=42)
xg.fit(X_train, y_train)

In [72]:
# Predict on X_test with each fitted model, in the order lr, rf, xgb, xg.
y_pred1, y_pred2, y_pred3, y_pred4 = (
    fitted.predict(X_test) for fitted in (lr, rf, xgb, xg)
)

In [73]:
from sklearn import metrics

In [74]:
# R^2 of each model's predictions against y_test, in the same model order.
score1, score2, score3, score4 = (
    metrics.r2_score(y_test, predicted)
    for predicted in (y_pred1, y_pred2, y_pred3, y_pred4)
)

In [75]:
# Show the four R^2 values on one line, separated by single spaces.
print(score1, score2, score3, score4, sep=' ')

0.7813673127300855 0.9872091665213418 0.9872338768537832 0.9965644031195666


In [76]:
# Comparison table: one row per model with its R^2 score.
final_data = pd.DataFrame(
    {
        'Models': ['LR', 'RF', 'GBR', 'XG'],
        "R2_SCORE": [score1, score2, score3, score4],
    }
)

In [77]:
# Bare expression: render the model / R2_SCORE comparison table.
final_data

Unnamed: 0,Models,R2_SCORE
0,LR,0.781367
1,RF,0.987209
2,GBR,0.987234
3,XG,0.996564


In [78]:
# Refit every model on the full dataset (X, y) for use in the prediction cell.
# The stochastic estimators are seeded so repeated runs give the same fitted
# models and therefore the same predictions.
lr = LinearRegression()
model_lr = lr.fit(X, y)

rf = RandomForestRegressor(random_state=42)
model_rf = rf.fit(X, y)

gbr = GradientBoostingRegressor(random_state=42)
model_gbr = gbr.fit(X, y)

# NOTE: from here on `xgb` is the XGBoost regressor; this rebinds the name
# that the train-split cell used for its GradientBoostingRegressor.
xgb = XGBRegressor(random_state=42)
model_xgb = xgb.fit(X, y)

In [79]:
import numpy as np
import math

In [80]:
def round_to_multiple(number, multiple):
    """Round ``number`` to the nearest multiple of ``multiple``.

    Works for both int and float inputs. The upper candidate is derived from
    the floor-division candidate rather than from the integer-only
    ``(number + multiple - 1) // multiple`` trick, which picks the wrong
    candidate for fractional inputs (e.g. ``number=2.7, multiple=1``).

    Args:
        number: Value to round.
        multiple: Step to round to; must be non-zero.

    Returns:
        The multiple closest to ``number``. An exact tie resolves to
        ``multiple * (number // multiple)``. The result is a float when
        either argument is a float.

    Raises:
        ZeroDivisionError: If ``multiple`` is 0.
    """
    downwards = multiple * (number // multiple)
    # Already on a multiple: nothing to round.
    if downwards == number:
        return downwards

    # The neighbouring multiple on the other side of `number`.
    upwards = downwards + multiple
    if abs(upwards - number) < abs(number - downwards):
        return upwards
    return downwards

In [81]:
def predict_price(data, model):
    """Return the result of ``model.predict(data)`` unchanged.

    Args:
        data: Feature rows passed straight to the model's ``predict`` method.
        model: Any object that exposes a ``predict`` method.

    Returns:
        Whatever ``model.predict`` returns for ``data``.
    """
    return model.predict(data)

In [95]:
# joblib.dump(model_xgb,'pricepredictor.h5')  # NOTE: joblib writes a pickle file; the '.h5' suffix does not make it HDF5

['pricepredictor.h5']

In [96]:
# model = joblib.load('pricepredictor.h5')

In [97]:
# model.predict(data_new)

array([43046.094], dtype=float32)

In [93]:
# A single listing to price, expressed with the integer codes used by the
# encoded training columns.
data_new = pd.DataFrame({
    'Grade': 2,
    'Catching Method': 3,
    'Sustainability': 1,
    'Actual Price': 30000
}, index=[0])

# One prediction per fitted model (lr, rf, gbr, xgb), each truncated to an int.
price1, price2, price3, price4 = (
    int(predict_price(data_new, fitted)[0])
    for fitted in (model_lr, model_rf, model_gbr, model_xgb)
)

if data_new['Grade'].iloc[0] == 0 or data_new['Actual Price'].iloc[0] == 0:
    # Grade code 0 or a zero actual price short-circuits to a zero price.
    avg_price = 0
    result_array = np.array([0], dtype=int)
else:
    # The average uses only the three tree-based models (price1, the linear
    # regression, is left out) and is rounded to the nearest 1000.
    avg_price = round_to_multiple((price2 + price3 + price4) / 3, 1000)
    result_array = np.array([price1, price2, price3, price4], dtype=int)

print(avg_price)
print(result_array)

42000.0
[51546 42716 39883 43046]


AttributeError: 'XGBRegressor' object has no attribute 'save'