In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.datasets import mnist


In [3]:
df = pd.read_csv(r"../../dataset/fish.csv")

In [4]:
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)

In [5]:
df_train = df_train.drop(columns='Length2')
df_test = df_test.drop(columns='Length2')

In [6]:
from datasist.structdata import detect_outliers

idx = detect_outliers(
    data=df,
    n=0,  
    features=['Weight', 'Length1', 'Length3']
)

# Remove the outliers
df_clean = df.drop(idx)

In [7]:
category = df_train.select_dtypes(exclude = np.number).columns
le = LabelEncoder()
classes = dict()
for i in category:
    df_train[i] = le.fit_transform(df_train[i])
    classes[i] = le.classes_
df_train

Unnamed: 0,Species,Weight,Length1,Length3,Height,Width
75,2,51.5,15.0,17.2,4.5924,2.6316
138,3,567.0,43.2,48.7,7.7920,4.8700
2,0,340.0,23.9,31.1,12.3778,4.6961
86,2,120.0,20.0,23.5,6.1100,3.4075
45,4,160.0,20.5,25.3,7.0334,3.8203
...,...,...,...,...,...,...
71,1,300.0,24.0,29.0,11.3680,4.2340
106,2,250.0,25.9,29.4,7.8204,4.2042
14,0,600.0,29.4,37.2,14.9544,5.1708
92,2,150.0,20.5,24.0,6.7920,3.6240


In [8]:
le = LabelEncoder()
classes = dict()
cat = df_test.select_dtypes(exclude = np.number).columns
for i in cat:
    df_test[i] = le.fit_transform(df_test[i])
    classes[i] = le.classes_
df_test

Unnamed: 0,Species,Weight,Length1,Length3,Height,Width
78,2,78.0,16.8,19.4,5.1992,3.1234
155,5,13.4,11.7,13.5,2.43,1.269
128,3,200.0,30.0,34.8,5.568,3.3756
55,6,270.0,23.6,28.7,8.3804,4.2476
94,2,150.0,21.0,24.5,5.2185,3.626
29,0,1000.0,33.5,42.6,18.957,6.603
147,5,7.0,10.1,11.6,1.7284,1.1484
51,4,180.0,23.6,27.9,7.0866,3.906
98,2,188.0,22.6,26.2,6.7334,4.1658
141,3,1250.0,52.0,59.7,10.6863,6.9849


In [9]:
x_train = df_train.drop(columns = 'Weight').values
x_train

array([[ 2.    , 15.    , 17.2   ,  4.5924,  2.6316],
       [ 3.    , 43.2   , 48.7   ,  7.792 ,  4.87  ],
       [ 0.    , 23.9   , 31.1   , 12.3778,  4.6961],
       [ 2.    , 20.    , 23.5   ,  6.11  ,  3.4075],
       [ 4.    , 20.5   , 25.3   ,  7.0334,  3.8203],
       [ 4.    , 19.4   , 23.7   ,  6.1146,  3.2943],
       [ 1.    , 19.8   , 24.1   ,  9.7364,  3.1571],
       [ 2.    , 36.6   , 41.3   , 12.4313,  7.3514],
       [ 0.    , 32.    , 40.6   , 16.3618,  6.09  ],
       [ 3.    , 42.    , 48.    ,  6.96  ,  4.896 ],
       [ 5.    , 10.    , 11.6   ,  1.972 ,  1.16  ],
       [ 2.    , 20.    , 23.5   ,  5.5225,  3.995 ],
       [ 1.    , 19.    , 23.2   ,  8.5376,  3.2944],
       [ 4.    , 16.5   , 20.3   ,  5.2983,  2.8217],
       [ 2.    , 19.    , 22.5   ,  5.6925,  3.555 ],
       [ 0.    , 31.5   , 39.7   , 15.5227,  5.2801],
       [ 2.    , 19.3   , 22.8   ,  6.384 ,  3.534 ],
       [ 2.    , 18.2   , 21.    ,  5.082 ,  2.772 ],
       [ 2.    , 34.    , 39

In [10]:
y_train = df_train['Weight'].values
y_train

array([  51.5,  567. ,  340. ,  120. ,  160. ,  120. ,  145. ,  820. ,
        720. ,  500. ,    7.5,  110. ,  140. ,   69. ,  110. ,  620. ,
        130. ,   85. ,  685. ,  500. ,  514. ,  200. , 1000. ,  714. ,
          8.7, 1000. ,  110. ,  430. ,  456. ,  925. ,  950. ,  250. ,
        170. ,  475. ,  145. ,  300. ,  242. ,  650. ,  120. ,  145. ,
        125. ,  850. ,    0. ,  840. ,  725. ,  680. ,  690. ,   80. ,
        120. , 1600. ,  265. ,   12.2,  345. ,  160. ,  850. ,   55. ,
         32. ,  975. ,  320. ,    9.7, 1100. ,   60. ,   19.7,  145. ,
        197. ,  272. ,  450. ,  700. ,  820. ,  161. ,   40. ,   85. ,
        100. ,  950. ,  140. ,  390. ,  150. ,  273. ,  900. ,  556. ,
        130. ,  115. , 1550. , 1650. ,  135. ,  450. ,  700. ,  340. ,
        800. ,  540. ,  363. ,  700. ,   87. ,    5.9,  770. ,  500. ,
        430. ,   90. ,  390. ,  300. ,  200. ,   12.2,  540. ,  169. ,
        130. ,  685. ,  306. ,    9.8,  300. ,   78. ,    9.8,  290. ,
      

In [11]:
x_test = df_test.drop(columns = 'Weight').values

In [12]:
y_test = df_test['Weight'].values

In [13]:
x_train.shape

(127, 5)

In [31]:
# Step 3: Define the neural network model
model = models.Sequential([
    # Input layer (flattened images)
    layers.InputLayer(input_shape=(5,)),
    
    # First hidden layer with 16 neurons and ReLU activation
    layers.Dense(5, activation='relu'),
    
    # Second hidden layer with 16 neurons and ReLU activation
    layers.Dense(5, activation='relu'),

    # Output layer with 10 neurons and softmax activation for classification
    layers.Dense(1)
])

# Step 4: Compile the model
model.compile(optimizer=optimizers.SGD(learning_rate=0.01),
              loss='mse', 
              metrics=['R2Score'])

# Step 5: Train the model
model.fit(x_train, y_train, epochs=20, batch_size=32)

# Step 6: Evaluate the model on the test set
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test accuracy: {test_acc}")
print(f"Test loss: {test_loss}")

Epoch 1/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - R2Score: -1.3112 - loss: 223699.6406   
Epoch 2/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 387us/step - R2Score: -1.0544 - loss: 231761.9844
Epoch 3/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - R2Score: -0.8082 - loss: 226118.4062 
Epoch 4/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - R2Score: -0.6618 - loss: 215980.3906 
Epoch 5/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - R2Score: -0.6828 - loss: 223895.6719  
Epoch 6/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - R2Score: -0.4691 - loss: 171842.8281 
Epoch 7/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - R2Score: -0.4837 - loss: 184347.3438 
Epoch 8/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - R2Score: -0.3808 - loss: 190353.7969 
Epoch 9/20
[1m4/4[0m