In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

2025-12-07 19:26:26.611024: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-12-07 19:26:26.618333: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-12-07 19:26:27.077210: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-12-07 19:26:28.688742: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To tur

In [2]:
# Species metadata. The CSV header is replaced on load with:
#   nid      - integer label fed to tf.one_hot, i.e. the model's output index
#   class_id - species id shared with the image metadata (merge key below)
#   ven      - venomous flag (presumably 1 = venomous; verify against the CSV)
ven_info = pd.read_csv("image_data/venomous_status_metadata.csv",
                       names=["nid", "class_id", "ven"], header=0)
train_info = pd.read_csv("image_data/train_images_metadata.csv", index_col=0)

# One row per training image, carrying its nid / ven labels.
relevant = train_info[["image_path", "class_id"]].merge(ven_info, on="class_id")
n_classes = ven_info['class_id'].nunique()

# Later cells map a predicted output index back to a species with
# np.array(ven_info[...])[predicted_idx], i.e. by ROW POSITION. That is only
# correct when nid equals the row position, so fail loudly here otherwise.
assert np.array_equal(ven_info["nid"].to_numpy(), np.arange(n_classes)), (
    "ven_info['nid'] must equal the row position (0..n_classes-1); "
    "positional lookups of predictions would otherwise be misaligned"
)
print(f"Number of classes: {n_classes}")

Number of classes: 296


In [3]:
# from tensorflow.keras.applications.resnet import preprocess_input
from tensorflow.keras.applications.efficientnet_v2 import preprocess_input

BATCH_SIZE = 32  # images per batch produced by make_dataset1
IMG_SIZE = 480   # images are resized to IMG_SIZE x IMG_SIZE before entering the network


def load_and_preprocess1(img_path, y1, img_size=(IMG_SIZE, IMG_SIZE), onehot=True,
                         img_dir="image_data/train_images_large/"):
    """Read one JPEG from disk and turn it into a model-ready (image, label) pair.

    Args:
        img_path: File name (string or scalar string tensor) relative to `img_dir`.
        y1: Label for the image. With `onehot=True` it must be an integer class
            index in [0, n_classes); otherwise it is passed through unchanged.
        img_size: (height, width) the decoded image is resized to.
        onehot: If True, `y1` is one-hot encoded with depth `n_classes`
            (module-level global, read at call time).
        img_dir: Directory prefix (including the trailing slash) prepended to
            `img_path`. Defaults to the training image folder, so existing
            callers are unaffected; pass another folder to reuse this loader
            instead of redefining it.

    Returns:
        Tuple `(img, y1)`: the resized 3-channel image after the EfficientNetV2
        `preprocess_input`, and the (optionally one-hot) label.
    """
    img = tf.io.read_file(img_dir + img_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, img_size)
    img = preprocess_input(img)
    if onehot:
        y1 = tf.one_hot(y1, depth=n_classes)
    return img, y1

def make_dataset1(df, what, shuffle=False):
    """Build a batched, prefetched tf.data pipeline from a metadata frame.

    Args:
        df: DataFrame with an 'image_path' column and a label column named `what`.
        what: Name of the label column. "ven" labels are passed through as-is
            (binary target); any other column is one-hot encoded by
            `load_and_preprocess1`.
        shuffle: If True, the (path, label) pairs are reshuffled on every
            iteration of the dataset, i.e. every epoch. Shuffling happens
            before decoding, so only file names are buffered. Defaults to
            False, which keeps the fixed row order of `df`.

    Returns:
        A `tf.data.Dataset` yielding `(images, labels)` batches of size
        `BATCH_SIZE`.
    """
    ds = tf.data.Dataset.from_tensor_slices(
        (df['image_path'].values, df[what].values)
    )
    if shuffle:
        # A buffer covering the whole frame gives a uniform shuffle; max(..., 1)
        # keeps the buffer size valid for an empty frame.
        ds = ds.shuffle(max(len(df), 1), reshuffle_each_iteration=True)

    as_onehot = what != "ven"

    def _load(path, label):
        return load_and_preprocess1(path, label, onehot=as_onehot)

    ds = ds.map(_load, num_parallel_calls=tf.data.AUTOTUNE)
    return ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


In [4]:
# 90 % train / 5 % validation / 5 % test. train_test_split shuffles the rows
# once; random_state makes the split reproducible.
train_paths, test_val_paths = train_test_split(relevant, test_size=0.1, random_state=42)
val_paths, test_paths = train_test_split(test_val_paths, test_size=0.5, random_state=42)

# Species-classification datasets (one-hot "nid" labels).
# NOTE: the original cell called `cid_train_ds.cache()` / `cid_val_ds.cache()`
# and discarded the result. Dataset.cache() returns a NEW dataset instead of
# modifying the receiver, so those calls did nothing. They are removed rather
# than "fixed" by assignment: each decoded image is 480*480*3 float32 values
# (~2.6 MiB), so an in-memory cache of the ~60k training images would need
# well over 100 GB. Use `.cache(filename)` on sufficiently large disk if
# caching is really wanted.
cid_train_ds = make_dataset1(train_paths, "nid")
cid_val_ds = make_dataset1(val_paths, "nid")
cid_test_ds = make_dataset1(test_paths, "nid")

# Binary venomous / non-venomous datasets over the same splits.
ven_train_ds = make_dataset1(train_paths, "ven")
ven_val_ds = make_dataset1(val_paths, "ven")
ven_test_ds = make_dataset1(test_paths, "ven")

I0000 00:00:1764893987.149786   13270 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10288 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060, pci bus id: 0000:01:00.0, compute capability: 8.6


In [4]:
from tensorflow.keras.layers import Flatten, Conv2D, Dense, Input, MaxPooling2D, Dropout, Concatenate
from tensorflow.keras import losses
from tensorflow.keras import Model, Metric

In [6]:
class WeightedBinaryCrossentropy(losses.BinaryCrossentropy):
    """Binary cross-entropy that weights positive samples 4x more than negatives.

    Each sample's cross-entropy is multiplied by 0.8 when its label is 1 and by
    0.2 when its label is 0, and the weighted values are averaged into a scalar.
    """

    def call(self, y_true, y_pred):
        """Return the weighted mean binary cross-entropy as a scalar tensor.

        Args:
            y_true: Binary labels (0/1), any numeric dtype.
            y_pred: Predictions in the format expected by
                `losses.BinaryCrossentropy`.
        """
        per_sample = super().call(y_true, y_pred)

        # Integer labels cannot be multiplied by a Python float, so bring them
        # to the loss dtype first.
        labels = tf.cast(y_true, per_sample.dtype)
        weights = labels * 0.8 + (1.0 - labels) * 0.2

        # The parent loss averages over the last axis, so for (batch, 1) inputs
        # `per_sample` is (batch,) while `weights` is still (batch, 1).
        # Multiplying those would silently broadcast to (batch, batch); reduce
        # the weights over the same axis so the shapes line up.
        w_rank, l_rank = weights.shape.rank, per_sample.shape.rank
        if w_rank is not None and l_rank is not None and w_rank > l_rank:
            weights = tf.reduce_mean(weights, axis=-1)

        return tf.reduce_mean(per_sample * weights)

osztályok: 296, ebből 66 mérgező

66k kígyó kép, ebből 13k mérgező (tehát a mérgezőség aránya kb ugyanannyi az adatok és a fajok között)

egyenletes eloszlást feltételezve (ami nem igaz de mindegy) ha random tippelek: 
- acc: 0.3379 %
- mérges súlyozott érték: 0.22 * (2 * 65/296 + 5 * 230/296) + 0.78 * (2 * 66/296 + 1 * 229/296) ≈ 1.90

In [5]:
class VenomousWeighted(Metric):
    """Mean venom-aware misclassification cost (lower is better).

    Cost charged per sample:
        0 - correct species
        1 - wrong species, both non-venomous
        2 - wrong species, true non-venomous but predicted venomous
        2 - wrong species, both venomous
        5 - wrong species, true venomous but predicted non-venomous

    Args:
        vens: Sequence of 0/1 venomous flags, indexed by class index (the
            position of the class in the one-hot / softmax vector).
        name: Metric name shown in the Keras logs.
    """

    def __init__(self, vens, name='ven_weighted', **kwargs):
        super().__init__(name=name, **kwargs)
        self.vens = tf.constant(vens, dtype=tf.int32)
        # Running total of costs and running number of samples seen.
        self.sum = self.add_weight(shape=(), initializer='zeros', name='vsum')
        self.db = self.add_weight(shape=(), initializer='zeros', name='db')
        # Current value of the metric (sum / db), refreshed on every update.
        self.metr = self.add_weight(
            shape=(),
            initializer='zeros',
            name='ven_metr'
        )

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate the cost of one batch.

        Args:
            y_true: One-hot encoded true classes, shape (batch, n_classes).
            y_pred: Predicted class scores, shape (batch, n_classes).
            sample_weight: Accepted for API compatibility; ignored.
        """
        y_true_idx = tf.argmax(y_true, axis=-1, output_type=tf.int32)
        y_pred_idx = tf.argmax(y_pred, axis=-1, output_type=tf.int32)

        true_ven = tf.gather(self.vens, y_true_idx)
        pred_ven = tf.gather(self.vens, y_pred_idx)

        wrong = tf.not_equal(y_true_idx, y_pred_idx)
        cost = tf.cast(wrong, tf.int32)  # base cost 1 for any wrong species
        # A differing venom status implies a wrong species, so these two
        # surcharges need no extra gating.
        cost = tf.where(true_ven - pred_ven == -1, cost + 1, cost)
        cost = tf.where(true_ven - pred_ven == 1, cost + 4, cost)
        # The "both venomous" surcharge applies only to WRONG predictions;
        # without the gate a correct venomous prediction would be charged 1.
        both_ven_wrong = tf.logical_and(wrong, tf.equal(true_ven + pred_ven, 2))
        cost = tf.where(both_ven_wrong, cost + 1, cost)

        # The accumulators are float weights, so cast the integer sum explicitly.
        self.sum.assign_add(tf.cast(tf.reduce_sum(cost), tf.float32))
        self.db.assign_add(tf.cast(tf.shape(cost)[0], tf.float32))
        self.metr.assign(self.sum / self.db)

    def result(self):
        """Return the mean cost over all samples seen since the last reset."""
        return self.metr

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, Model, losses, callbacks
from tensorflow.keras.applications import EfficientNetV2L

# Random image perturbations applied inside the model, in this order.
augmentation_steps = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(factor=0.1),
    layers.RandomZoom(height_factor=0.2, width_factor=0.2),
    layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
    layers.RandomContrast(factor=0.2),
]
data_augmentation = tf.keras.Sequential(augmentation_steps)

# ImageNet-pretrained EfficientNetV2-L without its classification head,
# frozen for the first training phase.
input_shape = (IMG_SIZE, IMG_SIZE, 3)
base_model = EfficientNetV2L(include_top=False, weights='imagenet', input_shape=input_shape)
base_model.trainable = False

# Classifier: augmentation -> backbone (called with training=False) ->
# global average pooling -> dropout -> 1024-unit ReLU layer -> softmax over
# the n_classes species.
inputs = tf.keras.Input(shape=input_shape)
x = base_model(data_augmentation(inputs), training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(1024, activation="relu")(x)
outputs = layers.Dense(n_classes, activation="softmax", name='class_id')(x)

model = Model(inputs=inputs, outputs=outputs)

In [7]:
def _compile_classifier(learning_rate):
    """(Re)compile `model` for one-hot species classification with Adam at `learning_rate`."""
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=losses.CategoricalCrossentropy(),
        metrics=['accuracy']
    )


def _unfreeze_top_layers(backbone, n_top):
    """Leave only the last `n_top` layers of `backbone` trainable; freeze the rest."""
    # Mirrors the original phase-2 code, which enabled the backbone as a whole
    # before setting the per-layer flags. Every layer's flag is set explicitly
    # below, so repeating this in a later phase leaves the same end state.
    backbone.trainable = True
    first_trainable = len(backbone.layers) - n_top
    for i, layer in enumerate(backbone.layers):
        layer.trainable = i >= first_trainable


# -------------------- phase 1: train the new head on top of the frozen backbone
_compile_classifier(1e-3)

skip = False  # set True to reuse the saved phase-1 result instead of retraining
if skip:
    model.load_weights('best_big_model_phase1.keras')
else:
    early_stop = callbacks.EarlyStopping(monitor='val_accuracy', patience=0, restore_best_weights=True)
    checkpoint = callbacks.ModelCheckpoint('best_big_model_phase1.keras', monitor='val_accuracy', save_best_only=True)
    history_phase1 = model.fit(cid_train_ds, validation_data=cid_val_ds, epochs=7, callbacks=[early_stop, checkpoint])

# -------------------- phase 2: fine-tune the top 64 backbone layers
N_TOP_LAYERS_TO_UNFREEZE = 64
total_layers = len(base_model.layers)
print(total_layers, N_TOP_LAYERS_TO_UNFREEZE)

_unfreeze_top_layers(base_model, N_TOP_LAYERS_TO_UNFREEZE)
# Recompile after changing the trainable flags; smaller learning rate than phase 1.
_compile_classifier(1e-4)

checkpoint_phase2 = callbacks.ModelCheckpoint('best_big_model_phase2.keras', monitor='val_accuracy', save_best_only=True)
early_stop_phase2 = callbacks.EarlyStopping(monitor='val_accuracy', patience=0, restore_best_weights=True)

history_phase2 = model.fit(
    cid_train_ds,
    validation_data=cid_val_ds,
    epochs=10,  # more epochs for fine-tuning
    callbacks=[checkpoint_phase2, early_stop_phase2]
)


# -------------------- phase 3: fine-tune the top 128 backbone layers
N_TOP_LAYERS_TO_UNFREEZE = 128

_unfreeze_top_layers(base_model, N_TOP_LAYERS_TO_UNFREEZE)
# Smaller learning rate again for the deeper fine-tuning.
_compile_classifier(1e-5)

checkpoint_phase3 = callbacks.ModelCheckpoint('best_big_model_phase3.keras', monitor='val_accuracy', save_best_only=True)
early_stop_phase3 = callbacks.EarlyStopping(monitor='val_accuracy', patience=1, restore_best_weights=True)

history_phase3 = model.fit(
    cid_train_ds,
    validation_data=cid_val_ds,
    epochs=10,
    callbacks=[checkpoint_phase3, early_stop_phase3]
)

Epoch 1/7


I0000 00:00:1764894016.942386   13374 cuda_dnn.cc:529] Loaded cuDNN version 91600


[1m 362/1869[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m29:53[0m 1s/step - accuracy: 0.0996 - loss: 4.8127

Corrupt JPEG data: 558 extraneous bytes before marker 0xd9


[1m1581/1869[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m5:49[0m 1s/step - accuracy: 0.1494 - loss: 4.1297



[1m1869/1869[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2420s[0m 1s/step - accuracy: 0.1949 - loss: 3.6303 - val_accuracy: 0.2757 - val_loss: 3.0308
Epoch 2/7
[1m 362/1869[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m30:37[0m 1s/step - accuracy: 0.2581 - loss: 3.1459

Corrupt JPEG data: 558 extraneous bytes before marker 0xd9


[1m1581/1869[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m5:51[0m 1s/step - accuracy: 0.2614 - loss: 3.1150



[1m1869/1869[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2405s[0m 1s/step - accuracy: 0.2701 - loss: 3.0534 - val_accuracy: 0.3214 - val_loss: 2.8307
Epoch 3/7
[1m 362/1869[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m30:39[0m 1s/step - accuracy: 0.2947 - loss: 2.8931

Corrupt JPEG data: 558 extraneous bytes before marker 0xd9


[1m1581/1869[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m5:51[0m 1s/step - accuracy: 0.2983 - loss: 2.8794



[1m1869/1869[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2404s[0m 1s/step - accuracy: 0.3045 - loss: 2.8455 - val_accuracy: 0.3407 - val_loss: 2.7098
Epoch 4/7
[1m 361/1869[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m30:39[0m 1s/step - accuracy: 0.3166 - loss: 2.7509

Corrupt JPEG data: 558 extraneous bytes before marker 0xd9


[1m1580/1869[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m5:52[0m 1s/step - accuracy: 0.3238 - loss: 2.7325



[1m1869/1869[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2404s[0m 1s/step - accuracy: 0.3317 - loss: 2.6981 - val_accuracy: 0.3452 - val_loss: 2.6401
Epoch 5/7
[1m 362/1869[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m30:36[0m 1s/step - accuracy: 0.3425 - loss: 2.6567

Corrupt JPEG data: 558 extraneous bytes before marker 0xd9


[1m1581/1869[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m5:51[0m 1s/step - accuracy: 0.3453 - loss: 2.6267



[1m1869/1869[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2407s[0m 1s/step - accuracy: 0.3527 - loss: 2.5864 - val_accuracy: 0.3671 - val_loss: 2.5940
Epoch 6/7
[1m 362/1869[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m30:39[0m 1s/step - accuracy: 0.3713 - loss: 2.5461

Corrupt JPEG data: 558 extraneous bytes before marker 0xd9


[1m1581/1869[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m5:51[0m 1s/step - accuracy: 0.3671 - loss: 2.5307



[1m1869/1869[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2402s[0m 1s/step - accuracy: 0.3685 - loss: 2.5019 - val_accuracy: 0.3873 - val_loss: 2.5379
Epoch 7/7
[1m 362/1869[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m30:36[0m 1s/step - accuracy: 0.3875 - loss: 2.4401

Corrupt JPEG data: 558 extraneous bytes before marker 0xd9


[1m1581/1869[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m5:51[0m 1s/step - accuracy: 0.3867 - loss: 2.4429



[1m1869/1869[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2405s[0m 1s/step - accuracy: 0.3860 - loss: 2.4269 - val_accuracy: 0.3879 - val_loss: 2.5576
1028 64
Epoch 1/10


E0000 00:00:1764910864.936686   13270 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/functional_1_1/efficientnetv2-l_1/block1b_drop_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


[1m 362/1869[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m36:13[0m 1s/step - accuracy: 0.3850 - loss: 2.8078

Corrupt JPEG data: 558 extraneous bytes before marker 0xd9


[1m1581/1869[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m6:55[0m 1s/step - accuracy: 0.4480 - loss: 2.2738



[1m1869/1869[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2846s[0m 2s/step - accuracy: 0.5051 - loss: 1.9214 - val_accuracy: 0.5462 - val_loss: 1.8113
Epoch 2/10
[1m 362/1869[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m36:16[0m 1s/step - accuracy: 0.5613 - loss: 1.6609

Corrupt JPEG data: 558 extraneous bytes before marker 0xd9


[1m1581/1869[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m6:55[0m 1s/step - accuracy: 0.5763 - loss: 1.5866



[1m1869/1869[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2820s[0m 2s/step - accuracy: 0.5987 - loss: 1.4915 - val_accuracy: 0.5763 - val_loss: 1.7090
Epoch 3/10
[1m 362/1869[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m36:06[0m 1s/step - accuracy: 0.6317 - loss: 1.3492

Corrupt JPEG data: 558 extraneous bytes before marker 0xd9


[1m1581/1869[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m6:54[0m 1s/step - accuracy: 0.6392 - loss: 1.3075



[1m1869/1869[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2810s[0m 2s/step - accuracy: 0.6552 - loss: 1.2412 - val_accuracy: 0.5964 - val_loss: 1.6355
Epoch 4/10
[1m 362/1869[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m36:03[0m 1s/step - accuracy: 0.6761 - loss: 1.1318

Corrupt JPEG data: 558 extraneous bytes before marker 0xd9


[1m1581/1869[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m6:53[0m 1s/step - accuracy: 0.6889 - loss: 1.0980



[1m1869/1869[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2809s[0m 2s/step - accuracy: 0.7042 - loss: 1.0467 - val_accuracy: 0.6106 - val_loss: 1.6484
Epoch 5/10
[1m 362/1869[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m36:06[0m 1s/step - accuracy: 0.7310 - loss: 0.9428

Corrupt JPEG data: 558 extraneous bytes before marker 0xd9


[1m1581/1869[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m6:54[0m 1s/step - accuracy: 0.7358 - loss: 0.9248



[1m1869/1869[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2817s[0m 2s/step - accuracy: 0.7468 - loss: 0.8800 - val_accuracy: 0.6341 - val_loss: 1.6974
Epoch 6/10
[1m 362/1869[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m36:21[0m 1s/step - accuracy: 0.7663 - loss: 0.8106

Corrupt JPEG data: 558 extraneous bytes before marker 0xd9


[1m1581/1869[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m6:56[0m 1s/step - accuracy: 0.7729 - loss: 0.7822



[1m1869/1869[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2826s[0m 2s/step - accuracy: 0.7837 - loss: 0.7415 - val_accuracy: 0.6410 - val_loss: 1.6819
Epoch 7/10
[1m 362/1869[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m36:26[0m 1s/step - accuracy: 0.8001 - loss: 0.6878

Corrupt JPEG data: 558 extraneous bytes before marker 0xd9


[1m1581/1869[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m6:58[0m 1s/step - accuracy: 0.8025 - loss: 0.6693



[1m1869/1869[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2837s[0m 2s/step - accuracy: 0.8085 - loss: 0.6439 - val_accuracy: 0.6434 - val_loss: 1.6972
Epoch 8/10
[1m 175/1869[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m41:05[0m 1s/step - accuracy: 0.8261 - loss: 0.6011

KeyboardInterrupt: 

In [17]:
# Class probabilities for the held-out test split, then the most likely class per image.
y_pred = model.predict(cid_test_ds)
y_pred_idx = y_pred.argmax(axis=1)

n_test = len(test_paths)
# Species accuracy: predicted output index vs. the true nid.
species_hits = y_pred_idx == test_paths['nid']
# Venom accuracy: venomous flag of the predicted species (looked up by row
# position in ven_info) vs. the true flag.
venom_hits = np.array(ven_info["ven"])[y_pred_idx] == test_paths["ven"]

print(np.sum(species_hits) / n_test)
print(np.sum(venom_hits) / n_test)

[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 1s/step
35826    0.000000
66409    0.000301
18664    0.000301
38358    0.000000
62084    0.000301
           ...   
36339    0.000301
29391    0.000000
21765    0.000000
49489    0.000301
38393    0.000301
Name: nid, Length: 3323, dtype: float64
35826    0.000301
66409    0.000301
18664    0.000301
38358    0.000301
62084    0.000301
           ...   
36339    0.000301
29391    0.000301
21765    0.000301
49489    0.000301
38393    0.000301
Name: ven, Length: 3323, dtype: float64


In [18]:
# Re-print species accuracy and venomous-flag accuracy on the held-out test
# split from the predictions computed in the previous cell.
predicted_ven = np.array(ven_info["ven"])[y_pred_idx]
for truth_col, predicted in (("nid", y_pred_idx), ("ven", predicted_ven)):
    print(np.sum(predicted == test_paths[truth_col]) / len(test_paths))

0.652422509780319
0.934697562443575


In [9]:
import os
from tensorflow.keras.applications.efficientnet_v2 import preprocess_input

TEST_IMG_DIR = "image_data/test_images_large"

# os.listdir returns entries in arbitrary order; sort so that the prediction
# file has a reproducible row order across runs and machines.
filenames = sorted(
    f for f in os.listdir(TEST_IMG_DIR)
    if os.path.isfile(os.path.join(TEST_IMG_DIR, f))
)

# BATCH_SIZE and IMG_SIZE are reused from the preprocessing cell rather than
# redefined here, so the test pipeline cannot drift from the training one.


def load_test_image(img_path, img_size=(IMG_SIZE, IMG_SIZE)):
    """Read one unlabeled test JPEG and preprocess it like the training images.

    Deliberately NOT named `load_and_preprocess1`: redefining that name with a
    label-free signature would break `make_dataset1` on any later call.

    Args:
        img_path: File name (scalar string tensor) inside `TEST_IMG_DIR`.
        img_size: (height, width) the decoded image is resized to.

    Returns:
        The resized 3-channel image after EfficientNetV2 `preprocess_input`.
    """
    img = tf.io.read_file(TEST_IMG_DIR + "/" + img_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, img_size)
    img = preprocess_input(img)
    return img


ds = tf.data.Dataset.from_tensor_slices((filenames,))
ds = ds.map(load_test_image, num_parallel_calls=tf.data.AUTOTUNE)
ds = ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
final_test_ds = ds
# Row i of the result holds the class probabilities for filenames[i].
fpred_nid_prob = model.predict(final_test_ds)

[1m461/461[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m539s[0m 1s/step


In [12]:
# Most likely output index per test image.
fpred_nid = fpred_nid_prob.argmax(axis=1)

# Translate output indices back to species ids / venomous flags by row
# position in ven_info.
ven_flags_by_row = ven_info["ven"].to_numpy()
class_ids_by_row = ven_info["class_id"].to_numpy()
fpred_ven = ven_flags_by_row[fpred_nid]
fpred_cid = class_ids_by_row[fpred_nid]

# Sanity check: one prediction per test file.
print(len(filenames), fpred_cid.shape, fpred_ven.shape)

14732 (14732,) (14732,)


In [15]:
# One (file name, predicted species id, predicted venomous flag) row per test image.
submission_rows = [
    (path, cid, ven) for path, cid, ven in zip(filenames, fpred_cid, fpred_ven)
]
outp_df = pd.DataFrame(submission_rows, columns=['image_path', 'class_id', 'venomous'])
outp_df.to_csv("prediction_file.csv", index=False)