In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tensorflow.keras.applications import EfficientNetB0
import os
from tqdm import tqdm
from PIL import Image
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import Sequence

csv_file = '/kaggle/input/crop-eye/frames_data.csv'
image_directory = '/kaggle/input/crop-eye/out'
image_size = (240, 60)

df = pd.read_csv(csv_file)

df['Mouse_X'] = df['Mouse_X'] / 1920
df['Mouse_Y'] = df['Mouse_Y'] / 1080
labels = df[['Mouse_X', 'Mouse_Y']].values

image_paths = [os.path.join(image_directory, fname) for fname in df['Video_Filename']]

class DataGenerator(Sequence):
    def __init__(self, image_paths, labels, batch_size, image_size=(240, 60), shuffle=True):
        self.image_paths = image_paths
        self.labels = labels
        self.batch_size = batch_size
        self.image_size = image_size
        self.shuffle = shuffle
        self.indices = np.arange(len(self.image_paths))
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.image_paths) / self.batch_size))

    def __getitem__(self, index):
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_image_paths = [self.image_paths[i] for i in batch_indices]
        batch_labels = [self.labels[i] for i in batch_indices]

        batch_images = np.array([self._load_and_preprocess_image(img_path) for img_path in batch_image_paths])

        return batch_images, np.array(batch_labels)

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)

    def _load_and_preprocess_image(self, image_path):
        img = Image.open(image_path)
        img = img.resize(self.image_size)  
        img = img_to_array(img) / 255.0  
        return img

train_paths, val_paths, y_train, y_val = train_test_split(image_paths, labels, test_size=0.2, random_state=42)

batch_size = 32
train_generator = DataGenerator(train_paths, y_train, batch_size, image_size)
val_generator = DataGenerator(val_paths, y_val, batch_size, image_size)

base_model = EfficientNetB0(include_top=False, input_shape=(60, 240, 3), pooling='avg')

x = base_model.output
x = layers.Dense(128, activation='relu')(x)
x = layers.Dense(64, activation='relu')(x)
output = layers.Dense(2, activation='sigmoid')(x)  # Sigmoid로 X, Y 좌표 예측 (0~1 범위)

model = models.Model(inputs=base_model.input, outputs=output)

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['mae'])

model.summary()

checkpoint = ModelCheckpoint('best_model.weights.keras', monitor='val_loss', save_best_only=True, mode='min', verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)

history = model.fit(train_generator, validation_data=val_generator, epochs=10, callbacks=[checkpoint, early_stopping])





Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


Epoch 1/10


  self._warn_if_super_not_called()
I0000 00:00:1727837927.373435      94 service.cc:145] XLA service 0x7dacb0002900 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1727837927.373495      94 service.cc:153]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0


[1m   2/2221[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:23[0m 65ms/step - loss: 0.6935 - mae: 0.2442   

I0000 00:00:1727837974.838058      94 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m2221/2221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step - loss: 0.5138 - mae: 0.0482
Epoch 1: val_loss improved from inf to 0.50194, saving model to best_model.weights.keras
[1m2221/2221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m509s[0m 191ms/step - loss: 0.5138 - mae: 0.0482 - val_loss: 0.5019 - val_mae: 0.0262
Epoch 2/10
[1m2221/2221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - loss: 0.5012 - mae: 0.0228
Epoch 2: val_loss did not improve from 0.50194
[1m2221/2221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 80ms/step - loss: 0.5012 - mae: 0.0228 - val_loss: 0.5027 - val_mae: 0.0289
Epoch 3/10
[1m2221/2221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - loss: 0.5004 - mae: 0.0202
Epoch 3: val_loss did not improve from 0.50194
[1m2221/2221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m177s[0m 79ms/step - loss: 0.5004 - mae: 0.0202 - val_loss: 0.5029 - val_mae: 0.0302
Epoch 4/10
[1m2221/2221[0m [32m

In [2]:
def predict_mouse_coordinates(image_path, model):
    test_image = Image.open(image_path)
    test_image = test_image.resize(image_size)
    test_image = img_to_array(test_image) / 255.0  # 이미지 정규화 (0~1 범위)
    test_image = np.expand_dims(test_image, axis=0)  # 배치 차원 추가
    predicted_coords = model.predict(test_image)

    # 예측 좌표 역정규화 (0~1 범위를 1920x1080으로 복구)
    predicted_coords = predicted_coords * [1920, 1080]
    return predicted_coords

test_image_path = '/kaggle/input/test-valid/99.png' 
predicted_coords = predict_mouse_coordinates(test_image_path, model)
print(f"Predicted Mouse Coordinates: {predicted_coords}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
Predicted Mouse Coordinates: [[597.12804794 301.22702837]]


In [3]:
import tensorflow as tf
print(tf.__version__)


2.16.1


In [5]:
model.save('itracing.h5')