In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf


# Load the label table and derive each sample's image file name from its id.
df = pd.read_csv("/kaggle/input/realdata/merged.csv")
df["image_path"] = df["image_id"].astype(str) + ".png"

# Bucket the steering angle into three bins around 0.5:
#   0 -> angle < 0.5, 1 -> angle == 0.5, 2 -> angle > 0.5.
# Rows matching none of the conditions fall back to np.select's default of 0.
angle_col = df["angle"]
conditions = [angle_col.lt(0.5), angle_col.eq(0.5), angle_col.gt(0.5)]
choices = [0, 1, 2]
df["angle_bin"] = np.select(conditions, choices)

# Combined "<speed>_<angle_bin>" key used to stratify the splits.
df["stratify_label"] = df["speed"].astype(str).str.cat(
    df["angle_bin"].astype(str), sep="_"
)

# Keep only strata that occur at least twice, then renumber the rows.
# NOTE(review): two stratified splits are applied in sequence further down;
# a stratum with exactly 2 rows may not survive both - confirm the threshold.
label_counts = df["stratify_label"].value_counts()
valid_labels = label_counts[label_counts.ge(2)].index
keep_rows = df["stratify_label"].isin(valid_labels)
df = df.loc[keep_rows].reset_index(drop=True)


# Stage 1: hold out 10% of all rows as the test set, stratified on the
# combined speed/angle-bin label.
full_strata = df["stratify_label"]
df_temp, df_test = train_test_split(
    df, test_size=0.1, random_state=42, stratify=full_strata
)

# Stage 2: carve the validation set out of the remaining rows.
# 0.2222 of the remaining ~90% is roughly 20% of the full table.
remaining_strata = df_temp["stratify_label"]
df_train, df_val = train_test_split(
    df_temp, test_size=0.2222, random_state=42, stratify=remaining_strata
)

def get_tuple_distribution(df_part, name):
    """Print how many rows of ``df_part`` fall into each (speed, angle_bin) pair.

    Args:
        df_part: DataFrame with ``speed`` and ``angle_bin`` columns.
        name: Label for the split, used in the printed heading.

    Returns:
        None. The heading and the count table are written to stdout.
    """
    group_keys = ["speed", "angle_bin"]
    summary = (
        df_part.groupby(group_keys)
        .size()
        .reset_index(name="count")
        .sort_values(by=group_keys)
        .reset_index(drop=True)
    )
    print(f"\n{name} labels distribution（speed, angle_bin）:")
    print(summary)

# Per-split breakdown of the (speed, angle_bin) strata.
for split_name, split_df in (("Train", df_train), ("Validation", df_val), ("Test", df_test)):
    get_tuple_distribution(split_df, split_name)

# Split sizes, absolute and as a share of the filtered table.
total_rows = len(df)
print(f"Train size: {len(df_train)} ({len(df_train) / total_rows:.2%})")
print(f"Val size:   {len(df_val)} ({len(df_val) / total_rows:.2%})")
print(f"Test size:  {len(df_test)} ({len(df_test) / total_rows:.2%})")

# Convert to tf.data.Dataset

def df_to_dataset(df_part):
    """Wrap one split as a ``tf.data.Dataset`` of (image_path, angle, speed).

    Args:
        df_part: DataFrame with ``image_path``, ``angle`` and ``speed`` columns.

    Returns:
        A dataset yielding one (image_path, angle, speed) triple per row, with
        ``angle`` and ``speed`` cast to float32.
    """
    columns = (
        df_part["image_path"].values,
        df_part["angle"].astype("float32").values,
        df_part["speed"].astype("float32").values,
    )
    return tf.data.Dataset.from_tensor_slices(columns)

# One raw (path, angle, speed) dataset per split, in train / val / test order.
train_dataset, val_dataset, test_dataset = (
    df_to_dataset(split_df) for split_df in (df_train, df_val, df_test)
)




from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

def apply_augmentation(image):
    """Apply random brightness, contrast, saturation and hue jitter to an image.

    The tf.image colour ops treat float tensors as having values in [0, 1]:
    ``random_brightness`` adds its delta directly to the pixel values, so a
    ``max_delta`` of 0.2 is all but invisible on a 0-255 scale. The image is
    therefore rescaled to [0, 1] for the jitter and restored to [0, 255]
    afterwards, so callers see the same value range as before.

    Args:
        image: Float RGB image tensor with values in [0, 255] (this is what
            ``parse_image`` passes after ``tf.image.resize``).

    Returns:
        The jittered image, clipped to [0, 255], with the same shape as the
        input.
    """
    # Work in the [0, 1] range the colour ops are specified for.
    image = image / 255.0
    image = tf.image.random_brightness(image, max_delta=0.2)
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)
    image = tf.image.random_saturation(image, lower=0.8, upper=1.2)
    image = tf.image.random_hue(image, max_delta=0.05)
    # Brightness/contrast can push values out of range; clip before rescaling.
    image = tf.clip_by_value(image, 0.0, 1.0)
    return image * 255.0


def parse_image(image_path, angle, speed, augment=False):
    """Load one PNG from disk and pair it with its labels.

    The file is read from the training-image folder, decoded as 3-channel PNG,
    resized to 224x224 and passed through MobileNetV2's ``preprocess_input``.

    Args:
        image_path: String tensor holding the image file name.
        angle: Target value for the ``angle_output`` head.
        speed: Target value for the ``speed_output`` head.
        augment: When True, colour augmentation is applied to the image with
            probability 0.2.

    Returns:
        ``(image, labels)`` where ``labels`` is a dict keyed by the model's
        output names ``"angle_output"`` and ``"speed_output"``.
    """
    image_folder = "/kaggle/input/realdata/training_data/training_data"
    full_path = tf.strings.join([image_folder, image_path], separator="/")
    raw_bytes = tf.io.read_file(full_path)
    pixels = tf.image.resize(tf.image.decode_png(raw_bytes, channels=3), (224, 224))

    if augment:
        # Draw once per image; only about one in five images gets jittered.
        do_augment = tf.random.uniform([], 0, 1.0) < 0.2
        pixels = tf.cond(do_augment, lambda: apply_augmentation(pixels), lambda: pixels)

    targets = {"angle_output": angle, "speed_output": speed}
    return preprocess_input(pixels), targets


batch_size = 32


# Training pipeline.
# Shuffle the lightweight (path, angle, speed) records *before* decoding, with
# a buffer that covers the whole training split. Shuffling after `map` would
# keep decoded 224x224x3 float images in the buffer and, with a buffer smaller
# than the split, only mix samples locally.
train_dataset = (
    train_dataset
    .shuffle(len(df_train))
    .map(lambda x, y, z: parse_image(x, y, z, augment=True), num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation pipeline: deterministic order, no augmentation.
val_dataset = (
    val_dataset
    .map(parse_image, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)


# Test pipeline: same treatment as validation.
test_dataset = (
    test_dataset
    .map(parse_image, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

2025-05-07 11:48:00.941742: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746618481.144516      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746618481.199299      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered



Train labels distribution（speed, angle_bin）:
   speed  angle_bin  count
0    0.0          0    366
1    0.0          1    556
2    0.0          2   1546
3    1.0          0   3814
4    1.0          1   2244
5    1.0          2   6371

Validation labels distribution（speed, angle_bin）:
   speed  angle_bin  count
0    0.0          0    104
1    0.0          1    159
2    0.0          2    442
3    1.0          0   1090
4    1.0          1    641
5    1.0          2   1820

Test labels distribution（speed, angle_bin）:
   speed  angle_bin  count
0    0.0          0     52
1    0.0          1     80
2    0.0          2    221
3    1.0          0    545
4    1.0          1    321
5    1.0          2    910
Train size: 14897 (70.00%)
Val size:   4256 (20.00%)
Test size:  2129 (10.00%)


I0000 00:00:1746618493.052582      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


In [None]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras import layers, models, backend as K
from tensorflow.keras.regularizers import l2

def rmse(y_true, y_pred):
    """Root-mean-squared error between targets and predictions, as a scalar.

    Both tensors are flattened to 1-D before subtracting. Without this, a
    target of shape (batch,) and a prediction of shape (batch, 1) broadcast to
    a (batch, batch) difference matrix, and the result becomes the error over
    every target/prediction pairing instead of the matched pairs. When the
    shapes already agree, flattening does not change the value.

    Args:
        y_true: Target values; any shape with the same number of elements as
            ``y_pred``.
        y_pred: Predicted values.

    Returns:
        Scalar tensor holding the RMSE over all elements.
    """
    y_pred = tf.reshape(y_pred, [-1])
    y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), [-1])
    return K.sqrt(K.mean(K.square(y_pred - y_true)))



# Backbone: MobileNetV2 with ImageNet weights and without its classification top.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))


# Freeze only the backbone layers at indices 9..124; layers outside this slice
# are not modified here.
for layer in base_model.layers[9:125]:
    layer.trainable = False

x = base_model.output
print(x.shape)  # recorded run printed (None, 7, 7, 1280)
# 2x2 average pooling with stride 1 and 'valid' padding turns the 7x7 map into 6x6.
x = layers.AveragePooling2D(pool_size=(2, 2), strides=1, padding='valid')(x)
# 1x1 convolution projects the channels down to 512.
x = layers.Conv2D(512, kernel_size=1, padding='same', activation='relu')(x)   # (6, 6, 512)
# Flatten the 6x6 grid into a sequence of 36 tokens, each 512 wide.
x = layers.Reshape((36, 512))(x)
print(x.shape)  # recorded run printed (None, 36, 512)


# Transformer part: self-attention over the 36 tokens (x is passed as both
# query and value), followed by dropout and layer normalisation.
transformer = layers.MultiHeadAttention(num_heads=4, key_dim=16)(x, x)
transformer = layers.Dropout(0.1)(transformer)
transformer = layers.LayerNormalization(epsilon=1e-6)(transformer)

# Residual connection: add the normalised attention branch back onto the
# token sequence, then normalise the sum again.
x_transformer = x
x_transformer = layers.Add()([x_transformer, transformer])
x_transformer = layers.LayerNormalization(epsilon=1e-6)(x_transformer)


# Average over the token axis to get one 512-d feature vector per image.
x_pooled = layers.GlobalAveragePooling1D()(x_transformer)
print(x_pooled.shape)  # recorded run printed (None, 512)

# Angle head: single linear unit with L2 weight regularisation.
angle_output = layers.Dense(1, activation='linear', kernel_regularizer=l2(0.01), name='angle_output')(x_pooled)

# Speed head: single sigmoid unit with L2 weight regularisation.
speed_output = layers.Dense(1, activation='sigmoid', kernel_regularizer=l2(0.01), name='speed_output')(x_pooled)


# Two-output model; the output layer names match the label dict keys
# ("angle_output", "speed_output") used by the datasets and by compile().
model = models.Model(inputs=base_model.input, outputs=[angle_output, speed_output])



#model.summary()

#for layer in model.layers:
#    print(f"{layer.name}: Trainable = {layer.trainable}")

# Per-head objectives: RMSE for the angle regression, binary cross-entropy
# for the speed classification. Metrics mirror the same split.
# NOTE(review): in the recorded run val_speed_output_accuracy stays at 0.8344
# for the first two epochs - the share of speed == 1 rows in the validation
# split - while training accuracy is ~0.98. A 1e-2 Adam learning rate on
# partly unfrozen pretrained layers may be the cause; confirm before relying
# on val_loss.
adam = tf.keras.optimizers.Adam(learning_rate=1e-2)
head_losses = {"angle_output": rmse, "speed_output": "binary_crossentropy"}
head_metrics = {"angle_output": [rmse], "speed_output": ["accuracy"]}
model.compile(optimizer=adam, loss=head_losses, metrics=head_metrics)

# Multiply the learning rate by 0.8 whenever val_loss has not improved for
# 3 epochs.
lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.8, patience=3, verbose=1
)

initial_epochs = 30
fit_callbacks = [lr_reducer]
history_initial = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=initial_epochs,
    callbacks=fit_callbacks,
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
(None, 7, 7, 1280)
(None, 36, 512)
(None, 512)
Epoch 1/30


I0000 00:00:1746618530.092163      88 service.cc:148] XLA service 0x7872f42159e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1746618530.093136      88 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1746618531.941102      88 cuda_dnn.cc:529] Loaded cuDNN version 90300
E0000 00:00:1746618536.272383      88 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1746618536.468910      88 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m  2/466[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m28s[0m 62ms/step - angle_output_loss: 3.6931 - angle_output_rmse: 3.7096 - loss: 4.6973 - speed_output_accuracy: 0.5391 - speed_output_loss: 0.9629   

I0000 00:00:1746618542.914656      88 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m465/466[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 89ms/step - angle_output_loss: 0.9209 - angle_output_rmse: 1.0086 - loss: 1.1206 - speed_output_accuracy: 0.9373 - speed_output_loss: 0.1769

E0000 00:00:1746618588.315640      88 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1746618588.514157      88 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m466/466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 144ms/step - angle_output_loss: 0.9184 - angle_output_rmse: 1.0062 - loss: 1.1177 - speed_output_accuracy: 0.9374 - speed_output_loss: 0.1766 - val_angle_output_loss: 0.2179 - val_angle_output_rmse: 0.2435 - val_loss: 1.2832 - val_speed_output_accuracy: 0.8344 - val_speed_output_loss: 1.0565 - learning_rate: 0.0100
Epoch 2/30
[1m466/466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 69ms/step - angle_output_loss: 0.1163 - angle_output_rmse: 0.2707 - loss: 0.1803 - speed_output_accuracy: 0.9847 - speed_output_loss: 0.0561 - val_angle_output_loss: 0.2552 - val_angle_output_rmse: 0.2972 - val_loss: 1.2214 - val_speed_output_accuracy: 0.8344 - val_speed_output_loss: 0.9592 - learning_rate: 0.0100
Epoch 3/30
[1m466/466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 69ms/step - angle_output_loss: 0.1086 - angle_output_rmse: 0.2703 - loss: 0.1629 - speed_output_accuracy: 0.9867 - speed_output_loss: 0.0468 - 

In [None]:
# Save the full Keras model (architecture, weights, optimizer state) as HDF5.
# The status message is built from the path so it cannot drift from the file
# that was actually written.
# NOTE(review): the model was compiled with the custom `rmse` function, so
# reloading this file presumably needs `custom_objects={"rmse": rmse}` or
# `compile=False` - confirm when loading.
keras_model_path = "my_full_model6.h5"
model.save(keras_model_path)
print(f"Saved full model to {keras_model_path}")


# Convert the in-memory model to a TensorFlow Lite flatbuffer and write it out.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

tflite_model_path = "converted_model6.tflite"
with open(tflite_model_path, "wb") as f:
    f.write(tflite_model)

print(f"Model successfully converted to TensorFlow Lite and saved as '{tflite_model_path}'")