In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, the loop below (run by clicking run or pressing Shift+Enter) walks all files under the input directory; it currently visits them without printing anything

import os
# Traverse the input tree; every file is visited but nothing is printed or collected.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        continue

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Input, Lambda,concatenate
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import Xception, InceptionResNetV2, NASNetLarge, InceptionV3
from tensorflow.keras.applications.xception import preprocess_input as xception_preprocessor
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input as inc_resnet_preprocessor
from tensorflow.keras.applications.nasnet import preprocess_input as nasnet_preprocessor
from tensorflow.keras.applications.inception_v3 import preprocess_input as inception_preprocessor
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Read the training table and the test table from the competition input folder.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/image-classification/train.csv',
        '/kaggle/input/image-classification/test.csv',
    )
)

In [4]:
# Hold out 20% of the labelled rows for validation, stratified on the 'class' column
# and seeded so the split is reproducible.
# NOTE(review): train_df is overwritten with the training part, so re-running this
# cell without re-running the CSV-loading cell splits the already-reduced frame again.
class_labels = train_df['class']
train_df, val_df = train_test_split(
    train_df,
    test_size=0.2,
    stratify=class_labels,
    random_state=42,
)

In [5]:
# Random augmentations applied only to training images.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest',
)

# Training generator: scale pixel values by 1/255 and apply the augmentations above.
# NOTE(review): the *_preprocessor functions imported at the top of the notebook are
# never used; confirm that plain 1/255 rescaling is the intended input scaling for
# the ImageNet-pretrained backbones.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Validation generator: same rescaling, no augmentation.
val_datagen = ImageDataGenerator(rescale=1./255)

In [6]:
# Settings shared by the training and validation iterators: image folder, dataframe
# columns, 331x331 target size, batches of 128 and one-hot ('categorical') labels.
shared_flow_kwargs = dict(
    directory='/kaggle/input/image-classification/train',
    x_col='filepaths',
    y_col='class',
    target_size=(331, 331),
    batch_size=128,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_dataframe(train_df, **shared_flow_kwargs)
val_generator = val_datagen.flow_from_dataframe(val_df, **shared_flow_kwargs)

# Single input tensor shared by all backbone models.
input_layer = Input(shape=(331, 331, 3))

Found 6356 validated image filenames belonging to 70 classes.
Found 1590 validated image filenames belonging to 70 classes.


In [7]:
def _pooled_backbone(constructor):
    """Build an ImageNet-initialised backbone on the shared input and pool its output.

    Args:
        constructor: a tf.keras.applications model constructor.

    Returns:
        Tuple of (backbone model without its top, GlobalAveragePooling2D output tensor).
    """
    backbone = constructor(weights='imagenet', include_top=False, input_tensor=input_layer)
    pooled = GlobalAveragePooling2D()(backbone.output)
    return backbone, pooled


# Xception
base_model_xception, xception_output = _pooled_backbone(tf.keras.applications.Xception)

# InceptionResNetV2
base_model_incep_resnet, incep_resnet_output = _pooled_backbone(tf.keras.applications.InceptionResNetV2)

# NASNetLarge
base_model_nasnet, nasnet_output = _pooled_backbone(tf.keras.applications.NASNetLarge)

# InceptionV3
base_model_incep_v3, incep_v3_output = _pooled_backbone(tf.keras.applications.InceptionV3)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m83683744/83683744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m219055592/219055592[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/nasnet/NASNet-large-no-top.h5
[1m343610240/343610240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step


In [8]:
# Join the pooled feature vectors of the four backbones into one tensor.
pooled_features = [
    xception_output,
    incep_resnet_output,
    nasnet_output,
    incep_v3_output,
]
combined_output = concatenate(pooled_features)

In [9]:
# Classification head: 512-unit ReLU layer, 50% dropout, then a 70-way softmax.
hidden = Dense(512, activation='relu')(combined_output)
x = Dropout(0.5)(hidden)
output_layer = Dense(70, activation='softmax')(x)  # 70 classes

# Combined model: shared image input -> four backbones -> head.
model = Model(inputs=input_layer, outputs=output_layer)

# Freeze every layer that belongs to a pre-trained backbone so only the head is trained.
pretrained_backbones = (
    base_model_xception,
    base_model_incep_resnet,
    base_model_nasnet,
    base_model_incep_v3,
)
for layer in (lyr for backbone in pretrained_backbones for lyr in backbone.layers):
    layer.trainable = False

# Compile with Adam and categorical cross-entropy, tracking accuracy.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Stop when val_loss has not improved for 10 epochs and restore the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Reduce the learning rate when validation accuracy plateaus for 3 epochs.
# The model is compiled with metrics=['accuracy'], so the logged key is
# 'val_accuracy'; the previous 'val_acc' key is not present in the logs, which meant
# the learning rate was never reduced. mode='max' states explicitly that higher is better.
# NOTE(review): with factor=0.01 a single reduction takes the 1e-3 starting rate
# straight to min_lr (1e-5) - confirm that such a large step is intended.
reduce_lr = ReduceLROnPlateau(
    monitor='val_accuracy',
    mode='max',
    factor=0.01,
    patience=3,
    min_lr=1e-5,
    verbose=1,
)

# Train the combined model for up to 30 epochs, validating after each epoch.
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=30,
    callbacks=[early_stopping, reduce_lr]
)

Epoch 1/30


I0000 00:00:1729512313.400294      95 service.cc:145] XLA service 0x78d5000761e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1729512313.400350      95 service.cc:153]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1729512420.150867      95 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m49/50[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m4s[0m 4s/step - accuracy: 0.3653 - loss: 2.9034

2024-10-21 12:10:57.382011: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng12{k5=1,k6=0,k7=1,k10=4} for conv (f32[84,128,163,163]{3,2,1,0}, u8[0]{0}) custom-call(f32[84,128,163,163]{3,2,1,0}, f32[128,1,3,3]{3,2,1,0}), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, feature_group_count=128, custom_call_target="__cudnn$convForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"conv_result_scale":1,"activation_mode":"kNone","side_input_scale":0,"leakyrelu_alpha":0}} is taking a while...
2024-10-21 12:10:58.235529: E external/local_xla/xla/service/slow_operation_alarm.cc:133] The operation took 1.853627102s
Trying algorithm eng12{k5=1,k6=0,k7=1,k10=4} for conv (f32[84,128,163,163]{3,2,1,0}, u8[0]{0}) custom-call(f32[84,128,163,163]{3,2,1,0}, f32[128,1,3,3]{3,2,1,0}), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, feature_group_count=128, custom_call_target="__cudnn$convForward",

[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m630s[0m 9s/step - accuracy: 0.3721 - loss: 2.8672 - val_accuracy: 0.9258 - val_loss: 0.2539 - learning_rate: 0.0010
Epoch 2/30
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m277s[0m 5s/step - accuracy: 0.7148 - loss: 1.0575 - val_accuracy: 0.9358 - val_loss: 0.2121 - learning_rate: 0.0010
Epoch 3/30
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m273s[0m 5s/step - accuracy: 0.7402 - loss: 0.9216 - val_accuracy: 0.9415 - val_loss: 0.2028 - learning_rate: 0.0010
Epoch 4/30
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m276s[0m 5s/step - accuracy: 0.7641 - loss: 0.8329 - val_accuracy: 0.9453 - val_loss: 0.1993 - learning_rate: 0.0010
Epoch 5/30
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m273s[0m 5s/step - accuracy: 0.7710 - loss: 0.7812 - val_accuracy: 0.9403 - val_loss: 0.2039 - learning_rate: 0.0010
Epoch 6/30
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m272s[0m 5s/step -

In [11]:
# Read the test table (the same file is also read in the CSV-loading cell near the top).
test_df = pd.read_csv(filepath_or_buffer='/kaggle/input/image-classification/test.csv')

In [12]:
# Test images get the same 1/255 rescaling as training/validation, with no augmentation.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [13]:
# Iterator over the test images. There are no labels (class_mode=None) and
# shuffling is disabled so predictions come out in the same row order as test_df.
test_flow_kwargs = dict(
    directory='/kaggle/input/image-classification/test',
    x_col='filepaths',  # column holding the image file paths
    target_size=(331, 331),
    class_mode=None,  # test data has no labels
    batch_size=128,
    shuffle=False,
)
test_generator = test_datagen.flow_from_dataframe(test_df, **test_flow_kwargs)

Found 1400 validated image filenames.


In [14]:
# Invert the generator's {class name -> index} mapping into {index -> class name}.
classes_mapped = {
    class_index: class_name
    for class_name, class_index in train_generator.class_indices.items()
}

# Predict class probabilities for the test set and take the most likely class index per row.
predictions = model.predict(test_generator)
predicted_classes = predictions.argmax(axis=1)

# Translate the predicted indices back into class-name labels.
predicted_labels = list(map(classes_mapped.__getitem__, predicted_classes))

[1m10/11[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m2s[0m 3s/step

2024-10-21 14:14:09.140188: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng12{k5=1,k6=0,k7=1,k10=8} for conv (f32[120,64,163,163]{3,2,1,0}, u8[0]{0}) custom-call(f32[120,64,163,163]{3,2,1,0}, f32[64,1,3,3]{3,2,1,0}), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, feature_group_count=64, custom_call_target="__cudnn$convForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"conv_result_scale":1,"activation_mode":"kNone","side_input_scale":0,"leakyrelu_alpha":0}} is taking a while...
2024-10-21 14:14:09.472309: E external/local_xla/xla/service/slow_operation_alarm.cc:133] The operation took 1.332323045s
Trying algorithm eng12{k5=1,k6=0,k7=1,k10=8} for conv (f32[120,64,163,163]{3,2,1,0}, u8[0]{0}) custom-call(f32[120,64,163,163]{3,2,1,0}, f32[64,1,3,3]{3,2,1,0}), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, feature_group_count=64, custom_call_target="__cudnn$convForward", bac

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 14s/step


In [15]:
# Build the submission table: the ID column from the test file plus the predicted label.
submission = pd.DataFrame({'ID': test_df['ID']}).assign(TARGET=predicted_labels)

# Write the table to submission.csv in the Kaggle working directory, without the index.
submission_path = '/kaggle/working/submission.csv'
submission.to_csv(submission_path, index=False)

# Print the first few rows of the submission as a sanity check.
preview = submission.head()
print(preview)

   ID    TARGET
0   0  class_58
1   1  class_58
2   2   class_7
3   3  class_17
4   4  class_71
