In [None]:
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split

In [None]:
# Chart categories to classify. The list order fixes the class index that
# flow_from_directory assigns to each sub-directory name.
class_labels = [
    'area',
    'heatmap',
    'horizontal_bar',
    'horizontal_interval',
    'line',
    'manhattan',
    'map',
    'pie',
    'scatter',
    'scatter-line',
    'surface',
    'venn',
    'vertical_bar',
    'vertical_box',
    'vertical_interval',
]

# Size of the softmax output layer.
num_classes = len(class_labels)

In [None]:
# Adjust paths to your train folder and class labels
train_path = "/kaggle/input/icpr-2022/ICPR 2022/ICPR2022_CHARTINFO_UB_PMC_TRAIN_v1.0/images"

# Fraction of the images in each class directory held out for validation.
VALIDATION_SPLIT = 0.3
# Fixed seed so the training shuffle order is reproducible across runs.
SEED = 42

# A single ImageDataGenerator feeds both subsets so they share preprocessing.
# - rescale=1./255 matches the rescaling used by the test-set generator in the
#   evaluation cell; train and test inputs must be on the same scale.
# - validation_split lets flow_from_directory carve out the validation subset
#   itself. The split is applied inside every class directory, so class
#   proportions are preserved (the role `stratify=` was meant to play).
# NOTE: ImageDataGenerator has no `.filenames` / `.classes` attributes; those
# live on the iterator returned by flow_from_directory.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT,
)

train_generator = train_datagen.flow_from_directory(
    train_path,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    classes=class_labels,
    subset='training',
    shuffle=True,  # without shuffling, batches would arrive grouped by class
    seed=SEED,
)

# A real batch iterator, so it can be passed directly as `validation_data`
# to model.fit (a tuple of filenames and integer labels cannot).
val_generator = train_datagen.flow_from_directory(
    train_path,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    classes=class_labels,
    subset='validation',
    shuffle=False,  # keep a stable order so predictions line up with val_labels
)

# File names and integer class indices of each subset, kept under the names
# this cell has always exposed. Both are in directory order, independent of
# the shuffle applied when batches are drawn.
train_data, train_labels = train_generator.filenames, train_generator.classes
val_data, val_labels = val_generator.filenames, val_generator.classes

In [None]:
from tensorflow.keras.models import Model
# The package is `tensorflow.keras.layers` (plural); `tensorflow.keras.layer`
# does not exist and raises ModuleNotFoundError.
from tensorflow.keras.layers import GlobalAveragePooling2D

# VGG16 convolutional base with ImageNet weights and no classification head.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the pretrained layers so only the new classifier head is trained.
for layer in base_model.layers:
    layer.trainable = False

# Classifier head (replace with your intended architecture).
x = base_model.output
# Global average pooling already yields a flat (batch, channels) tensor, so no
# additional Flatten layer is needed after it.
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)  # Example dense layer
output = Dense(num_classes, activation='softmax')(x)  # One probability per chart class

# Full model: VGG16 input -> frozen base -> new classifier head.
model = Model(inputs=base_model.input, outputs=output)

# categorical_crossentropy pairs with the one-hot labels produced by
# class_mode='categorical' in the data generators.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model. The returned History is kept so the per-epoch loss and
# accuracy can be inspected or plotted afterwards.
history = model.fit(
    train_generator,
    epochs=10,  # Adjust epochs as needed
    validation_data=val_generator
)

In [None]:
# Evaluate `model` on the test set.
# Test images are only scaled by 1/255; no augmentation is configured here.
test_datagen = ImageDataGenerator(rescale=1./255)

# shuffle=False keeps the batches in directory order.
test_generator = test_datagen.flow_from_directory(
    '/kaggle/input/icpr-2022-small/ICPR 2022 Small/Test',
    classes=class_labels,
    class_mode='categorical',
    target_size=(224, 224),
    batch_size=32,
    shuffle=False,
)

# With a single compiled metric, evaluate() returns the loss followed by accuracy.
test_loss, test_acc = model.evaluate(test_generator)
print('Test accuracy:', test_acc)