In [3]:
# Extract the charts archive into the /content folder.
# The context manager closes the archive even if extraction raises.
import zipfile

with zipfile.ZipFile('/content/charts.zip', 'r') as zip_ref:
    zip_ref.extractall('/content')

In [4]:
# importing required library 
import os
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
import shutil
from keras.applications import VGG16
from keras.layers import Dense, Dropout, Flatten
from keras.models import Model
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

In [5]:
# Load the train/validation metadata CSV into a pandas DataFrame.
# Later cells read its 'image_index' and 'type' columns.
df = pd.read_csv(filepath_or_buffer='./charts/train_val.csv')

In [6]:
# Folder holding the original (unsorted) chart images.
source_dir = './charts/train_val/'
# Folder under which one sub-folder per chart category is created.
base_dir = './charts/training/'
# Folder used for the test images.
test_dir = './charts/test'

# The distinct values of the 'type' column are the chart categories.
categories = pd.unique(df['type'])

# Show which categories were found.
print("Chart categories: ",categories)

Chart categories:  ['vbar_categorical' 'hbar_categorical' 'line' 'pie' 'dot_line']


In [7]:
# Step 1: image classification.
# Sort the chart images into one sub-folder per category under base_dir;
# base_dir is later handed to flow_from_directory as the training directory.
if os.path.isdir(base_dir):
    # base_dir already exists, so the images are treated as already sorted.
    print("Directory is already created.")
else:
    # Walk the metadata rows and move each image into its category folder.
    for _, row in df.iterrows():
        # Build the file name and look up the chart category for this row.
        filename = str(row['image_index']) + '.png'
        category = row['type']
        category_dir = os.path.join(base_dir, category)
        # exist_ok=True makes this a no-op once the folder exists, so the
        # first and all later images of a category share the same code path.
        os.makedirs(category_dir, exist_ok=True)
        shutil.move(os.path.join(source_dir, filename),
                    os.path.join(category_dir, filename))

In [28]:
# Image size the network is built for.
img_width = 224
img_height = 224

# Training images are read from base_dir, validation images from test_dir.
train_data_dir, val_data_dir = base_dir, test_dir

# VGG16 with ImageNet weights and without its top (classifier) layers.
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(img_width, img_height, 3),
)

In [29]:
# Freeze the layers of the pre-trained base so only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

# New classification head stacked on the output of the frozen base.
x = base_model.output
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
# One softmax unit per chart category. The generators are created with
# class_mode='categorical' (one-hot labels) and the loss is categorical
# cross-entropy, so a single sigmoid unit would not match the label shape.
predictions = Dense(len(categories), activation='softmax')(x)

# Full model: pre-trained base as input side, new head as output side.
model = Model(inputs=base_model.input, outputs=predictions)

# Compile with categorical cross-entropy loss and an Adam optimizer.
# `learning_rate` is the current argument name; `lr` is a deprecated alias.
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=1e-5), metrics=['accuracy'])

In [30]:
# Training images: pixel values are multiplied by 1/255 and augmented with
# random shear, random zoom and random horizontal flips.
train_datagen = ImageDataGenerator(
    rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True
)

# Validation images: rescaling only, no augmentation.
val_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Training hyper-parameters.
batch_size = 32
epochs = 10

# Sample counts; they are only used to derive the number of steps per epoch.
# NOTE(review): these are hard-coded — confirm they match the actual data set.
nb_train_samples = 800
nb_val_samples = 200

# Batch generators that read images from the class sub-folders of each
# directory and yield one-hot ('categorical') labels.
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical',
)
val_generator = val_datagen.flow_from_directory(
    val_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical',
)

# Train the model and keep the per-epoch metrics in `history`.
history = model.fit(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=val_generator,
    validation_steps=nb_val_samples // batch_size,
)

In [18]:
# Set-up for evaluating the model on the test data set:
# the directory to read from and a generator that only rescales pixels.
test_data_dir = test_dir
test_datagen = ImageDataGenerator(rescale=1./255)

In [24]:
# Batches of test images with one-hot ('categorical') labels.
# NOTE(review): the saved output of this cell reports 0 images in 0 classes —
# presumably test_data_dir lacks one sub-folder per class; verify the layout.
test_data_generator = test_datagen.flow_from_directory(
    directory=test_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical',
)

Found 0 images belonging to 0 classes.


In [None]:
# Evaluate the model on the test generator; the result unpacks into the loss
# followed by the accuracy metric the model was compiled with.
test_loss, test_acc = model.evaluate(x=test_data_generator)

# Report both numbers.
print('Test accuracy=', test_acc)
print('Test loss=', test_loss)

Test accuracy: 1.0
Test loss: 0.0007767498027533293
