In [1]:
#Mounting google drive

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
#Importing Modules

import os
import json
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

In [3]:
# Set the path to the dataset

base_path = '/content/drive/MyDrive/Research/Graphene_images/Graphene/'
train_images_path = os.path.join(base_path, 'train2019')
val_images_path = os.path.join(base_path, 'val2019')
annotations_path = os.path.join(base_path, 'annotations')

In [4]:
# Load the annotations

with open(os.path.join(annotations_path, 'instances_train2019.json')) as f:
    train_annotations = json.load(f)

In [5]:
def parse_annotations_train(annotations):
    image_data = []
    labels = []

    # Create a dictionary to map image_id to file_name
    image_id_to_file_name = {img['id']: img['file_name'] for img in annotations['images']}

    for ann in annotations['annotations']:
        image_id = ann['image_id']
        category_id = ann['category_id']
        file_name = image_id_to_file_name.get(image_id)

        if file_name:
            image_path = os.path.join(train_images_path, file_name)
            #print(f"Checking path: {image_path}")  # Debugging line
            if os.path.exists(image_path):
                image_data.append(image_path)
                labels.append(category_id)
            else:
                print(f"File does not exist: {image_path}")  # Debugging line

    return image_data, labels

train_image_data, train_labels = parse_annotations_train(train_annotations)

# Create a DataFrame with consistent column names
train_df = pd.DataFrame({'image_path': train_image_data, 'label': train_labels})

In [6]:
# Data augmentation and normalization for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# Ensure the label column is of type string
train_df['label'] = train_df['label'].astype(str)

# Create a data generator for the training set
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='image_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

Found 3719 validated image filenames belonging to 3 classes.


In [7]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

# Load the MobileNetV2 model
base_model = MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights='imagenet')

# Freeze the base model
base_model.trainable = False

# Add custom layers on top
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(1024, activation='relu'),
    Dense(3, activation='softmax')  # 3 classes for "mono graphene", "few graphene", "thick graphene"
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
epochs = 10

history = model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=len(train_generator)
)


Epoch 1/10


  self._warn_if_super_not_called()


[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m417s[0m 3s/step - accuracy: 0.3833 - loss: 1.8701
Epoch 2/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 287us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/10


  self.gen.throw(typ, value, traceback)


[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 656ms/step - accuracy: 0.4537 - loss: 1.0179
Epoch 4/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 5/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 690ms/step - accuracy: 0.4745 - loss: 1.0026
Epoch 6/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 7/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 687ms/step - accuracy: 0.4743 - loss: 0.9977
Epoch 8/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 9/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 665ms/step - accuracy: 0.4720 - loss: 0.9998
Epoch 10/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70us/step - accuracy: 0.0000e+00 - loss: 0.0000e+0

In [14]:
# Load the validation annotations
with open(os.path.join(annotations_path, 'instances_val2019.json')) as f:
    val_annotations = json.load(f)

In [15]:
def parse_annotations_val(annotations):
    image_data = []
    labels = []

    # Create a dictionary to map image_id to file_name
    image_id_to_file_name = {img['id']: img['file_name'] for img in annotations['images']}

    for ann in annotations['annotations']:
        image_id = ann['image_id']
        category_id = ann['category_id']
        file_name = image_id_to_file_name.get(image_id)

        if file_name:
            image_path = os.path.join(val_images_path, file_name)
            #print(f"Checking path: {image_path}")  # Debugging line
            if os.path.exists(image_path):
                image_data.append(image_path)
                labels.append(category_id)
            else:
                print(f"File does not exist: {image_path}")  # Debugging line
    return image_data, labels

In [16]:
# Prepare the validation data
val_image_data, val_labels = parse_annotations_val(val_annotations)

# Create a DataFrame for validation
val_df = pd.DataFrame({'image_path': val_image_data, 'label': val_labels})

In [17]:
# Create a data generator for the validation set
val_datagen = ImageDataGenerator(rescale=1./255)

# Ensure the label column is of type string
val_df['label'] = val_df['label'].astype(str)

# Create a data generator for the training set
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    x_col='image_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

Found 1093 validated image filenames belonging to 3 classes.


In [18]:
# Evaluate the model
val_loss, val_accuracy = model.evaluate(val_generator)
print(f"Validation Accuracy: {val_accuracy:.2f}")

  self._warn_if_super_not_called()


[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 2s/step - accuracy: 0.3972 - loss: 1.0978
Validation Accuracy: 0.41


------------------------------------------------------------------------------------------

From scratch

In [11]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Define the CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),

    Dense(256, activation='relu'),
    Dropout(0.5),  # Dropout to prevent overfitting

    Dense(3, activation='softmax')  # 3 output classes: mono graphene, few graphene, thick graphene
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# Set number of epochs
epochs = 10

# Train the model
history = model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=len(train_generator)
)


Epoch 1/10


  self._warn_if_super_not_called()


[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m358s[0m 2s/step - accuracy: 0.4012 - loss: 1.3456
Epoch 2/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/10


  self.gen.throw(typ, value, traceback)


[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 676ms/step - accuracy: 0.4219 - loss: 1.0498
Epoch 4/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 5/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 666ms/step - accuracy: 0.4126 - loss: 1.0588
Epoch 6/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 7/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 656ms/step - accuracy: 0.4393 - loss: 1.0449
Epoch 8/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 9/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 680ms/step - accuracy: 0.4226 - loss: 1.0618
Epoch 10/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00


In [19]:
# Evaluate the model on validation data
val_loss, val_accuracy = model.evaluate(val_generator)
print(f"Validation Accuracy: {val_accuracy:.2f}")

[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 327ms/step - accuracy: 0.4113 - loss: 1.0975
Validation Accuracy: 0.41
