In [6]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, InputLayer, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger

import warnings
#warnings.filterwarnings("ignore")
# Print the installed TensorFlow version so the run's environment is recorded in the cell output.
print(tf.__version__)

2.10.0


In [32]:
# Root data folder; the two output folders below are created underneath it.
dir_path = r"C:\Users\acer\Desktop\Data_2D\data_x"
results_dir = os.path.join(dir_path, 'logs/EfficientNet-B5/')
models_dir = os.path.join(dir_path, 'models/EfficientNet-B5/')

# exist_ok=True makes this cell safe to re-run: an existing directory is left
# as-is, and there is no window between "check" and "create" in which the
# folder could appear and make makedirs fail.
os.makedirs(results_dir, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)

In [50]:
# Dataset locations: one image folder and one CSV label file per split.
TRAIN_PATH = r"C:\Users\acer\Desktop\Data_2D\data_x\train"
TRAIN_LABEL_PATH = r"C:\Users\acer\Desktop\Data_2D\data_x\train_classification.csv"

VAL_PATH = r"C:\Users\acer\Desktop\Data_2D\data_x\validation"
VAL_LABEL_PATH = r"C:\Users\acer\Desktop\Data_2D\data_x\validation_classification.csv"

# Image size; INPUT_SHAPE is IMG_DIM with a trailing dimension of 3
# (presumably the colour channels -- confirm against the model definition).
IMG_DIM = (224, 224)
INPUT_SHAPE = IMG_DIM + (3,)

# Training-loop settings.
BATCH_SIZE = 32
EPOCH = 20

# Step used when plotting the training acc and loss.
x_axis_inc = 1

In [34]:
import pandas as pd

def expand_dataframe(df, n_slices=224):
    """Repeat every row of ``df`` ``n_slices`` times, giving each copy its own ID.

    Copy number ``i`` (counting from 0) of a row gets the ID
    ``"{original ID}_x_{i:03d}"``, e.g. ``"63-2829 L_x_007"``. All other
    columns are carried over unchanged, and every copy keeps the index label
    of the row it was made from (so index labels repeat in the result).

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an ``'ID'`` column. It is not modified.
    n_slices : int, default 224
        How many copies to make of each row; must be non-negative.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``len(df) * n_slices`` rows and the same columns as
        ``df``. An empty ``df`` yields an empty frame that still has the
        original columns.

    Raises
    ------
    KeyError
        If ``df`` has no ``'ID'`` column.
    ValueError
        If ``n_slices`` is negative.
    """
    # Build the suffixed IDs in row-major order: all copies of row 0, then row 1, ...
    expanded_ids = [
        f"{original_id}_x_{i:03d}"
        for original_id in df['ID']
        for i in range(n_slices)
    ]

    # Take each row position n_slices times in one go instead of copying row
    # by row; this keeps the column dtypes and is far cheaper than iterrows().
    row_positions = np.repeat(np.arange(len(df)), n_slices)
    new_df = df.iloc[row_positions].copy()
    new_df['ID'] = expanded_ids

    return new_df

# Read the training label CSV, keep only the ID and c_erosion columns, and
# expand each row into its suffixed per-row copies with expand_dataframe.
train_original_df = pd.read_csv(TRAIN_LABEL_PATH)
train_c_erosion_label = train_original_df[['ID', 'c_erosion']]
train_df = expand_dataframe(df=train_c_erosion_label)

# Preview the first rows of the expanded table.
train_df.head()

                  ID  c_erosion
0    63-2829 L_x_000          0
0    63-2829 L_x_001          0
0    63-2829 L_x_002          0
0    63-2829 L_x_003          0
0    63-2829 L_x_004          0
..               ...        ...
253  58-9834 R_x_219          1
253  58-9834 R_x_220          1
253  58-9834 R_x_221          1
253  58-9834 R_x_222          1
253  58-9834 R_x_223          1

[56896 rows x 2 columns]


In [35]:
# NOTE: data_labels is a second name for train_df (no copy is made), so the
# image_path column added below is visible through both names.
data_labels = train_df
target_labels = data_labels['c_erosion']

# Build "<TRAIN_PATH>/<ID>.jpg" for every row in a single pass over the ID
# column; a row-wise DataFrame.apply(axis=1) builds a Series per row and is
# much slower on a table of this size.
data_labels['image_path'] = [
    os.path.join(TRAIN_PATH, str(image_id)) + '.jpg'
    for image_id in data_labels['ID']
]
data_labels.head()

Unnamed: 0,ID,c_erosion,image_path
0,63-2829 L_x_000,0,C:\Users\acer\Desktop\Data_2D\data_x\train\63-...
0,63-2829 L_x_001,0,C:\Users\acer\Desktop\Data_2D\data_x\train\63-...
0,63-2829 L_x_002,0,C:\Users\acer\Desktop\Data_2D\data_x\train\63-...
0,63-2829 L_x_003,0,C:\Users\acer\Desktop\Data_2D\data_x\train\63-...
0,63-2829 L_x_004,0,C:\Users\acer\Desktop\Data_2D\data_x\train\63-...


In [None]:
import pandas as pd
import os

def check_valid_files(df, column_name='image_path', max_listed=None):
    """Flag which paths in ``df[column_name]`` point to existing files.

    Adds (or overwrites) a boolean ``'is_valid_file'`` column on ``df`` itself,
    prints the total / valid / invalid counts, and lists the invalid paths.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the paths. It is modified in place.
    column_name : str, default 'image_path'
        Column containing the file paths.
    max_listed : int or None, default None
        Maximum number of invalid paths to print. ``None`` prints all of them.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now carrying the ``'is_valid_file'`` column.

    Raises
    ------
    KeyError
        If ``column_name`` is not a column of ``df``.
    """
    def _is_existing_file(path):
        # Missing values (None / NaN) and other non-path objects count as
        # invalid instead of making os.path.isfile raise TypeError.
        return isinstance(path, (str, bytes, os.PathLike)) and os.path.isfile(path)

    # astype(bool) guarantees a boolean column so the ~ mask below is valid
    # even when the frame is empty.
    df['is_valid_file'] = df[column_name].map(_is_existing_file).astype(bool)

    # Print summary
    total = len(df)
    valid = int(df['is_valid_file'].sum())
    invalid = total - valid

    print(f"Total files: {total}")
    print(f"Valid files: {valid}")
    print(f"Invalid files: {invalid}")

    if invalid > 0:
        print("\nInvalid files:")
        invalid_files = df.loc[~df['is_valid_file'], column_name]
        shown = invalid_files if max_listed is None else invalid_files.iloc[:max_listed]
        for file in shown:
            print(file)
        hidden = invalid - len(shown)
        if hidden > 0:
            print(f"... and {hidden} more")

    return df

# Flag which training image paths exist on disk; a count summary is printed.
check_valid_file_df = check_valid_files(data_labels, column_name='image_path')

# Inspect the first 20 rows, including the is_valid_file column.
check_valid_file_df.head(20)

In [54]:
# Read the validation label CSV, keep only the ID and c_erosion columns, and
# expand each row into its suffixed per-row copies with expand_dataframe.
val_original_df = pd.read_csv(VAL_LABEL_PATH)
val_c_erosion_label = val_original_df[['ID', 'c_erosion']]
val_df = expand_dataframe(df=val_c_erosion_label)

# Preview the first rows of the expanded table.
val_df.head()

Unnamed: 0,ID,c_erosion
0,57-16393 R_x_000,0
0,57-16393 R_x_001,0
0,57-16393 R_x_002,0
0,57-16393 R_x_003,0
0,57-16393 R_x_004,0


In [57]:
# .copy() gives val_labels its own data, so adding a column below does not
# trigger pandas' SettingWithCopyWarning; val_df itself is left unchanged.
val_labels = val_df[['ID', 'c_erosion']].copy()
target_val_labels = val_labels['c_erosion']

# Build "<VAL_PATH>/<ID>.jpg" for every row in a single pass over the ID
# column rather than a slower row-wise DataFrame.apply(axis=1).
val_labels['image_path'] = [
    os.path.join(VAL_PATH, str(image_id)) + '.jpg'
    for image_id in val_labels['ID']
]
val_labels.head()

Unnamed: 0,ID,c_erosion,image_path
0,57-16393 R_x_000,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...
0,57-16393 R_x_001,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...
0,57-16393 R_x_002,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...
0,57-16393 R_x_003,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...
0,57-16393 R_x_004,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...


In [58]:
# Flag which validation image paths exist on disk; a count summary is printed.
# This rebinds check_valid_file_df, which now refers to the validation table.
check_valid_file_df = check_valid_files(val_labels, column_name='image_path')
check_valid_file_df.head(20)

Total files: 12096
Valid files: 12096
Invalid files: 0


Unnamed: 0,ID,c_erosion,image_path,is_valid_file
0,57-16393 R_x_000,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...,True
0,57-16393 R_x_001,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...,True
0,57-16393 R_x_002,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...,True
0,57-16393 R_x_003,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...,True
0,57-16393 R_x_004,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...,True
0,57-16393 R_x_005,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...,True
0,57-16393 R_x_006,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...,True
0,57-16393 R_x_007,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...,True
0,57-16393 R_x_008,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...,True
0,57-16393 R_x_009,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...,True
