In [2]:
import json

import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Both splits get the same preprocessing: every pixel value is multiplied by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by every generator created below.
DATASET_ROOT = 'D:/DATASET/CNN'
TARGET_SIZE = (299, 299)
BATCH_SIZE = 32

# Dataset directories: one train/test pair per disease.
train_dir_fibrosis = f'{DATASET_ROOT}/fibrosis/train'
test_dir_fibrosis = f'{DATASET_ROOT}/fibrosis/test'

train_dir_inflammation = f'{DATASET_ROOT}/inflammation/train'
test_dir_inflammation = f'{DATASET_ROOT}/inflammation/test'

train_dir_ballooning = f'{DATASET_ROOT}/ballooning/train'
test_dir_ballooning = f'{DATASET_ROOT}/ballooning/test'

train_dir_steatosis = f'{DATASET_ROOT}/steatosis/train'
test_dir_steatosis = f'{DATASET_ROOT}/steatosis/test'


def _make_generators(train_dir, test_dir, class_mode):
    """Build the (train, test) directory iterators for one disease.

    Args:
        train_dir: Directory with the training images.
        test_dir: Directory with the held-out test images.
        class_mode: Label encoding passed to ``flow_from_directory``
            ('categorical' or 'binary' in this notebook).

    Returns:
        Tuple ``(train_generator, test_generator)``.
    """
    train_generator = train_datagen.flow_from_directory(
        train_dir, target_size=TARGET_SIZE, batch_size=BATCH_SIZE, class_mode=class_mode)
    # shuffle=False keeps the test batches in directory order, so that model
    # outputs can be matched against `test_generator.classes` / `.filenames`.
    test_generator = test_datagen.flow_from_directory(
        test_dir, target_size=TARGET_SIZE, batch_size=BATCH_SIZE, class_mode=class_mode,
        shuffle=False)
    return train_generator, test_generator


# Create the train and test generators for each disease.
train_generator_fibrosis, test_generator_fibrosis = _make_generators(
    train_dir_fibrosis, test_dir_fibrosis, 'categorical')

train_generator_inflammation, test_generator_inflammation = _make_generators(
    train_dir_inflammation, test_dir_inflammation, 'categorical')

# Ballooning has two classes and is the only task that uses binary labels.
train_generator_ballooning, test_generator_ballooning = _make_generators(
    train_dir_ballooning, test_dir_ballooning, 'binary')

train_generator_steatosis, test_generator_steatosis = _make_generators(
    train_dir_steatosis, test_dir_steatosis, 'categorical')

Found 5349 images belonging to 5 classes.
Found 1278 images belonging to 5 classes.
Found 9732 images belonging to 3 classes.
Found 491 images belonging to 3 classes.
Found 7973 images belonging to 2 classes.
Found 381 images belonging to 2 classes.
Found 38516 images belonging to 4 classes.
Found 7705 images belonging to 4 classes.


In [3]:
# Pull the raw label information out of a generator.
def get_class_distribution(generator):
    """Return the label data a generator exposes.

    Args:
        generator: Any object with ``classes`` and ``class_indices``
            attributes (in this notebook, the iterators returned by
            ``flow_from_directory``).

    Returns:
        Tuple ``(classes, class_indices)``: the generator's ``classes``
        attribute followed by its ``class_indices`` attribute, both returned
        as-is (no copy, no counting).
    """
    labels = generator.classes
    name_to_index = generator.class_indices
    return labels, name_to_index

# Collect labels and the class-name mapping for every disease / split.

# Fibrosis
train_labels_fibrosis, train_class_indices_fibrosis = get_class_distribution(
    generator=train_generator_fibrosis)
test_labels_fibrosis, test_class_indices_fibrosis = get_class_distribution(
    generator=test_generator_fibrosis)

# Inflammation
train_labels_inflammation, train_class_indices_inflammation = get_class_distribution(
    generator=train_generator_inflammation)
test_labels_inflammation, test_class_indices_inflammation = get_class_distribution(
    generator=test_generator_inflammation)

# Ballooning
train_labels_ballooning, train_class_indices_ballooning = get_class_distribution(
    generator=train_generator_ballooning)
test_labels_ballooning, test_class_indices_ballooning = get_class_distribution(
    generator=test_generator_ballooning)

# Steatosis
train_labels_steatosis, train_class_indices_steatosis = get_class_distribution(
    generator=train_generator_steatosis)
test_labels_steatosis, test_class_indices_steatosis = get_class_distribution(
    generator=test_generator_steatosis)

In [4]:
# Function to create a DataFrame with label counts for each disease
def create_label_distribution_df(train_labels, test_labels, disease_name, class_indices):
    """Tabulate how many train and test samples each class has.

    Every class listed in ``class_indices`` gets a row, including classes with
    no training samples (their count is 0). Labels that do not appear in
    ``class_indices`` are not counted.

    Args:
        train_labels: Iterable of class indices, one per training sample.
        test_labels: Iterable of class indices, one per test sample.
        disease_name: Value written to the 'Disease' column of every row.
        class_indices: Mapping ``{class name: class index}`` used to turn the
            indices back into readable labels.

    Returns:
        DataFrame with columns 'Label', 'Train Count', 'Test Count' and
        'Disease', ordered by descending train count (ties keep class-index
        order) and carrying a fresh 0..n-1 index.
    """
    # Invert the mapping and fix the row set to *all* known classes, so a class
    # that is absent from the training labels is still reported.
    index_to_label = {index: name for name, index in class_indices.items()}
    class_ids = sorted(index_to_label)

    train_counts = pd.Series(train_labels).value_counts().reindex(class_ids, fill_value=0)
    test_counts = pd.Series(test_labels).value_counts().reindex(class_ids, fill_value=0)

    df = pd.DataFrame({
        'Label': [index_to_label[class_id] for class_id in class_ids],
        'Train Count': train_counts.to_numpy(),
        'Test Count': test_counts.to_numpy(),
    })
    df['Disease'] = disease_name

    # Most frequent training class first; the stable sort keeps class-index
    # order among classes with equal counts.
    return df.sort_values('Train Count', ascending=False, kind='stable').reset_index(drop=True)

In [5]:
# One label-count table per disease; labels are named via the training generator's class mapping.
fibrosis_df = create_label_distribution_df(
    train_labels=train_labels_fibrosis,
    test_labels=test_labels_fibrosis,
    disease_name='Fibrosis',
    class_indices=train_class_indices_fibrosis,
)
inflammation_df = create_label_distribution_df(
    train_labels=train_labels_inflammation,
    test_labels=test_labels_inflammation,
    disease_name='Inflammation',
    class_indices=train_class_indices_inflammation,
)
ballooning_df = create_label_distribution_df(
    train_labels=train_labels_ballooning,
    test_labels=test_labels_ballooning,
    disease_name='Ballooning',
    class_indices=train_class_indices_ballooning,
)
steatosis_df = create_label_distribution_df(
    train_labels=train_labels_steatosis,
    test_labels=test_labels_steatosis,
    disease_name='Steatosis',
    class_indices=train_class_indices_steatosis,
)


In [6]:
# Stack the per-disease tables into a single DataFrame. ignore_index=True gives
# the result a fresh 0..n-1 index instead of repeating each table's own index,
# so label-based row lookups on the combined frame are unambiguous.
data_distribution_df = pd.concat(
    [fibrosis_df, inflammation_df, ballooning_df, steatosis_df],
    ignore_index=True,
)

In [7]:
# Write the combined table to CSV (without the row index) so it can be loaded in R.
data_distribution_df.to_csv(
    path_or_buf='D:/PATENT/disease_label_distribution.csv',
    index=False,
)

# Optional: uncomment to print the table.
# print(data_distribution_df)

In [None]:
# Load the saved model and the saved history JSON for each disease
# (adjust the paths to your own setup).
def _read_history(path):
    """Parse and return the JSON content of the history file at `path`."""
    with open(path, 'r') as history_file:
        return json.load(history_file)


fibrosis_model = load_model('D:/PATENT/Model_testing/fibrosis_modified_model25.h5')
inflammation_model = load_model('D:/PATENT/Model_testing/inflam_modified25.h5')
ballooning_model = load_model('D:/PATENT/Model_testing/model_modified_ballooning25.h5')
steatosis_model = load_model('D:/PATENT/Model_testing/steatosis_modified15.h5')

fibrosis_history = _read_history('D:/PATENT/Model_testing/fibrosis_modified_history25.json')
inflammation_history = _read_history('D:/PATENT/Model_testing/inflam_modified_history25.json')
ballooning_history = _read_history('D:/PATENT/Model_testing/ballooning_modified_history25.json')
steatosis_history = _read_history('D:/PATENT/Model_testing/steatosis_modified_history15.json')

In [None]:
# Function to extract data and labels from a generator
def extract_data_from_generator(generator):
    """Read one full pass of a batch generator into two arrays.

    The generator is rewound first (when it offers ``reset()``), so the result
    always starts at batch 0 even if some batches were consumed earlier.

    Note: every batch is kept in memory and then concatenated, so the whole
    split must fit in RAM (the batch list and the concatenated copy coexist
    briefly).

    Args:
        generator: Object supporting ``len()`` (number of batches per pass)
            and ``next()`` returning an ``(images, labels)`` pair of arrays.
            An optional ``reset()`` method is called before reading.

    Returns:
        Tuple ``(images, labels)``: all image batches and all label batches,
        each concatenated along the first axis.

    Raises:
        ValueError: If the generator reports zero batches.
    """
    # Start from the beginning of an epoch; without this, a partially consumed
    # generator would return batches that straddle two passes over the data.
    reset = getattr(generator, 'reset', None)
    if callable(reset):
        reset()

    n_batches = len(generator)
    if n_batches == 0:
        raise ValueError('generator yields no batches; nothing to extract')

    image_batches = []
    label_batches = []
    for _ in range(n_batches):
        img_batch, label_batch = next(generator)
        image_batches.append(img_batch)
        label_batches.append(label_batch)

    # Concatenate all batches into a single array per output.
    return np.concatenate(image_batches), np.concatenate(label_batches)

# Materialise every train/test split as in-memory (images, labels) arrays.
# NOTE(review): this holds whole datasets in RAM. The steatosis training split
# alone is 38516 images at 299x299 (presumably 3 channels, float32), i.e. tens
# of GB - confirm the machine can hold it before running this cell.

# Fibrosis
X_train_fibrosis, y_train_fibrosis = extract_data_from_generator(
    generator=train_generator_fibrosis)
X_test_fibrosis, y_test_fibrosis = extract_data_from_generator(
    generator=test_generator_fibrosis)

# Inflammation
X_train_inflammation, y_train_inflammation = extract_data_from_generator(
    generator=train_generator_inflammation)
X_test_inflammation, y_test_inflammation = extract_data_from_generator(
    generator=test_generator_inflammation)

# Ballooning
X_train_ballooning, y_train_ballooning = extract_data_from_generator(
    generator=train_generator_ballooning)
X_test_ballooning, y_test_ballooning = extract_data_from_generator(
    generator=test_generator_ballooning)

# Steatosis
X_train_steatosis, y_train_steatosis = extract_data_from_generator(
    generator=train_generator_steatosis)
X_test_steatosis, y_test_steatosis = extract_data_from_generator(
    generator=test_generator_steatosis)