In [2]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount = True)

Mounted at /content/gdrive


In [None]:
import pandas as pd

# Read the CSV files
data_primary_objects = pd.read_csv('/content/gdrive/MyDrive/Hepatoma-Staging-Project/Dataset/AllimagesIdentifyPrimaryObjects.csv')
data_channel_DNA = pd.read_csv('/content/gdrive/MyDrive/Hepatoma-Staging-Project/Dataset/AllimagesImage.csv')

# Calculate metrics for each image
image_metrics = data_primary_objects.groupby('ImageNumber').agg(
    Total_Cell_Count=('ObjectNumber', 'count'),
    Average_Cell_Area=('AreaShape_Area', 'mean'),
    Spatial_Variance_X=('AreaShape_Center_X', lambda x: x.var()),  # Spatial variance for X coordinate
    Spatial_Variance_Y=('AreaShape_Center_Y', lambda y: y.var()),  # Spatial variance for Y coordinate
    CV_Cell_Area=('AreaShape_Area', lambda x: x.std() / x.mean()),
    Cell_Density=('AreaShape_Area', 'sum'),
    Perimeter_to_Area_Ratio=('AreaShape_Perimeter', lambda x: x.sum() / x.count()),
    Compactness_Variation=('AreaShape_Eccentricity', lambda x: x.std() / x.mean()),
    NCR=('AreaShape_MajorAxisLength', 'sum'),
    Ferets_Diameter_Variation=('AreaShape_MaxFeretDiameter', lambda x: x.std() / x.mean()),
    # Add more metrics as needed
).reset_index()

# Nuclear-to-Cytoplasmic Ratio (NCR) per Image
image_metrics['NCR'] = image_metrics['NCR'] / data_primary_objects.groupby('ImageNumber')['AreaShape_MinorAxisLength'].sum().values

# Cell Density per Image
total_area_per_image = data_primary_objects.groupby('ImageNumber')['AreaShape_Area'].sum()
image_metrics['Cell_Density'] = image_metrics['Total_Cell_Count'] / total_area_per_image

# Merge with data_channel_DNA on ImageNumber
image_metrics_with_filename = pd.merge(image_metrics, data_channel_DNA[['ImageNumber', 'FileName_DNA']], on='ImageNumber', how='left')

# Display the resulting DataFrame
print(image_metrics_with_filename)


     ImageNumber  Total_Cell_Count  Average_Cell_Area  Spatial_Variance_X  \
0              1                97          76.350515         1208.028489   
1              2                91          82.516484         1071.059599   
2              3                95          39.905263         1272.225568   
3              4                97          45.639175         1142.810901   
4              5               100          56.850000         1257.257988   
..           ...               ...                ...                 ...   
678          679                84          34.535714         1433.963473   
679          680                91          64.846154         1210.402417   
680          681                68         102.250000         1176.278997   
681          682                95          53.652632         1263.315822   
682          683                86          56.674419         1130.713496   

     Spatial_Variance_Y  CV_Cell_Area  Cell_Density  Perimeter_to_Area_Rati

In [5]:
import os

# Directory containing the ADASYN images
adasyn_directory = '/content/gdrive/MyDrive/Hepatoma-Staging-Project/Dataset/Hepatoma-Stages-Dataset/ADASYN_images/'

# Function to extract stage from file name
def extract_stage(file_name):
    if 'Non_cancerous' in file_name:
        return 0
    elif 'Stage_1' in file_name:
        return 1
    elif 'Stage_2' in file_name:
        return 2
    elif 'Stage_3' in file_name:
        return 3
    else:
        return None  # Return None for files not matching any stage

# Dictionary to store image stage information
image_stage = {}

# Iterate through each image file in the ADASYN directory
for root, dirs, files in os.walk(adasyn_directory):
    for file in files:
        # Extract the file name and stage
        stage = extract_stage(root)
        if stage is not None:
            image_stage[file.split('.')[0]] = stage  # Use file name without extension as key

# Assign stages to images in image_metrics_with_filename DataFrame
image_metrics_with_filename['Stage'] = image_metrics_with_filename['FileName_DNA'].apply(lambda x: image_stage.get(x.split('.')[0], None))

# Display the resulting DataFrame with stage information
print(image_metrics_with_filename)


     ImageNumber  Total_Cell_Count  Average_Cell_Area  Spatial_Variance_X  \
0              1                97          76.350515         1208.028489   
1              2                91          82.516484         1071.059599   
2              3                95          39.905263         1272.225568   
3              4                97          45.639175         1142.810901   
4              5               100          56.850000         1257.257988   
..           ...               ...                ...                 ...   
678          679                84          34.535714         1433.963473   
679          680                91          64.846154         1210.402417   
680          681                68         102.250000         1176.278997   
681          682                95          53.652632         1263.315822   
682          683                86          56.674419         1130.713496   

     Spatial_Variance_Y  CV_Cell_Area  Cell_Density  Perimeter_to_Area_Rati

In [6]:
image_metrics_with_filename.head(10)

Unnamed: 0,ImageNumber,Total_Cell_Count,Average_Cell_Area,Spatial_Variance_X,Spatial_Variance_Y,CV_Cell_Area,Cell_Density,Perimeter_to_Area_Ratio,Compactness_Variation,NCR,Ferets_Diameter_Variation,Max_Central_Moment,Max_Normalized_Moment,Max_Zernike_Moment,FileName_DNA,Stage
0,1,97,76.350515,1208.028489,1146.888001,0.784962,7406,32.414154,0.297473,1182.503813,0.515609,2627927.0,0.225962,1.336902,f0.jpeg,3
1,2,91,82.516484,1071.059599,1297.737355,0.775645,7509,34.447118,0.25722,1194.539304,0.537113,1040935.0,0.232924,1.27324,f1.jpeg,3
2,3,95,39.905263,1272.225568,1175.430748,0.51093,3791,21.227435,0.366162,799.336447,0.373819,26012.26,0.271818,1.336902,f10.jpeg,0
3,4,97,45.639175,1142.810901,1348.356808,0.497392,4427,23.403258,0.266257,912.119767,0.390155,50819.4,0.308698,1.336902,f100.jpeg,1
4,5,100,56.85,1257.257988,1198.09211,0.574858,5685,26.780704,0.246092,1037.39409,0.374527,193774.1,0.215029,1.27324,f101.jpeg,3
5,6,111,45.63964,1136.192178,1226.50425,0.620834,5066,23.429865,0.300944,1026.616863,0.442015,146780.5,0.265675,1.336902,f102.jpeg,0
6,7,100,71.59,984.375878,1025.882474,0.800495,7159,32.059598,0.224924,1182.622237,0.439684,1173590.0,0.22772,1.098042,f103.jpeg,3
7,8,95,59.231579,962.95916,1153.456958,0.706749,5627,28.463086,0.25613,1029.309616,0.490509,860931.4,0.246294,1.336902,f104.jpeg,1
8,9,58,103.62069,1064.778943,1025.615203,0.840187,6010,39.737568,0.203485,822.358984,0.513861,1630102.0,0.211651,1.085998,f105.jpeg,3
9,10,90,43.044444,1220.52469,1236.889418,0.531822,3874,22.627121,0.346862,804.728856,0.369211,21861.77,0.235164,1.336902,f106.jpeg,0


In [7]:
# Define the path to save the CSV file
csv_path = '/content/gdrive/MyDrive/Hepatoma-Staging-Project/Dataset/Image_metrics3.csv'

# Write the DataFrame to a CSV file
image_metrics_with_filename.to_csv(csv_path, index=False)

In [4]:
import pandas as pd

# Read the CSV files
data_primary_objects = pd.read_csv('/content/gdrive/MyDrive/Hepatoma-Staging-Project/Dataset/AllimagesIdentifyPrimaryObjects.csv')
data_channel_DNA = pd.read_csv('/content/gdrive/MyDrive/Hepatoma-Staging-Project/Dataset/AllimagesImage.csv')

# Define function to aggregate moments per image
def aggregate_moments(group):
    # Aggregate central moments
    central_moments = group.filter(like='CentralMoment').max()

    # Aggregate normalized moments
    normalized_moments = group.filter(like='NormalizedMoment').max()

    # Aggregate Zernike moments
    zernike_moments = group.filter(like='Zernike').max()

    return pd.Series({
        'Max_Central_Moment': central_moments.max(),
        'Max_Normalized_Moment': normalized_moments.max(),
        'Max_Zernike_Moment': zernike_moments.max()
    })

# Group by ImageNumber and apply the aggregation function
aggregated_moments = data_primary_objects.groupby('ImageNumber').apply(aggregate_moments).reset_index()

# Merge with other image metrics if needed
image_metrics = data_primary_objects.groupby('ImageNumber').agg(
    Total_Cell_Count=('ObjectNumber', 'count'),
    Average_Cell_Area=('AreaShape_Area', 'mean'),
    Spatial_Variance_X=('AreaShape_Center_X', 'var'),  # Spatial variance for X coordinate
    Spatial_Variance_Y=('AreaShape_Center_Y', 'var'),  # Spatial variance for Y coordinate
    CV_Cell_Area=('AreaShape_Area', lambda x: x.std() / x.mean()),
    Cell_Density=('AreaShape_Area', 'sum'),
    Perimeter_to_Area_Ratio=('AreaShape_Perimeter', lambda x: x.sum() / x.count()),
    Compactness_Variation=('AreaShape_Eccentricity', lambda x: x.std() / x.mean()),
    NCR=('AreaShape_MajorAxisLength', 'sum'),
    Ferets_Diameter_Variation=('AreaShape_MaxFeretDiameter', lambda x: x.std() / x.mean())
).reset_index()

# Merge with aggregated moments on ImageNumber
image_metrics_with_moments = pd.merge(image_metrics, aggregated_moments, on='ImageNumber', how='left')

# Merge with data_channel_DNA on ImageNumber
image_metrics_with_filename = pd.merge(image_metrics_with_moments, data_channel_DNA[['ImageNumber', 'FileName_DNA']], on='ImageNumber', how='left')

# Display the resulting DataFrame
print(image_metrics_with_filename)





     ImageNumber  Total_Cell_Count  Average_Cell_Area  Spatial_Variance_X  \
0              1                97          76.350515         1208.028489   
1              2                91          82.516484         1071.059599   
2              3                95          39.905263         1272.225568   
3              4                97          45.639175         1142.810901   
4              5               100          56.850000         1257.257988   
..           ...               ...                ...                 ...   
678          679                84          34.535714         1433.963473   
679          680                91          64.846154         1210.402417   
680          681                68         102.250000         1176.278997   
681          682                95          53.652632         1263.315822   
682          683                86          56.674419         1130.713496   

     Spatial_Variance_Y  CV_Cell_Area  Cell_Density  Perimeter_to_Area_Rati

In [3]:
import pandas as pd


data_primary_objects = pd.read_csv('/content/gdrive/MyDrive/Hepatoma-Staging-Project/Dataset/AllimagesIdentifyPrimaryObjects.csv')
data_channel_DNA = pd.read_csv('/content/gdrive/MyDrive/Hepatoma-Staging-Project/Dataset/AllimagesImage.csv')

data = data_primary_objects
# Define lists to store column names for each moment type
central_moments_columns = []
normalized_moments_columns = []
zernike_moments_columns = []

# Iterate over columns to identify central, normalized, and Zernike moments
for column in data.columns:
    if 'CentralMoment' in column:
        central_moments_columns.append(column)
    elif 'NormalizedMoment' in column:
        normalized_moments_columns.append(column)
    elif 'Zernike' in column:
        zernike_moments_columns.append(column)

# Compute sum for each type of moment
sum_central_moments = data[central_moments_columns].sum(axis=1)
sum_normalized_moments = data[normalized_moments_columns].sum(axis=1)
sum_zernike_moments = data[zernike_moments_columns].sum(axis=1)

# Add the sums as new columns in the DataFrame
data['Sum_Central_Moments'] = sum_central_moments
data['Sum_Normalized_Moments'] = sum_normalized_moments
data['Sum_Zernike_Moments'] = sum_zernike_moments

# Display the DataFrame with the added columns
print(data)


       ImageNumber  ObjectNumber  AreaShape_Area  AreaShape_BoundingBoxArea  \
0                1             1              23                         36   
1                1             2              51                         64   
2                1             3              52                         64   
3                1             4              48                         63   
4                1             5              95                        140   
...            ...           ...             ...                        ...   
61138          683            82              76                        117   
61139          683            83              23                         36   
61140          683            84              99                        154   
61141          683            85              55                         72   
61142          683            86              23                         30   

       AreaShape_BoundingBoxMaximum_X  AreaShape_Bo