In [1]:
import json
import os
from reportlab.lib.styles import ParagraphStyle
from reportlab.platypus import Paragraph
from reportlab.graphics import shapes
from reportlab.lib import colors


# Folder holding the exported *_summary.json tables for one report.
# NOTE(review): machine-specific absolute path - point this at the report
# folder on your own machine (ideally via a config cell) before running.
folder_path = r'C:\Users\joash\Downloads\H3U07755_F2_report\H3U07755_F2_report\\'


def _load_summary(file_name):
    """Read one JSON summary file from ``folder_path`` and return the parsed object."""
    # JSON is UTF-8 by specification; stating it explicitly avoids depending
    # on the platform's locale encoding when the file holds non-ASCII text.
    with open(os.path.join(folder_path, file_name), 'r', encoding='utf-8') as f:
        return json.load(f)


histologicalScoreData = _load_summary("table1_summary.json")
segmentAreaData = _load_summary("table2_summary.json")
cellTypeCountData = _load_summary("table3_summary.json")
cellTypePercentData = _load_summary("table4_summary.json")

# Remove empty key: drop the '' entry from every inner dict of the score table.
histologicalScoreData = {
    outer_key: {key: value for key, value in inner_dict.items() if key != ''}
    for outer_key, inner_dict in histologicalScoreData.items()
}


In [2]:
def convertDictToArray(table, header, fontSize):
    """Turn a nested dict into a list of rows of reportlab Paragraphs.

    Parameters
    ----------
    table : dict
        Maps a row label (str) to a dict of cell values. The values of each
        inner dict are rendered, in iteration order, via ``str()``.
    header : list
        List whose first item is the list of column titles, e.g.
        ``[['', 'A', 'B']]``. Any further items are carried over unchanged.
    fontSize : int or float
        Font size used for every cell.

    Returns
    -------
    list
        A new list of rows: the bold header row first, then one row per
        ``table`` entry made of a bold label followed by regular-weight cells.
        The ``header`` argument itself is not modified.
    """
    # Both styles are centred (alignment=1) with a fixed leading of 6.
    boldStyle = ParagraphStyle(
        name='Normal',
        fontSize=fontSize,
        fontName='Times-Bold',
        alignment=1,
        leading=6
    )
    regularStyle = ParagraphStyle(
        name='Normal',
        fontSize=fontSize,
        fontName='Times-Roman',
        alignment=1,
        leading=6
    )
    # Only the regular style is marked 'nowrap', as in the original code.
    regularStyle.whiteSpace = 'nowrap'

    # Build a fresh list instead of writing into the caller's ``header`` so
    # the same header list can safely be reused for another call.
    rows = [[Paragraph(title, boldStyle) for title in header[0]]]
    rows.extend(header[1:])

    for label, cells in table.items():
        rows.append(
            [Paragraph(label, boldStyle)]
            + [Paragraph(str(cell), regularStyle) for cell in cells.values()]
        )

    return rows

In [3]:
# Histological score table: one header row, then one row per entry of
# histologicalScoreData, all rendered at font size 6.
histologicalScoreTable = convertDictToArray(
    table=histologicalScoreData,
    header=[['', 'AI score', 'AI translated score', 'Pathologist score']],
    fontSize=6,
)


In [4]:
# Shared paragraph styles for the table cells built below. The two styles
# differ only in font weight; alignment=1 is reportlab's centred alignment.
_tableStyleOptions = dict(name='table', fontSize=6, alignment=1, leading=5)

wordStyleBold = ParagraphStyle(fontName='Times-Bold', **_tableStyleOptions)
wordStyle = ParagraphStyle(fontName='Times-Roman', **_tableStyleOptions)


def P(txt, wordStyle):
    """Wrap ``txt`` in a reportlab Paragraph rendered with ``wordStyle``."""
    paragraph = Paragraph(txt, wordStyle)
    return paragraph



In [5]:
def getTextColor(background_color):
    """Choose a text colour that contrasts with ``background_color``.

    The colour's ``red``, ``green`` and ``blue`` attributes are combined into
    a weighted luminance (weights 0.299 / 0.587 / 0.114). Returns
    ``colors.white`` when the luminance is below 0.5, otherwise
    ``colors.black``.
    """
    # NOTE(review): the 0.5 threshold presumes channels in the 0..1 range - confirm.
    r = background_color.red
    g = background_color.green
    b = background_color.blue
    luminance = 0.299 * r + 0.587 * g + 0.114 * b
    return colors.white if luminance < 0.5 else colors.black


In [6]:
# Display names for the cell-type codes used in the summary tables.
cellTypeRename = {
    'CE': "Tumor/epithelial cells",
    'TIL': "Tumor infiltrating lymphocytes",
    'CEUK': "Unknown cell type",
    'fib': "Fibroblasts",
    'plasma': "Plasma cells",
    'End': "Endothelial cells",
    'nCE': "Normal cells",
    'Mph': "Macrophages",
    'Neu': "Neutrophils",
    'mimi': "Mitotic mimic",
    'MIT': "Mitotic cells",
}

# Display names for the tissue-segment codes used in the summary tables.
segmentRename = {
    'SPA': "Space",
    'CT': "Cellular Tumor",
    'ST': "Stroma",
    'FAT': "Fatty tissue",
    'NE': "Necrosis",
    'Other': "Other",
    'Skin': "Skin",
    "BR-D": "Normal/dysplastic breast",
}

In [7]:
# Segment-area table: a bold header row, then one row per segment with its
# area and share of the total, both rounded to two decimals.
header = [P(title, wordStyleBold) for title in ['Segment', 'area in mm2', '%area']]
segmentAreaTable = [header]
for key, value in segmentAreaData.items():
    # Fall back to the raw segment code when no display name is registered,
    # so an unmapped segment still gets a label instead of a blank cell.
    segment = P(segmentRename.get(key, key), wordStyleBold)
    area_in_mm2 = P(str(round(value['area in mm2'], 2)), wordStyle)
    percent_area = P(str(round(value['%area'], 2)), wordStyle)
    segmentAreaTable.append([segment, area_in_mm2, percent_area])


In [8]:
# Cell-type count table: a bold header row, then one row per entry of
# cellTypeCountData.
cellTypeHeader = [
    P(title, wordStyleBold)
    for title in ['Cell type', 'Total count', 'per mm²', 'per 1000 tumor cells']
]
cellTypeCountTable = [cellTypeHeader]
for key, values in cellTypeCountData.items():
    # The 'Segment' field carries the cell-type code. Use its display name
    # when one exists, otherwise the raw code - and always wrap it in a
    # Paragraph so every cell in the column is styled the same way.
    cell_code = values['Segment']
    segment = P(cellTypeRename.get(cell_code, cell_code), wordStyle)
    # Thousands separators for the raw count, two decimals for the density.
    total_count = P("{:,}".format(values['Total count']), wordStyle)
    per_mm2 = P(str(round(values['per mm2'], 2)), wordStyle)
    per_1000_epithelial_cells = P(str(values['per 1000 epithelial cells']), wordStyle)
    cellTypeCountTable.append([segment, total_count, per_mm2, per_1000_epithelial_cells])

In [9]:
# Column keys are taken from the first entry; every entry is expected to
# provide the same keys. An empty input yields a header-only table instead
# of an IndexError.
cellTypePercentHeader = list(next(iter(cellTypePercentData.values()), {}).keys())

# Header row: segment codes are translated to display names where known.
cellTypePercentTable = [
    ['Cell type'] + [segmentRename.get(column, column) for column in cellTypePercentHeader]
]

for key, values in cellTypePercentData.items():
    # Always emit a label (raw code when no display name exists) so each row
    # has the same number of columns as the header.
    extracted_data = [cellTypeRename.get(key, key)]
    extracted_data.extend(str(values[column]) + '%' for column in cellTypePercentHeader)
    cellTypePercentTable.append(extracted_data)

# Wrap every cell in a Paragraph; the first column of each row is bold.
cellTypePercentTable = [
    [P(cell, wordStyleBold if index == 0 else wordStyle) for index, cell in enumerate(row)]
    for row in cellTypePercentTable
]

In [10]:
# Persist the finished tables with IPython's %store magic so another
# notebook/kernel can restore them with `%store -r`.
%store segmentAreaTable
%store cellTypeCountTable
%store cellTypePercentTable
%store histologicalScoreTable

Stored 'segmentAreaTable' (list)
Stored 'cellTypeCountTable' (list)
Stored 'cellTypePercentTable' (list)
Stored 'histologicalScoreTable' (list)


In [11]:
#Mitotic Table

In [12]:
# Mitotic summary: a title row followed by the count figures of the entry
# whose 'Segment' field is 'MIT'.
mitoticInfoTable = [['', 'Mitotic cells']]
for entry in cellTypeCountData.values():
    # Check the 'Segment' field directly rather than scanning every value of
    # the entry, which could match an unrelated field or append rows twice.
    if entry.get('Segment') != 'MIT':
        continue
    mitoticInfoTable.append(['Total count', str(entry['Total count'])])
    mitoticInfoTable.append(['per mm2', str(round(entry['per mm2'], 2))])
    mitoticInfoTable.append(['per 1000 tumor cells', str(entry['per 1000 epithelial cells'])])

In [13]:
import re

def getMitoticScore(text):
    """Return the first standalone integer found in ``text``.

    A standalone integer is a run of digits with word boundaries on both
    sides. The match is returned as a string; ``None`` is returned when
    ``text`` contains no such number.
    """
    first_number = re.search(r'\b\d+\b', text)
    if first_number is None:
        return None
    return first_number.group(0)


In [14]:
# Both mitotic values come from the 'Mitotic score' entry of the histological
# score summary: the first number in the AI score text, and the translated
# score as a string.
_mitoticEntry = histologicalScoreData['Mitotic score']
mitoticScore = getMitoticScore(_mitoticEntry['AI score'])
mitoticAIScore = str(_mitoticEntry['AI translated score'])

In [15]:
# Add the two score rows to the mitotic table.
mitoticInfoTable.extend([
    ['Total mitosis in 10 consecutive HPF', mitoticScore],
    ['AI derived score', mitoticAIScore],
])

In [16]:
# Wrap every cell in a Paragraph; the first column of each row is bold.
# getMitoticScore returns None when the score text contains no digits, so a
# missing value is rendered as an empty cell rather than handing None to
# Paragraph.
mitoticInfoTable = [
    [
        P('' if cell is None else cell, wordStyleBold if column == 0 else wordStyle)
        for column, cell in enumerate(row)
    ]
    for row in mitoticInfoTable
]

In [17]:
# Persist the mitotic table via IPython's %store magic (restore with `%store -r`).
%store mitoticInfoTable

Stored 'mitoticInfoTable' (list)


In [18]:
# Legend rows: one [blank, display name] pair per cell type, skipping the
# mitotic ('MIT') and mitotic-mimic ('mimi') entries.
# NOTE(review): the blank first column is presumably a placeholder filled in
# when the legend is rendered - confirm against the consumer of legendData.
# Unmapped codes fall back to the raw code instead of raising KeyError.
legendData = [
    ['', cellTypeRename.get(entry['Segment'], entry['Segment'])]
    for entry in cellTypeCountData.values()
    if entry['Segment'] not in ('MIT', 'mimi')
]


In [19]:
# Persist the legend rows via IPython's %store magic (restore with `%store -r`).
%store legendData

Stored 'legendData' (list)


In [20]:
# Display the legend rows as a quick visual check of the mapping.
legendData

[['', 'Tumor infiltrating lymphocytes'],
 ['', 'Tumor/epithelial cells'],
 ['', 'Fibroblasts'],
 ['', 'Endothelial cells'],
 ['', 'Plasma cells'],
 ['', 'Macrophages'],
 ['', 'Normal cells'],
 ['', 'Neutrophils']]