### Analyse the pathological data.


In [1]:
import os
import re
import pandas as pd

In [2]:
def analyze_pathological_data(data):
    if "免疫组化结果：" in data:
        data_split=data.split("免疫组化结果：")
    else:
        data_split=data.split("免疫组化：")
    
    """
    1. Analyze the subtype and the WHO grade of gliomas;
    """
    patho_data=data_split[0]
    patho_data=patho_data.replace("(", "（");
    patho_data=patho_data.replace(")", "）");
    WHO_grade=analyze_WHO_grade(patho_data)
    lesion_location=analyze_lesion_location(patho_data)
    gliomas_subtype=analyze_gliomas_subtype(patho_data)
    
    
    """
    2. Analyze the status of gene data.
    """
    gene_data, gene_data_list, GFAP, Ki67, P53, IDH, H3_K27M, Olig2, EGFR, ATRX, EMA,  CD34, NeuN,  CgA, Syn="", "", "", "", "","", "", "", "", "", "", "", "", "","" 
    
    if len(data_split)>1:
        gene_data=data_split[1]
        gene_data=gene_data.replace("；", "，");
        gene_data=gene_data.replace("。", "，");
        gene_data=gene_data.replace(",", "，");
        
        gene_data_list=gene_data.split("，")
        GFAP=analyze_gene_data(gene_data_list, "GFAP")
        Ki67=analyze_gene_data(gene_data_list, "Ki67")
        P53=analyze_gene_data(gene_data_list, "P53")
        IDH=analyze_gene_data(gene_data_list, "IDH")
        H3_K27M=analyze_gene_data(gene_data_list, "H3 K27M")
        Olig2=analyze_gene_data(gene_data_list, "Olig2")
        EGFR=analyze_gene_data(gene_data_list, "EGFR")
        ATRX=analyze_gene_data(gene_data_list, "ATRX")
        EMA=analyze_gene_data(gene_data_list, "EMA")
        CD34=analyze_gene_data(gene_data_list, "CD34")
        NeuN=analyze_gene_data(gene_data_list, "NeuN")
        CgA=analyze_gene_data(gene_data_list, "CgA")
        Syn=analyze_gene_data(gene_data_list, "Syn")
    
    return patho_data, WHO_grade, lesion_location, gliomas_subtype, gene_data,gene_data_list,GFAP,Ki67,P53,IDH,H3_K27M,Olig2,EGFR,ATRX, EMA, CD34, NeuN, CgA, Syn


def analyze_WHO_grade(patho_data):

    if any(_ in patho_data for _ in ["WHOⅣ", "WHO Ⅳ", "WHOIV", "WHO IV", "WHO4", "WHO 4", "Ⅳ级", "Ⅳ 级", "IV级", "IV 级"]):
        WHO_grade=4
    elif any(_ in patho_data for _ in ["WHOⅢ", "WHO Ⅲ", "WHOIII", "WHO III", "WHO3", "WHO 3", "Ⅲ级", "Ⅲ 级", "III级", "III 级"]):
        WHO_grade=3
    elif any(_ in patho_data for _ in ["WHOⅡ", "WHO Ⅱ", "WHO II", "WHOII", "WHO2", "WHO 2", "Ⅱ级", "Ⅱ 级", "II级", "II 级"]):
        WHO_grade=2
    elif any(_ in patho_data for _ in ["WHOⅠ", "WHO Ⅰ", "WHOI", "WHO I", "WHO1", "WHO 1", "I级", "I 级", "Ⅰ级", "Ⅰ 级"]):
        WHO_grade=1
    else:
        WHO_grade=""
        
    return WHO_grade


def analyze_lesion_location(patho_data):
    lesion_location=patho_data.split("）")[0]
    start_index=lesion_location.find("（")
    
    return lesion_location[start_index+1:]

def analyze_gliomas_subtype(patho_data):
    
    
    if any(_ in patho_data for _ in ["胶质母细胞", "胶质瘤母细胞瘤"]):
        gliomas_subtype="胶质母细胞瘤"
    elif any(_ in patho_data for _ in ["少突胶质细胞瘤", "少突细胞瘤", "少突细胞胶质瘤", "少突胶质细胞质瘤"]):
        gliomas_subtype="少突胶质瘤"
    elif any(_ in patho_data for _ in ["星形细胞瘤", "星型细胞瘤", "星形胶质细胞瘤", "星形细胞胶质", "星型细胞胶质瘤", "星形胶质瘤"]):
        gliomas_subtype="星形细胞瘤"
    elif any(_ in patho_data for _ in ["节细胞胶质瘤", "节细胞细胞胶质瘤"]):
        gliomas_subtype="节细胞胶质瘤"   
    elif any(_ in patho_data for _ in ["中线胶质瘤"]):
        gliomas_subtype="中线胶质瘤"         
    else:
        gliomas_subtype=""
    
    return gliomas_subtype

def analyze_gene_data(gene_data_list, gene_filter):
    gene_data=""
    for data in gene_data_list:
        if gene_filter in data:
            gene_data=data
        
    return gene_data
    

In [3]:
base_path="F:\Code\Medical\Glioma_process"
excel_path=os.path.join(base_path, "result_file/diagnose_info.xlsx")
save_excel_path=os.path.join(base_path, "result_file/PathologicalData_DropNull_manualCorrected_analyzed.xlsx")


Data=pd.read_excel(excel_path,header=0)
Data



Unnamed: 0,PatientID,报告日期,病理诊断,姓名,性别,年龄
0,4191886,2015-05-06,（左额顶部）高级别胶质瘤（星形细胞胶质瘤，区域室管膜瘤改变，WHOⅡ-Ⅲ级）。免疫组化结果：...,虢惠玲,女,46岁
1,1593537,2014-07-07,（左额部）考虑星形细胞胶质瘤（WHOⅢ级）。免疫组化结果：GFAP（+），Ki67（约70%...,陈磊,男,32岁
2,3357742,2014-05-28,（右侧丘脑）星形细胞胶质瘤Ⅱ- Ⅲ 级（大部分为Ⅱ级，小部分为 Ⅲ 级）。免疫组化结果：GF...,任智,男,11岁
3,3270360,2014-07-15,（颞枕顶叶，胶质瘤切除术后）多形性胶质母细胞瘤（WHOⅣ级）。,刘明山,男,49岁
4,3850375,2014-11-25,（左枕顶）星形细胞胶质瘤Ⅳ级，伴坏死。免疫组化结果：GFAP（++），Ki67（+）约15-...,刘福元,男,58岁
...,...,...,...,...,...,...
2973,10383395,,（右颞占位）高级别胶质瘤，待分子检测进一步明确。\n免疫组化结果：GFAP（+），Ki67（...,李龙,,
2974,10586638,,（右侧额叶）低级别胶质瘤，结合免疫组化及分子检测结果，倾向少突胶质细胞瘤，WHO2级。\n免...,万锦,,
2975,9553737,,（右额叶）IDH突变型少突胶质细胞瘤，伴1p/19q共缺失，WHO2级。\n免疫组化结果：G...,胡二妹,,
2976,9478725,,（四脑室）低级别胶质瘤，结合免疫组化，符合毛细胞型星形细胞瘤（WHOⅠ级）。\n免疫组化结果...,戴紫乐,,


In [4]:
# pathological_data=Data["病理诊断（手动矫正版本）"]
pathological_data=Data
Data["pathological_data"],Data["WHO_grade"], Data["lesion_location"], Data["gliomas_subtype"],Data["gene_data"],Data["gene_data_list"],Data["GFAP"],Data["Ki67"],Data["P53"],Data["IDH"],Data["H3_K27M"],Data["Olig2"],Data["EGFR"],Data["ATRX"], Data["EMA"], Data["CD34"], Data["NeuN"], Data["CgA"], Data["Syn"]=zip(*pathological_data.apply(analyze_pathological_data))
Data.to_excel(save_excel_path)



AttributeError: 'Series' object has no attribute 'split'

In [None]:
Data