In [1]:
import pandas as pd
from datetime import datetime
import csv, random, re, math
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import seaborn as sn
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
pd.options.plotting.backend = "plotly"
sn.set(rc = {'figure.figsize':(20,10)})
plt.rcParams["figure.figsize"] = (20,10)



In [35]:
def translateEntry(df,metaData,chemName,DEBUG=False):
    Entry = re.split(" |=",df[chemName])
    #EffectName = df["preferred name"].split(":")
    EffectName = [df["preferred name"][:3], df["preferred name"][3:]]
    Effect = [Entry[1],int(Entry[3])] # Effect that is being indexed, positive 1 or negative (not present) 0
    Mag = float(Entry[4][1:][:-1]) # magnitude of effect compared to nearest neighbours (other columns)
    AUC = float(Entry[6]) # Area Under Curve, measure for accuracy
    p_Val = float(Entry[8]) # p-value, confidence interval. Lower is better
    EffectName.append(int(Entry[3]))
    EffectName.append(Mag)
    EffectName.append(AUC)
    EffectName.append(p_Val)
    df.drop(chemName)
    if DEBUG:
        print(EffectName)
        print(Entry)
        print("--** Effect:{} -- Mag:{} -- AUC:{} -- P-Value:{} **--".format(Effect, Mag, AUC, p_Val))
    thresholdSimilarityVector = []
    thresholdSimilaritySum = 0
    for i,x in enumerate(df[2:]):
        mgkgday = re.findall('\d*\.?\d+',x) # Filter out thresholds in mg/kg/day (I assume mg compound to kg body weight per day)
        if len(mgkgday) ==  0: # no number detected
            mgkgday = 0.0
        else:
            mgkgday = float(mgkgday[0])
        mgkgday /= float(metaData.iloc[2][i+2]) # divide by similarity index; higher similarity means chemical/toxilogical fingerprint is more similar. Range will be bigger if more uncertainty
        thresholdSimilarityVector.append(mgkgday)
        thresholdSimilaritySum += mgkgday*Mag
    thresholdSimilarityVector.append(thresholdSimilaritySum)
    if DEBUG:
        display(thresholdSimilarityVector)
    return EffectName+thresholdSimilarityVector
    
def computeHealthIndex(fileName,DEBUG=False):
    genRAData = pd.read_csv("{}.csv".format(fileName), header=1)
    metaData = genRAData[:3]
    chemName = metaData.columns[1]
    genRAData = genRAData[3:]
    if DEBUG:
        print(metaData)
        print("Test translation:")
        translateEntry(genRAData.iloc[0],metaData,chemName,DEBUG=DEBUG)
        print(translateEntry(genRAData.iloc[random.randrange(len(genRAData))],metaData,chemName,DEBUG=DEBUG))
    healthIndexData = []
    effectLabels = []
    targetLabels = []
    for i in genRAData.iloc:
        healthIndexData.append(translateEntry(i,metaData,chemName))
        effectLabels.append(healthIndexData[-1][0])
        targetLabels.append(healthIndexData[-1][1])
    return healthIndexData ,list(set(effectLabels)), list(set(targetLabels)), metaData, chemName

In [23]:
def plotResult(table, df, FileNames):
    chemicals = []
    for i in FileNames:
        chemicals.append(i.split("_")[1])
    sn.heatmap(table, linewidths=.0).figure.savefig('Heatmap_All.png')
    
    fig = go.Figure(go.Bar(y=df[df.Formula == chemicals[0]]["positive targets decimal"].values, x=df[df.Formula == chemicals[0]]["Effectgroup"].values, text=chemicals[0]))
    for i in chemicals[1:]:
        fig.add_trace(go.Bar(y=df[df.Formula == i]["positive targets decimal"].values, x=df[df.Formula == i]["Effectgroup"], name=i))
    #fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
    fig.update_layout(height=800, width=3000,uniformtext_minsize=8, uniformtext_mode='hide', barmode='stack', xaxis={'categoryorder':'total descending'})
    fig.write_image("Chemicals-Effectgroup_bar.png")
    fig.show()


    TargetEffectLabels
    fig = go.Figure(go.Bar(y=df[df.Testgroup == TargetEffectLabels[0][0]]["positive targets decimal"].values, x=df[df.Testgroup == TargetEffectLabels[0][0]]["Effectgroup"].values, text=TargetEffectLabels[0][0]))
    for i in TargetEffectLabels[1:]:
        fig.add_trace(go.Bar(y=df[df.Testgroup == i[0]]["positive targets decimal"].values, x=df[df.Testgroup == i[0]]["Effectgroup"], name=i[0]))
    #fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
    fig.update_layout(height=800, width=3000,uniformtext_minsize=8, uniformtext_mode='hide', barmode='stack', xaxis={'categoryorder':'total descending'})
    fig.write_image("Testgroup-Effectgroup_bar.png")
    fig.show()



    fig = go.Figure(data=go.Scatter3d(
        x=df["Effectgroup"],
        y=df["Formula"],
        z=df['Testgroup'],
        text=df['positive targets decimal'],
        mode='markers',
        marker=dict(
            sizemode='diameter',
            sizeref=0.5,
            size=df['positive targets decimal'],
            color = df['total targets'],
            colorscale = 'Viridis',
            colorbar_title = 'decimal<br>targets',
            line_color='rgb(140, 140, 170)'
        )
    ))
    fig.update_layout(height=1000, width=2000,title='Effectgroup Vs testgroup Vs chemical, size denotes sum of positive targets, color denotes amount of targets')
    fig.write_image("3D_plot.png")
    fig.show()



    fig = go.Figure(go.Histogram2d(
            x=df["Effectgroup"],
            y=df["positive targets decimal"]
        ))
    fig.update_layout(height=1600, width=1600)
    fig.write_image("Effectgroup_score.png")
    fig.show()
    fig = go.Figure(go.Histogram2d(
            x=df["Testgroup"],
            y=df["positive targets decimal"]
        ))
    fig.write_image("Testgroup_score.png")
    fig.show()
    fig = go.Figure(go.Histogram2d(
            x=df["Formula"],
            y=df["positive targets decimal"]
        ))
    fig.write_image("Chemical_score.png")
    fig.show()

In [38]:
def df_normalize(df):
    return df/df.max()

TargetEffectLabels = [["MGR"],["REP"],["DEV"],["CHR"],["SUB"],["SAC"]]
def TrackTargetEffectLabels(HID,Labels):
    for i in HID:
        for j,x in enumerate(Labels):
            if i[0] == x[0]:
                Labels[j].append(i[1][1:])
    for i,x in enumerate(Labels):
        TestType = x[0]
        x.remove(TestType)
        x.sort()
        Labels[i] = [TestType]+list(set(x))
    return Labels

def metaDataGenEffects(effectLabels, HID):
    metaDataHID = []
    for i,x in enumerate(effectLabels):
        Labs = [0,0,0,0,0]
        for j,y in enumerate(HID):
            if x == y[0]: # Is labeled similar
                Labs[0] += 1
                Labs[1] += y[2]
                Labs[2] += y[3]
                Labs[3] += y[-1]
                Labs[4]  = y[0]
        metaDataHID.append(Labs)
    return metaDataHID

def metaDataGenTargets(targetLabels, HID):
    metaDataHID = []
    for i,x in enumerate(targetLabels):
        Labs = [0,0,0,0,0,0]
        for j,y in enumerate(HID):
            if x == y[1]: # Is labeled similar
                Labs[0] += 1
                Labs[1] += y[2]
                Labs[2] += y[3]
                Labs[3] += y[-1]
                Labs[4]  = y[1]
                Labs[5]  = y[0] # But save testgroup data! so we get a very complete dataset :).
        metaDataHID.append(Labs)
    return metaDataHID

def metaDataReport(effectLabels,metaDataHID,md,label):
    display(md)
    FileName = "{}_{}_metadata.csv".format(label,md.columns[1])
    md.to_csv(FileName)
    Header = ["Effectgroup","total targets", "positive targets", "positive targets decimal", "weighted and averaged limit [mg/kg/day]"]
    with open(FileName, 'a', newline='') as f:
        Writer = csv.writer(f)
        Writer.writerow(Header)
        for i, x in enumerate(effectLabels):
            print("{} - total targets: {} - Targets positive: {} - Total targets decimal: {:.2f} - weighted and averaged limit {:.2f} [mg/kg/day]".format(x,metaDataHID[i][0],metaDataHID[i][1],metaDataHID[i][2],metaDataHID[i][3]))
            Writer.writerow([x]+metaDataHID[i])
        f.close()



def BatchReport(fileNames, fileName, fcn="sum", Viz = False):
    Header = ["Formula","Name","total targets", "positive targets", "positive targets decimal", "weighted and averaged limit [mg/kg/day]","Effectgroup"]
    TargetEffectLabels = [["MGR"],["REP"],["DEV"],["CHR"],["SUB"],["SAC"]]
    MetaBlobs = []
    analogBlobs = []
    TargetBlobs = []
    compoundNames = []
    for label in fileNames:
        #label = "genra_O3"
        HID, effectLabels, targetLabels, md, Compound = computeHealthIndex(label)#Ozone
        compoundNames.append(Compound)
        metaDataHID = metaDataGenEffects(effectLabels, HID)
        metaDataHIDt = metaDataGenTargets(targetLabels,HID)
        metaDataReport(effectLabels,metaDataHID,md,label)
        TargetEffectLabels = TrackTargetEffectLabels(HID,TargetEffectLabels)
        TargetBlobs.append(metaDataHIDt)
        MetaBlobs.append(metaDataHID)
        analogBlobs.append(md)
    
    with open("{}_Meta.csv".format(fileName), "w", newline='') as f:
        Writer = csv.writer(f)
        Writer.writerow(Header)
        for i,Blob in enumerate(MetaBlobs):
            for lines in Blob:
                print(lines)
                Writer.writerow([fileNames[i].split("_")[1],compoundNames[i]]+lines)
        f.close()
        
    with open("{}_Target.csv".format(fileName), "w", newline='') as f:
        Writer = csv.writer(f)
        Header.append("Testgroup")
        Writer.writerow(Header)
        for i,Blob in enumerate(TargetBlobs):
            for lines in Blob:
                print(lines)
                Writer.writerow([fileNames[i].split("_")[1],compoundNames[i]]+lines)
        f.close()
    df = pd.read_csv("Batch_Report_Target.csv")#Read the just made file back into DF to use pivottables
    nan_value = float("NaN")
    df.replace("", nan_value, inplace=True)
    df.replace(nan_value, 0,  inplace=True)

    if fcn == "sum":
        table = pd.pivot_table(df, values='positive targets decimal', index=['Effectgroup'],columns=['Formula',"Testgroup"], aggfunc=np.sum)
    elif fcn == "average":
        table = pd.pivot_table(df, values='positive targets decimal', index=['Effectgroup'],columns=['Formula',"Testgroup"], aggfunc=np.average)
    else:
        table = pd.pivot_table(df, values='positive targets decimal', index=['Effectgroup'],columns=['Formula',"Testgroup"], aggfunc=np.mean)
    table.replace("", nan_value, inplace=True)
    table.replace(nan_value, 0,  inplace=True)
    table = df_normalize(table)
    table.to_csv("AEI_Norm_Test.csv")
    if Viz:
         plotResult(table, df, fileNames)
    return MetaBlobs, TargetBlobs, analogBlobs, table, df
        
            

In [39]:
fileNames = ["genra_O3","genra_C10H16O2","genra_C8H14O","genra_CH3CHO","genra_C10H16","genra_N2","genra_CO2","genra_CH2O"]
compoundNames = ["Ozone","3-Isopropenyl-6-oxo-heptanal","6-Methyl-5-hepten-2-one","Acetaldehyde","D-Limonene","Nitrogen","Carbon dioxide","Formaldehyde"]
FileName = "Batch_Report"
MetaBlobs, TargetBlobs, analogBlobs, PVTable, target_DF = BatchReport(fileNames,FileName)

Unnamed: 0,preferred name,Ozone,Sodium nitrite,Barium metaborate,Trisodium arsenate,Silver nitrate,Monosodium methanearsonate,Potassium dichromate,Trichloronitromethane,Sodium dichromate dihydrate,Sodium fluoroacetate,Sodium trichloroacetate,"Butanedioic acid, 2,3-dihydroxy- (2R,3R)-, sodium salt (1:2)",Tetrakis(hydroxymethyl)phosphonium sulfate,Silica,Lithium hypochlorite,"2-(Hydroxymethyl)-2-nitro-1,3-propanediol"
0,dsstox_sid,DTXSID0021098,DTXSID0020941,DTXSID1034347,DTXSID9039367,DTXSID3032042,DTXSID2025686,DTXSID5025948,DTXSID0020315,DTXSID6032061,DTXSID8024311,DTXSID6034924,DTXSID2057861,DTXSID0021331,DTXSID1029677,DTXSID1034688,DTXSID8027034
1,molecular weight,47.997,68.995,222.940,207.887,169.872,161.951,294.182,164.370,297.995,100.024,185.360,194.050,406.280,60.083,58.390,151.118
2,similarity,1.00001,0.18181818181818182,0.18181818181818182,0.18181818181818182,0.18181818181818182,0.13333333333333333,0.13333333333333333,0.13333333333333333,0.125,0.125,0.125,0.11764705882352941,0.1111111111111111,0.1111111111111111,0.1111111111111111,0.10526315789473684


MGR - total targets: 55 - Targets positive: 12 - Total targets decimal: 8.50 - weighted and averaged limit 3469.12 [mg/kg/day]
DEV - total targets: 48 - Targets positive: 23 - Total targets decimal: 23.50 - weighted and averaged limit 87704.35 [mg/kg/day]
SAC - total targets: 74 - Targets positive: 18 - Total targets decimal: 17.10 - weighted and averaged limit 94654.44 [mg/kg/day]
CHR - total targets: 78 - Targets positive: 24 - Total targets decimal: 27.83 - weighted and averaged limit 9685.57 [mg/kg/day]
SUB - total targets: 92 - Targets positive: 32 - Total targets decimal: 33.75 - weighted and averaged limit 84568.61 [mg/kg/day]


Unnamed: 0,preferred name,3-Isopropenyl-6-oxo-heptanal,4-(4-Hydroxyphenyl)butan-2-one,2-Ethylhexanoic acid,Methacrylamide,Myrcene,Ethyl acrylate,"3,7-Dimethyl-2,6-octadienal",Tetraacetylethylenediamine,Acetamide,Ethylene glycol monoethyl ether acetate,Daminozide,Bis(2-ethylhexyl)hexanedioate,Acrylamide,Methacrylonitrile,"N,N'-Methylenebisacrylamide",Glufosinate-ammonium
0,dsstox_sid,DTXSID40874121,DTXSID5044495,DTXSID9025293,DTXSID8029600,DTXSID6025692,DTXSID4020583,DTXSID6024836,DTXSID5040752,DTXSID7020005,DTXSID9021928,DTXSID9020370,DTXSID0020606,DTXSID5020027,DTXSID1024176,DTXSID8025595,DTXSID1024120
1,molecular weight,168.236,164.204,144.214,85.106,136.238,100.117,152.237,228.248,59.068,132.159,160.173,370.574,71.079,67.091,154.169,198.159
2,similarity,1.00001,0.2127659574468085,0.20930232558139536,0.20588235294117646,0.17777777777777778,0.17073170731707318,0.1702127659574468,0.15789473684210525,0.15625,0.15555555555555556,0.15217391304347827,0.14285714285714285,0.14285714285714285,0.14285714285714285,0.14285714285714285,0.14


MGR - total targets: 53 - Targets positive: 4 - Total targets decimal: 4.00 - weighted and averaged limit 560.00 [mg/kg/day]
DEV - total targets: 55 - Targets positive: 29 - Total targets decimal: 31.15 - weighted and averaged limit 105303.36 [mg/kg/day]
SAC - total targets: 43 - Targets positive: 3 - Total targets decimal: 3.00 - weighted and averaged limit 61250.00 [mg/kg/day]
REP - total targets: 34 - Targets positive: 10 - Total targets decimal: 10.00 - weighted and averaged limit 47812.50 [mg/kg/day]
CHR - total targets: 60 - Targets positive: 5 - Total targets decimal: 8.60 - weighted and averaged limit 47705.15 [mg/kg/day]
SUB - total targets: 86 - Targets positive: 18 - Total targets decimal: 23.10 - weighted and averaged limit 180007.59 [mg/kg/day]


Unnamed: 0,preferred name,6-Methyl-5-hepten-2-one,"3,7-Dimethyl-2,6-octadienal",Myrcene,4-(4-Hydroxyphenyl)butan-2-one,Ethyl acrylate,Acetamide,tert-Butyl acetate,Urethane,Tetraacetylethylenediamine,Ethylene glycol monoethyl ether acetate,Methacrylamide,"1,4-Bis(bromoacetoxy)-2-butene",2-Ethylhexanoic acid,Fosamine ammonium,Butyraldehyde oxime,Glycolic acid
0,dsstox_sid,DTXSID5021629,DTXSID6024836,DTXSID6025692,DTXSID5044495,DTXSID4020583,DTXSID7020005,DTXSID1022055,DTXSID9021427,DTXSID5040752,DTXSID9021928,DTXSID8029600,DTXSID0034308,DTXSID9025293,DTXSID9032406,DTXSID2024664,DTXSID0025363
1,molecular weight,126.199,152.237,136.238,164.204,100.117,59.068,116.160,89.094,228.248,132.159,85.106,329.972,144.214,170.105,87.122,76.051
2,similarity,1.00001,0.4375,0.41935483870967744,0.23076923076923078,0.21875,0.21739130434782608,0.21428571428571427,0.20689655172413793,0.20689655172413793,0.19444444444444445,0.18518518518518517,0.16666666666666666,0.16216216216216217,0.16216216216216217,0.16129032258064516,0.16


SUB - total targets: 92 - Targets positive: 31 - Total targets decimal: 33.06 - weighted and averaged limit 217667.05 [mg/kg/day]
REP - total targets: 34 - Targets positive: 10 - Total targets decimal: 10.00 - weighted and averaged limit 20269.23 [mg/kg/day]
CHR - total targets: 56 - Targets positive: 6 - Total targets decimal: 7.75 - weighted and averaged limit 12210.22 [mg/kg/day]
DEV - total targets: 58 - Targets positive: 34 - Total targets decimal: 34.75 - weighted and averaged limit 83943.22 [mg/kg/day]


Unnamed: 0,preferred name,Acetaldehyde,"N,N-Dimethylformamide",Formamide,Glutaraldehyde,"2E,4E-Hexadienoic acid",Dimethylarsinic acid,Acetamide,"1,2-Benzenedicarboxaldehyde",N-Nitrosodimethylamine,Benzaldehyde,"3,7-Dimethyl-2,6-octadienal","N,N'-Bis(1-formamido-2,2,2-trichloroethyl)piperazine",Dimethyl phosphonate,Methyl methanesulfonate,Ethyl acrylate,Monosodium methanearsonate
0,dsstox_sid,DTXSID5039224,DTXSID6020515,DTXSID8025337,DTXSID6025355,DTXSID3021277,DTXSID7020508,DTXSID7020005,DTXSID6032514,DTXSID7021029,DTXSID8039241,DTXSID6024836,DTXSID5032654,DTXSID5020493,DTXSID7020845,DTXSID4020583,DTXSID2025686
1,molecular weight,44.053,73.095,45.041,100.117,112.128,137.998,59.068,134.134,74.083,106.124,152.237,434.950,110.049,110.130,100.117,161.951
2,similarity,1.00001,0.36363636363636365,0.3333333333333333,0.2,0.19047619047619047,0.16666666666666666,0.16666666666666666,0.15789473684210525,0.15384615384615385,0.15,0.14285714285714285,0.14285714285714285,0.14285714285714285,0.14285714285714285,0.13636363636363635,0.13333333333333333


MGR - total targets: 59 - Targets positive: 17 - Total targets decimal: 16.88 - weighted and averaged limit 42238.87 [mg/kg/day]
DEV - total targets: 42 - Targets positive: 18 - Total targets decimal: 19.23 - weighted and averaged limit 37290.56 [mg/kg/day]
SAC - total targets: 51 - Targets positive: 6 - Total targets decimal: 8.11 - weighted and averaged limit 859.23 [mg/kg/day]
REP - total targets: 33 - Targets positive: 4 - Total targets decimal: 4.96 - weighted and averaged limit 284.18 [mg/kg/day]
CHR - total targets: 77 - Targets positive: 22 - Total targets decimal: 26.60 - weighted and averaged limit 92372.02 [mg/kg/day]
SUB - total targets: 93 - Targets positive: 33 - Total targets decimal: 37.01 - weighted and averaged limit 165337.13 [mg/kg/day]


Unnamed: 0,preferred name,D-Limonene,Limonene,R-(+)-Pulegone,alpha-Isomethylionone,1-trans-delta-9-Tetrahydrocannabinol,Methacrylonitrile,Methacrylamide,Trinexapac-ethyl,4-Chloro-3-methylphenol,Prohexadione-calcium,Isophorone,Diethyl phthalate,Cyprodinil,Tetramethrin,Captafol,o-Cresol
0,dsstox_sid,DTXSID1020778,DTXSID2029612,DTXSID2025975,DTXSID7027047,DTXSID6021327,DTXSID1024176,DTXSID8029600,DTXSID9032535,DTXSID4021717,DTXSID9034496,DTXSID8020759,DTXSID7021780,DTXSID1032359,DTXSID6032649,DTXSID4020242,DTXSID8021808
1,molecular weight,136.238,136.238,152.237,206.329,314.469,67.091,85.106,252.266,142.580,250.263,138.210,222.240,225.295,331.412,349.050,108.140
2,similarity,1.00001,1,0.16666666666666666,0.15789473684210525,0.15,0.14705882352941177,0.14285714285714285,0.14035087719298245,0.13333333333333333,0.1320754716981132,0.13043478260869565,0.125,0.125,0.125,0.12280701754385964,0.12195121951219512


MGR - total targets: 57 - Targets positive: 8 - Total targets decimal: 11.97 - weighted and averaged limit 87080.62 [mg/kg/day]
DEV - total targets: 38 - Targets positive: 13 - Total targets decimal: 13.21 - weighted and averaged limit 50240.22 [mg/kg/day]
SAC - total targets: 63 - Targets positive: 9 - Total targets decimal: 7.70 - weighted and averaged limit 17437.99 [mg/kg/day]
CHR - total targets: 77 - Targets positive: 26 - Total targets decimal: 29.48 - weighted and averaged limit 69375.50 [mg/kg/day]
SUB - total targets: 96 - Targets positive: 31 - Total targets decimal: 35.52 - weighted and averaged limit 230734.19 [mg/kg/day]


Unnamed: 0,preferred name,Nitrogen,Cyanamide,Acetonitrile,Calcium cyanamide,Dibromoacetonitrile,Acrylonitrile,Dichloroacetonitrile,Propionitrile
0,dsstox_sid,DTXSID4036304,DTXSID9034490,DTXSID7020009,DTXSID6020353,DTXSID3024940,DTXSID5020029,DTXSID3021562,DTXSID1021879
1,molecular weight,28.014,42.041,41.053,80.103,198.845,53.064,109.940,55.080
2,similarity,1.00001,0.14285714285714285,0.14285714285714285,0.125,0.1,0.1,0.1,0.1


SUB - total targets: 89 - Targets positive: 31 - Total targets decimal: 29.46 - weighted and averaged limit 8060.36 [mg/kg/day]
CHR - total targets: 84 - Targets positive: 35 - Total targets decimal: 37.05 - weighted and averaged limit 4267.93 [mg/kg/day]
MGR - total targets: 40 - Targets positive: 3 - Total targets decimal: 3.00 - weighted and averaged limit 43.75 [mg/kg/day]
DEV - total targets: 35 - Targets positive: 12 - Total targets decimal: 12.85 - weighted and averaged limit 15145.34 [mg/kg/day]


Unnamed: 0,preferred name,Carbon dioxide,Dibromoacetonitrile,"1,6-Diisocyanatohexane",Silica,Cyanamide,Formamide,Sulfuryl fluoride,Arsenic acid,Urea,Chromic(VI) acid,Acetonitrile,"2,2-Dibromo-3-nitrilopropionamide",Chlorothalonil,Sodium nitrite,Barium metaborate,Trisodium arsenate
0,dsstox_sid,DTXSID4027028,DTXSID3024940,DTXSID4024143,DTXSID1029677,DTXSID9034490,DTXSID8025337,DTXSID9034945,DTXSID1034341,DTXSID4021426,DTXSID8034455,DTXSID7020009,DTXSID5032361,DTXSID0020319,DTXSID0020941,DTXSID1034347,DTXSID9039367
1,molecular weight,44.009,198.845,168.196,60.083,42.041,45.041,102.050,141.942,60.056,118.008,41.053,241.870,265.900,68.995,222.940,207.887
2,similarity,1.00001,0.18181818181818182,0.15789473684210525,0.14285714285714285,0.1111111111111111,0.1111111111111111,0.1111111111111111,0.1111111111111111,0.1111111111111111,0.1111111111111111,0.1111111111111111,0.10526315789473684,0.1,0.1,0.1,0.1


MGR - total targets: 48 - Targets positive: 9 - Total targets decimal: 12.86 - weighted and averaged limit 16475.97 [mg/kg/day]
DEV - total targets: 49 - Targets positive: 24 - Total targets decimal: 25.48 - weighted and averaged limit 94434.88 [mg/kg/day]
SAC - total targets: 54 - Targets positive: 10 - Total targets decimal: 17.00 - weighted and averaged limit 161375.00 [mg/kg/day]
CHR - total targets: 92 - Targets positive: 37 - Total targets decimal: 45.10 - weighted and averaged limit 37591.27 [mg/kg/day]
SUB - total targets: 100 - Targets positive: 39 - Total targets decimal: 43.81 - weighted and averaged limit 172850.40 [mg/kg/day]


Unnamed: 0,preferred name,Formaldehyde,Acrylamide,Silica,Methacrylamide,Formamide,Sulfuryl fluoride,Arsenic acid,Urea,Chromic(VI) acid,Sodium nitrite,"2,2-Dibromo-3-nitrilopropionamide",Barium metaborate,Trisodium arsenate,Silver nitrate,Dimethylarsinic acid,Acetamide
0,dsstox_sid,DTXSID7020637,DTXSID5020027,DTXSID1029677,DTXSID8029600,DTXSID8025337,DTXSID9034945,DTXSID1034341,DTXSID4021426,DTXSID8034455,DTXSID0020941,DTXSID5032361,DTXSID1034347,DTXSID9039367,DTXSID3032042,DTXSID7020508,DTXSID7020005
1,molecular weight,30.026,71.079,60.083,85.106,45.041,102.050,141.942,60.056,118.008,68.995,241.870,222.940,207.887,169.872,137.998,59.068
2,similarity,1.00001,0.16666666666666666,0.16666666666666666,0.15384615384615385,0.125,0.125,0.125,0.125,0.125,0.1111111111111111,0.1111111111111111,0.1111111111111111,0.1111111111111111,0.1111111111111111,0.1,0.1


MGR - total targets: 47 - Targets positive: 10 - Total targets decimal: 12.54 - weighted and averaged limit 11249.90 [mg/kg/day]
DEV - total targets: 46 - Targets positive: 19 - Total targets decimal: 21.07 - weighted and averaged limit 38230.93 [mg/kg/day]
SAC - total targets: 53 - Targets positive: 22 - Total targets decimal: 22.00 - weighted and averaged limit 194958.00 [mg/kg/day]
CHR - total targets: 72 - Targets positive: 17 - Total targets decimal: 22.43 - weighted and averaged limit 30239.86 [mg/kg/day]
SUB - total targets: 100 - Targets positive: 45 - Total targets decimal: 48.13 - weighted and averaged limit 162700.78 [mg/kg/day]
[55, 12, 8.5, 3469.125, 'MGR']
[48, 23, 23.500999999999998, 87704.346, 'DEV']
[74, 18, 17.102999999999998, 94654.43500000001, 'SAC']
[78, 24, 27.83, 9685.57155, 'CHR']
[92, 32, 33.754, 84568.60789749998, 'SUB']
[53, 4, 4.0, 560.0, 'MGR']
[55, 29, 31.153, 105303.36472817462, 'DEV']
[43, 3, 3.0, 61250.0, 'SAC']
[34, 10, 10.0, 47812.5, 'REP']
[60, 5, 8.

# Usefull links and websites:
https://www.chemspider.com/ not used that much

https://comptox.epa.gov/genra/DTXCID50778 The code here 100% relies on this software, the comptox genRA package, generalized read-across

https://cfpub.epa.gov/ecotox/explore.cfm?sub=Effects don't remember, but has been used

https://comptox.epa.gov/dashboard/batch-search powerful database, has been used to find details

https://sandbox.ntp.niehs.nih.gov/tox21-curve-visualization/ usefull database but not clear how I can use it for an index

https://pubchem.ncbi.nlm.nih.gov/#query=50-00-0%20OR%207727-37-9%20OR%20124-38-9%20OR%20138-86-3%20OR%2010028-15-6&tab=compound contains a lot of chemical and physical data in general

https://echa.europa.eu/information-on-chemicals/cl-inventory-database?p_p_id=dissclinventory_WAR_dissclinventoryportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view handy lookup website for EU CLP regulation, starting point before finding MGR, DEV, SAC, CHR and SUB

https://ice.ntp.niehs.nih.gov/Tools yet another usefull website but not used that much

Cas No/chemical were interested in
* 50-00-0 CH2O Formaldehyde
* 7727-37-9 N2 Nitrogen
* 124-38-9 CO2 Carbon Dioxide
* 138-86-3 / 5989-59-8 C10H16 Limonene
* 10028-15-6 O3 Ozone

In [161]:
from pivottablejs import pivot_ui

pivot_ui(target_DF)

In [6]:
PVTable

Formula,C10H16,C10H16,C10H16,C10H16,C10H16,C10H16O2,C10H16O2,C10H16O2,C10H16O2,C8H14O,...,CO2,N2,N2,N2,N2,O3,O3,O3,O3,O3
Testgroup,CHR,DEV,MGR,SAC,SUB,DEV,MGR,REP,SUB,CHR,...,SUB,CHR,DEV,MGR,SUB,CHR,DEV,MGR,SAC,SUB
Effectgroup,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
:5' nucleotidase,0.0,0.0,0.000,0.0,0.229991,0.0,0.0,0.0,0.000000,,...,0.000000,0.0,0.0,0.0,0.250,0.0,0.0,0.0000,0.0,0.000000
:[other],0.0,0.0,0.000,0.0,0.459982,0.0,0.0,0.0,0.361795,,...,0.622278,0.0,0.0,0.0,0.250,0.0,0.0,1.0000,0.0,0.000000
:aborted,0.0,0.0,0.074,0.0,0.000000,0.0,0.0,0.0,0.000000,,...,0.000000,0.0,0.0,0.0,0.000,0.0,0.0,0.1112,0.0,0.000000
:adrenal gland,0.0,0.0,0.000,0.0,0.170653,0.0,0.0,0.0,0.077967,,...,0.054553,0.0,0.0,0.0,0.000,0.0,0.0,0.0000,0.0,0.058168
:alanine aminotransferase (alt/sgpt),0.0,0.0,0.000,0.0,0.367065,0.0,0.0,0.0,0.054088,,...,0.553412,0.0,0.0,0.0,0.375,0.0,0.0,0.0000,0.0,0.298267
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
:vagina,0.0,0.0,0.000,0.0,0.271159,0.0,0.0,0.0,0.180897,,...,0.000000,0.0,0.0,0.0,0.000,0.0,0.0,0.0000,0.0,0.000000
:vaginal opening,0.0,0.0,0.459,0.0,0.000000,0.0,0.0,0.0,0.000000,,...,0.000000,0.0,0.0,0.0,0.000,0.0,0.0,0.0000,0.0,0.000000
:viability index,0.0,0.0,0.177,0.0,0.000000,0.0,0.0,0.0,0.000000,,...,0.000000,0.0,0.0,1.0,0.000,0.0,0.0,0.0000,0.0,0.000000
:volume,0.0,0.0,0.000,0.0,0.121435,0.0,0.0,0.0,0.061143,,...,0.077785,0.0,0.0,0.0,0.000,0.0,0.0,0.0000,0.0,0.196576


In [208]:
df[df.Formula == "O3"]

Unnamed: 0,Formula,Name,total targets,positive targets,positive targets decimal,weighted and averaged limit [mg/kg/day],Effectgroup,Testgroup
0,O3,Ozone,2,0,0.000,0.0000,:sex ratio,MGR
1,O3,Ozone,1,1,1.000,68.7500,:bile duct,CHR
2,O3,Ozone,2,0,0.000,0.0000,:gestation index,MGR
3,O3,Ozone,3,0,0.144,8.0640,:salivary glands,SUB
4,O3,Ozone,1,0,0.000,0.0000,:oviduct,MGR
...,...,...,...,...,...,...,...,...
135,O3,Ozone,2,2,1.242,3111.5205,:urea nitrogen,SUB
136,O3,Ozone,2,2,1.512,4915.1496,:triglycerides,SUB
137,O3,Ozone,4,0,0.000,0.0000,:seminal vesicle,SUB
138,O3,Ozone,3,3,2.468,8901.3995,:hematocrit (hct),SUB


In [36]:
label = "genra_O3"
HID, effectLabels, targetLabels, md, chemName = computeHealthIndex(label)#Ozone
metaDataHID = metaDataGenEffects(effectLabels, HID)
metaDataHIDt = metaDataGenTargets(targetLabels,HID)
metaDataReport(effectLabels,metaDataHID,md,label)
TargetEffectLabels = TrackTargetEffectLabels(HID,TargetEffectLabels)

Unnamed: 0,preferred name,Ozone,Sodium nitrite,Barium metaborate,Trisodium arsenate,Silver nitrate,Monosodium methanearsonate,Potassium dichromate,Trichloronitromethane,Sodium dichromate dihydrate,Sodium fluoroacetate,Sodium trichloroacetate,"Butanedioic acid, 2,3-dihydroxy- (2R,3R)-, sodium salt (1:2)",Tetrakis(hydroxymethyl)phosphonium sulfate,Silica,Lithium hypochlorite,"2-(Hydroxymethyl)-2-nitro-1,3-propanediol"
0,dsstox_sid,DTXSID0021098,DTXSID0020941,DTXSID1034347,DTXSID9039367,DTXSID3032042,DTXSID2025686,DTXSID5025948,DTXSID0020315,DTXSID6032061,DTXSID8024311,DTXSID6034924,DTXSID2057861,DTXSID0021331,DTXSID1029677,DTXSID1034688,DTXSID8027034
1,molecular weight,47.997,68.995,222.940,207.887,169.872,161.951,294.182,164.370,297.995,100.024,185.360,194.050,406.280,60.083,58.390,151.118
2,similarity,1.00001,0.18181818181818182,0.18181818181818182,0.18181818181818182,0.18181818181818182,0.13333333333333333,0.13333333333333333,0.13333333333333333,0.125,0.125,0.125,0.11764705882352941,0.1111111111111111,0.1111111111111111,0.1111111111111111,0.10526315789473684


MGR - total targets: 55 - Targets positive: 12 - Total targets decimal: 8.50 - weighted and averaged limit 3469.12 [mg/kg/day]
DEV - total targets: 48 - Targets positive: 23 - Total targets decimal: 23.50 - weighted and averaged limit 87704.35 [mg/kg/day]
SAC - total targets: 74 - Targets positive: 18 - Total targets decimal: 17.10 - weighted and averaged limit 94654.44 [mg/kg/day]
CHR - total targets: 78 - Targets positive: 24 - Total targets decimal: 27.83 - weighted and averaged limit 9685.57 [mg/kg/day]
SUB - total targets: 92 - Targets positive: 32 - Total targets decimal: 33.75 - weighted and averaged limit 84568.61 [mg/kg/day]


In [31]:
columns[1]

'Ozone'