In [1]:
# Imports
%run ./ClinicalTrialFunctions.py

# Load data
clinical_df = pd.read_parquet('./exampleFile/DTxClinicalTrials.parquet.gzip')
colors = {'Observational': "steelblue", 'Interventional': "orange"}


In [77]:
### Examples of conditions mapping to specific mesh groups
clinical_df["conditionsJoined"] = [",".join(c) for c in clinical_df["Condition"]]

# MeSH groups for DTx to treat Addiction (eg. smoking addiction), 
print('***************')
print("Conditions containing terms 'addiction', 'cessation', 'smoking,' or 'drugs'")
drug_df = clinical_df[clinical_df["conditionsJoined"].str.lower().str.contains("addiction|cessation|smoking|drugs")]
print(drug_df["conditionMeshMainBranch"].value_counts())
print()
print("Conditions listed for Behavior and Behavior Mechanisms")
print(drug_df[drug_df["conditionMeshMainBranch"]!="Chemically-Induced Disorders"]['conditionsJoined'].values)
print()
print("Conditions listed for Chemically-Induced Disorders")
print(drug_df[drug_df["conditionMeshMainBranch"]=="Chemically-Induced Disorders"]['conditionsJoined'].values)
print('\n***************\n')

# Mental health (eg. depression and anxiety)
print("Conditions containing terms 'depression' or 'anxiety'")
mental_health_df = clinical_df[clinical_df["conditionsJoined"].str.lower().str.contains("depression|anxiety")]
print(mental_health_df["conditionMeshMainBranch"].value_counts())
print()

# Which conditions conditions containing depression or anxity are listed as "behavior" vs "mental disorder"?
print("Conditions listed for Behavior and Behavior Mechanisms")
print(mental_health_df[mental_health_df["conditionMeshMainBranch"]=="Behavior and Behavior Mechanisms"]['conditionsJoined'].values)
print()
print("Conditions listed for Mental Disorders")
print(mental_health_df[mental_health_df["conditionMeshMainBranch"]=="Mental Disorders"]['conditionsJoined'].values)
print('\n***************\n')

# Diabetes
print("Conditions containing terms 'diabetes', 'T1D', or 'T2D'")
diabetes_df = clinical_df[clinical_df["conditionsJoined"].str.lower().str.contains("diabetes|t1d|t2d")]
print(diabetes_df["conditionMeshMainBranch"].value_counts())
print(diabetes_df[diabetes_df["conditionMeshMainBranch"]=="Urogenital Diseases"]['conditionsJoined'].values)

print('\n***************\n')

# Examples of nervous system diseases
print("Most common Nervous System Diseases")
nervous_df = clinical_df[clinical_df["conditionMeshMainBranch"]=="Nervous System Diseases"]
nervous_df = nervous_df.explode("Condition")
print(nervous_df["Condition"].value_counts().iloc[:10])

print('\n***************\n')

# Examples of nervous system diseases
print("Most common Nutritional and Metabolic Diseases ")
nervous_df = clinical_df[clinical_df["conditionMeshMainBranch"]=="Nutritional and Metabolic Diseases"]
nervous_df = nervous_df.explode("Condition")
print(nervous_df["Condition"].value_counts().iloc[:10])

print('\n***************\n')

# Examples of nervous system diseases
print("Most common Pathological Conditions")
nervous_df = clinical_df[clinical_df["conditionMeshMainBranch"].str.contains("Pathological Conditions")]
nervous_df = nervous_df.explode("Condition")
print(nervous_df["Condition"].value_counts().iloc[:10])



***************
Conditions containing terms 'addiction', 'cessation', 'smoking,' or 'drugs'
Behavior and Behavior Mechanisms    7
Chemically-Induced Disorders        1
Name: conditionMeshMainBranch, dtype: int64

Conditions listed for Behavior and Behavior Mechanisms
['Smoking Cessation,Smoking Behaviors,Smoking Reduction,Smoking, Cigarette,Smoking,Nicotine Dependence'
 'Smoking Cessation,Smoking,Smoking Behaviors,Smoking Reduction,Smoking, Tobacco,Smoking, Cigarette,Hiv,HIV/AIDS'
 'HIV/AIDS,Smoking Cessation,Tobacco Use Disorder'
 'Depression,Addiction,Anxiety,Sleep Disturbance' 'Smoking Cessation'
 'Smoking,Smoking Cessation' 'Smoking Cessation']

Conditions listed for Chemically-Induced Disorders
['Smoking Cessation,Nicotine Addiction,Drug Addiction,Drug Dependence,Tobacco Dependence,Tobacco Use Disorder,Substance Use Disorder,Tobacco Smoking']

***************

Conditions containing terms 'depression' or 'anxiety'
Behavior and Behavior Mechanisms               16
Mental Disorders  

In [143]:
### Do the phases/study design differ based on disease areas?
from scipy.stats import chi2_contingency

# Load data only for MeSH groups with at least 10 trials
clinical_df = pd.read_parquet('./exampleFile/DTxClinicalTrials.parquet.gzip')
values = clinical_df["conditionMeshMainBranch"].value_counts()
mesh_df = clinical_df[clinical_df["conditionMeshMainBranch"].isin(values[values>10].index)]

# Interventional trials only
mesh_df = mesh_df[mesh_df["StudyType"]=="Interventional"]

# chi-square test for design allocation (randomized, non-randomized, or NA)
chi_df = pd.crosstab(mesh_df["conditionMeshMainBranch"], mesh_df["DesignAllocation"], normalize="index")
_, p_value, _, _ = chi2_contingency(chi_df)
print("Significant difference in Design allocation: %s"%(p_value < 0.05))

# chi-square test for design allocation ('Single Group Assignment', 'Parallel Assignment',
# 'Sequential Assignment', 'Crossover Assignment', 'Factorial Assignment'
chi_df = pd.crosstab(mesh_df["conditionMeshMainBranch"], mesh_df["DesignInterventionModel"], normalize="index")
_, p_value, _, _ = chi2_contingency(chi_df)
print("Significant difference in DesignInterventionModel: %s"%(p_value < 0.05))

# chi-square test for design allocation
chi_df = pd.crosstab(mesh_df["conditionMeshMainBranch"], mesh_df["PhaseClean"], normalize="index")
_, p_value, _, _ = chi2_contingency(chi_df)
print("Significant difference in Phase: %s"%(p_value < 0.05))


Significant difference in design allocation: False
Significant difference in DesignInterventionModel: False
Significant difference in Phase: False


In [99]:
### What is the status of these trials?
clinical_df["OverallStatus"].value_counts()


Recruiting                 170
Completed                  168
Not yet recruiting          54
Active, not recruiting      33
Enrolling by invitation     24
Name: OverallStatus, dtype: int64