# **Network Construction**
Habit Forming Medications

# Import Libraries

In [1]:
import ast
from collections import defaultdict
from itertools import combinations

import pandas as pd

# Function

In [2]:
def calculate_jaccard_associations(neighbors):
    # Create a list to store the results
    results = list()

    # Compute the Jaccard index between each combination of different nodes
    for node_a, node_b in combinations(neighbors, 2):
        # Get the sets of neighbors from both nodes
        set_a = neighbors[node_a]
        set_b = neighbors[node_b]
        
        # Compute the Jaccard index between both sets
        intersection = len(set_a & set_b)
        union = len(set_a | set_b)
        jaccard = intersection / union if union else 0
        
        # Insert the result for this combination in the list
        results.append((node_a, node_b, jaccard))
    
    # Create a DataFrame for the inferred associations
    columns = ['source', 'target', 'association']
    df_associations = pd.DataFrame(results, columns=columns) \
        .sort_values(by='association', ascending=False) \
        .query('association > 0') \
        .reset_index(drop=True)
    
    return df_associations

# Load the Processed Dataset

In [3]:
# DataFrame with processed data about medications and side effects
df_medications = pd.read_csv('../data/processed/medicine-dataset.csv')

# Convert side effects and habit forming representations of sets to actual sets
df_medications['side_effects'] = \
    df_medications['side_effects'].apply(ast.literal_eval)
df_medications['habit_forming'] = \
    df_medications['habit_forming'].apply(ast.literal_eval)

# Print the DataFrame
df_medications

Unnamed: 0,name,side_effects,uses,chemical_class,habit_forming,therapeutic_class,action_class
0,a 1 5mg tablet,"{sleepiness, nausea, headache, fatigue, abdomi...",{'prevention of angina (heart-related chest pa...,{'dihydropyridinecarboxylic acids derivatives'},{no},{'cardiac'},{'calcium channel blockers- dihydropyridines (...
1,a 250 suspension,"{nausea, stomach pain, vomiting, indigestion}","{' pain relief', 'treatment of fever'}",{'p-aminophenol derivative'},{no},{'pain analgesics'},{'analgesic & antipyretic-pcm'}
2,a 3 100 mg/500 mg tablet,"{increased liver enzymes, nausea, vomiting, di...",{' pain relief'},{nan},{no},{'pain analgesics'},{nan}
3,a arti 60mg injection,"{injection site reactions (pain, swelling, red...",{' malaria'},{'sesquiterpene lactones'},{no},{'anti malarials'},{'antimalarial- artemisinin and derivatives'}
4,a arti l 80mg/480mg tablet,"{joint pain, dizziness, weakness, loss of appe...",{'treatment of malaria'},{nan},{no},{'anti malarials'},{nan}
...,...,...,...,...,...,...,...
222820,zyxtil 500mg tablet,"{allergic reaction, diarrhea, nausea, rash, vo...",{'treatment of bacterial infections'},{'intermediate spectrum {second generation cep...,{no},{'anti infectives'},{'cephalosporins: 2nd generation'}
222821,zyzer syrup,"{sleepiness, blurred vision, dryness in mouth,...",{' appetite stimulant'},{nan},{no},{'vitamins minerals nutrients'},{nan}
222822,zyzine 25mg tablet,"{nausea, vomiting, constipation, upset stomach...","{'treatment of anxiety', 'treatment of skin co...",{'piperazine derivative'},{no},{'respiratory'},{'h1 antihistaminics (first generation)'}
222823,zyzolide 600mg tablet,"{diarrhea, nausea, vomiting, decreased blood c...",{'treatment of severe bacterial infections'},{'oxazolidinone derivative'},{no},{'anti infectives'},{'oxazolidinone'}


# Bipartite Network

In [4]:
# Explode the side effects and habit forming columns
df_medications = df_medications \
    .explode(column='side_effects') \
    .explode(column='habit_forming')

# Filter the habit forming medications
df_medications = df_medications.query('habit_forming == "yes"')

In [5]:
# Create a DataFrame only for medications
df_meds = df_medications \
    .drop(columns='side_effects') \
    .rename(columns={'name': 'id'}) \
    .drop_duplicates() \
    .reset_index(drop=True)
df_meds['label'] = df_meds['id']
df_meds['type'] = 'medication'

# Print the DataFrame of medications
df_meds

Unnamed: 0,id,uses,chemical_class,habit_forming,therapeutic_class,action_class,label,type
0,a-tryp forte tablet,{'treatment of depression'},{nan},yes,{'neuro cns'},{nan},a-tryp forte tablet,medication
1,aaram 0.5mg tablet,"{'treatment of anxiety', 'treatment of panic d...",{'benzodiazepines derivative'},yes,{'neuro cns'},{'benzodiazepines'},aaram 0.5mg tablet,medication
2,aaram tablet,"{'treatment of anxiety', 'treatment of panic d...",{'benzodiazepines derivative'},yes,{'neuro cns'},{'benzodiazepines'},aaram tablet,medication
3,abdocool tablet,{'treatment of irritable bowel syndrome'},{nan},yes,{'gastro intestinal'},{nan},abdocool tablet,medication
4,abidol 100mg injection,{' moderate to severe pain'},{'anisole derivative'},yes,{'pain analgesics'},{'opioids'},abidol 100mg injection,medication
...,...,...,...,...,...,...,...,...
5448,zytram p 37.5mg/325mg tablet,{' moderate to severe pain'},{nan},yes,{'pain analgesics'},{nan},zytram p 37.5mg/325mg tablet,medication
5449,zytram pd tablet,"{'treatment of severe acute pain', 'treatment ...",{nan},yes,{'pain analgesics'},{nan},zytram pd tablet,medication
5450,zytramol tablet,{' moderate to severe pain'},{nan},yes,{'pain analgesics'},{nan},zytramol tablet,medication
5451,zyven od plus 100 mg/0.5 mg tablet,{'treatment of depression'},{nan},yes,{'neuro cns'},{nan},zyven od plus 100 mg/0.5 mg tablet,medication


In [6]:
# Create a DataFrame only for side effects
df_effects = df_medications \
    [['side_effects']] \
    .rename(columns={'side_effects': 'id'}) \
    .drop_duplicates() \
    .reset_index(drop=True)
df_effects['label'] = df_effects['id']
df_effects['type'] = 'side effect'

# Print the DataFrame of side effects
df_effects

Unnamed: 0,id,label,type
0,weight gain,weight gain,side effect
1,memory impairment,memory impairment,side effect
2,difficulty in urination,difficulty in urination,side effect
3,tiredness,tiredness,side effect
4,depression,depression,side effect
...,...,...,...
127,circulatory disorder,circulatory disorder,side effect
128,cardiac arrest,cardiac arrest,side effect
129,shock,shock,side effect
130,apnea (absence of breathing),apnea (absence of breathing),side effect


In [7]:
# Create a DataFrame for the nodes
df_bipatite_nodes = pd.concat(
    objs=[df_meds, df_effects], ignore_index=True
)

# Store the DataFrame of bipartite network nodes
df_bipatite_nodes.to_csv(
    '../data/processed/bipartite-network-nodes.csv', index=False
)

# Print the DataFrame of bipartite network nodes
df_bipatite_nodes

Unnamed: 0,id,uses,chemical_class,habit_forming,therapeutic_class,action_class,label,type
0,a-tryp forte tablet,{'treatment of depression'},{nan},yes,{'neuro cns'},{nan},a-tryp forte tablet,medication
1,aaram 0.5mg tablet,"{'treatment of anxiety', 'treatment of panic d...",{'benzodiazepines derivative'},yes,{'neuro cns'},{'benzodiazepines'},aaram 0.5mg tablet,medication
2,aaram tablet,"{'treatment of anxiety', 'treatment of panic d...",{'benzodiazepines derivative'},yes,{'neuro cns'},{'benzodiazepines'},aaram tablet,medication
3,abdocool tablet,{'treatment of irritable bowel syndrome'},{nan},yes,{'gastro intestinal'},{nan},abdocool tablet,medication
4,abidol 100mg injection,{' moderate to severe pain'},{'anisole derivative'},yes,{'pain analgesics'},{'opioids'},abidol 100mg injection,medication
...,...,...,...,...,...,...,...,...
5580,circulatory disorder,,,,,,circulatory disorder,side effect
5581,cardiac arrest,,,,,,cardiac arrest,side effect
5582,shock,,,,,,shock,side effect
5583,apnea (absence of breathing),,,,,,apnea (absence of breathing),side effect


In [8]:
# Create a DataFrame for the edges
df_bipatite_edges = df_medications \
    [['name', 'side_effects']] \
    .rename(columns={'name': 'source', 'side_effects': 'target'}) \
    .reset_index(drop=True)

# Store the DataFrame of bipartite network edges
df_bipatite_edges.to_csv(
    '../data/processed/bipartite-network-edges.csv', index=False
)

# Store the DataFrame of bipartite network edges
df_bipatite_edges

Unnamed: 0,source,target
0,a-tryp forte tablet,weight gain
1,a-tryp forte tablet,memory impairment
2,a-tryp forte tablet,difficulty in urination
3,a-tryp forte tablet,tiredness
4,a-tryp forte tablet,depression
...,...,...
45027,zyven od plus 50 mg/0.5 mg tablet,uncoordinated body movements
45028,zyven od plus 50 mg/0.5 mg tablet,constipation
45029,zyven od plus 50 mg/0.5 mg tablet,headache
45030,zyven od plus 50 mg/0.5 mg tablet,confusion


# Medication Projection

In [9]:
# # Determine the set of neighbors for each node
# neighbors = defaultdict(set)
# for _, row in df_bipatite_edges.iterrows():
#    neighbors[row['source']].add(row['target'])

# df_meds_proj = calculate_jaccard_associations(neighbors)

In [10]:
# df_meds_proj.query('association == 1')

# Side Effects Projection

In [11]:
# # Determine the set of neighbors for each node
# neighbors = defaultdict(set)
# for _, row in df_bipatite_edges.iterrows():
#    neighbors[row['target']].add(row['source'])

# df_effects_proj = calculate_jaccard_associations(neighbors)

In [12]:
# df_effects_proj.query('association == 1')