# **Network Construction**

# Import Libraries

In [1]:
import ast
from collections import defaultdict
from itertools import combinations

import pandas as pd

# Function

In [2]:
def calculate_jaccard_associations(neighbors):
    # Create a list to store the results
    results = list()

    # Compute the Jaccard index between each combination of different nodes
    for node_a, node_b in combinations(neighbors, 2):
        # Get the sets of neighbors from both nodes
        set_a = neighbors[node_a]
        set_b = neighbors[node_b]
        
        # Compute the Jaccard index between both sets
        intersection = len(set_a & set_b)
        union = len(set_a | set_b)
        jaccard = intersection / union if union else 0
        
        # Insert the result for this combination in the list
        results.append((node_a, node_b, jaccard))
    
    # Create a DataFrame for the inferred associations
    columns = ['source', 'target', 'association']
    df_associations = pd.DataFrame(results, columns=columns) \
        .sort_values(by='association', ascending=False)
    
    return df_associations

# Load the Processed Dataset

In [3]:
# DataFrame with processed data about medications and side effects
df_medications = pd.read_csv('../data/processed/medicine-dataset.csv')

# Convert side effects representations of sets to actual sets
df_medications['side_effects'] = \
    df_medications['side_effects'].apply(ast.literal_eval)

# Print the DataFrame
df_medications

Unnamed: 0,name,side_effects,uses,chemical_class,habit_forming,therapeutic_class,action_class
0,1 al plus 5mg/120mg capsule,"{sleepiness, headache, vomiting, dryness in mo...",{' sneezing and runny nose due to allergies'},{nan},{'no'},{'respiratory'},{nan}
1,1 nvp tablet,"{sleepiness, constipation, dryness in mouth, d...",{'treatment of nausea and vomiting in pregnancy'},{nan},{'no'},{'gastro intestinal'},{nan}
2,1-al 10 tablet,"{sleepiness, headache, nasopharyngitis (inflam...",{'treatment of allergic conditions'},{'piperazine derivatives'},{'no'},{'respiratory'},{'h1 antihistaminics (second generation)'}
3,1-al m syrup,"{sleepiness, headache, skin rash, dryness in m...",{'treatment of sneezing and runny nose due to ...,{nan},{'no'},{'respiratory'},{nan}
4,1-al syrup,"{sleepiness, constipation, headache, nasophary...",{'treatment of allergic conditions'},{'piperazine derivatives'},{'no'},{'respiratory'},{'h1 antihistaminics (second generation)'}
...,...,...,...,...,...,...,...
230601,zyxtil 500mg tablet,"{vomiting, nausea, rash, allergic reaction, di...",{'treatment of bacterial infections'},{'intermediate spectrum {second generation cep...,{'no'},{'anti infectives'},{'cephalosporins: 2nd generation'}
230602,zyzer syrup,"{sleepiness, constipation, dryness in mouth, d...",{' appetite stimulant'},{nan},{'no'},{'vitamins minerals nutrients'},{nan}
230603,zyzine 25mg tablet,"{constipation, vomiting, sedation, nausea, ups...","{'treatment of anxiety', 'treatment of skin co...",{'piperazine derivative'},{'no'},{'respiratory'},{'h1 antihistaminics (first generation)'}
230604,zyzolide 600mg tablet,"{headache, vomiting, nausea, decreased blood c...",{'treatment of severe bacterial infections'},{'oxazolidinone derivative'},{'no'},{'anti infectives'},{'oxazolidinone'}


# Bipartite Network

In [4]:
# Explode the side effects column
df_medications = df_medications.explode(column='side_effects')

# Create a DataFrame only for medications
df_meds = df_medications \
    .drop(columns='side_effects') \
    .rename(columns={'name': 'id'}) \
    .drop_duplicates()
df_meds['label'] = df_meds['id']
df_meds['type'] = 'medication'

# Create a DataFrame only for side effects
df_effects = df_medications \
    [['side_effects']] \
    .rename(columns={'side_effects': 'id'}) \
    .drop_duplicates()
df_effects['label'] = df_effects['id']
df_effects['type'] = 'side effect'

# Create a DataFrame for the nodes
df_bipatite_nodes = pd.concat(
    objs=[df_meds, df_effects], ignore_index=True
)

# Create a DataFrame for the edges
df_bipatite_edges = df_medications \
    [['name', 'side_effects']] \
    .rename(columns={'name': 'source', 'side_effects': 'target'}) \
    .reset_index(drop=True)

In [5]:
# Store the DataFrame of bipartite network nodes
df_bipatite_nodes.to_csv(
    '../data/processed/bipartite-network-nodes.csv', index=False
)

# Print the DataFrame of bipartite network nodes
df_bipatite_nodes

Unnamed: 0,id,uses,chemical_class,habit_forming,therapeutic_class,action_class,label,type
0,1 al plus 5mg/120mg capsule,{' sneezing and runny nose due to allergies'},{nan},{'no'},{'respiratory'},{nan},1 al plus 5mg/120mg capsule,medication
1,1 nvp tablet,{'treatment of nausea and vomiting in pregnancy'},{nan},{'no'},{'gastro intestinal'},{nan},1 nvp tablet,medication
2,1-al 10 tablet,{'treatment of allergic conditions'},{'piperazine derivatives'},{'no'},{'respiratory'},{'h1 antihistaminics (second generation)'},1-al 10 tablet,medication
3,1-al m syrup,{'treatment of sneezing and runny nose due to ...,{nan},{'no'},{'respiratory'},{nan},1-al m syrup,medication
4,1-al syrup,{'treatment of allergic conditions'},{'piperazine derivatives'},{'no'},{'respiratory'},{'h1 antihistaminics (second generation)'},1-al syrup,medication
...,...,...,...,...,...,...,...,...
231655,bone weakness,,,,,,bone weakness,side effect
231656,tightness sensation,,,,,,tightness sensation,side effect
231657,feeling of cold,,,,,,feeling of cold,side effect
231658,delusion,,,,,,delusion,side effect


In [6]:
# Store the DataFrame of bipartite network edges
df_bipatite_edges.to_csv(
    '../data/processed/bipartite-network-edges.csv', index=False
)

# Store the DataFrame of bipartite network edges
df_bipatite_edges

Unnamed: 0,source,target
0,1 al plus 5mg/120mg capsule,sleepiness
1,1 al plus 5mg/120mg capsule,headache
2,1 al plus 5mg/120mg capsule,vomiting
3,1 al plus 5mg/120mg capsule,dryness in mouth
4,1 al plus 5mg/120mg capsule,nausea
...,...,...
1506039,α-bert 150 injection,headache
1506040,α-bert 150 injection,vomiting
1506041,α-bert 150 injection,nausea
1506042,α-bert 150 injection,abdominal pain


# Medication Projection

In [7]:
# # Determine the set of neighbors for each node
# neighbors = defaultdict(set)
# for _, row in df_bipatite_edges.iterrows():
#    neighbors[row['source']].add(row['target'])

# calculate_jaccard_associations(neighbors)

# Side Effects Projection

In [8]:
# # Determine the set of neighbors for each node
# neighbors = defaultdict(set)
# for _, row in df_bipatite_edges.iterrows():
#    neighbors[row['target']].add(row['source'])

# calculate_jaccard_associations(neighbors)