In [None]:
import networkx as nx
import pandas as pd
import json

# Load the full dataset from the CSV file
data = pd.read_csv("deaths_gdp_obesity_sorted.csv")

# Start from an empty undirected graph
G = nx.Graph()

# Keep only the rows whose 'Year' column equals 2019
data_2019 = data.loc[data['Year'].eq(2019)]

# Bucket a death count into a fixed-width range
def get_death_range(deaths, bucket_size=5000):
    """Return the lower bound of the fixed-width bucket that contains *deaths*.

    The value is floored to the nearest multiple of ``bucket_size`` using
    floor division, so with the default width 12345 maps to 10000 and any
    value from 0 to 4999 maps to 0.

    Args:
        deaths: Number to bucket.
        bucket_size: Width of each bucket. Defaults to 5000, which matches
            the previously hard-coded behaviour. Must be non-zero, otherwise
            the floor division raises ``ZeroDivisionError``.

    Returns:
        The largest multiple of ``bucket_size`` that is less than or equal to
        ``deaths``.
    """
    return (deaths // bucket_size) * bucket_size

# Columns excluded from the search for the largest value
excluded_columns = ['Country/Territory', 'Code', 'Year', 'total_deaths', 'Value', 'Obesity (%)']

# Every row shares the DataFrame's columns, so the candidate columns are
# selected once here instead of being rebuilt for each row.
relevant_columns = [col for col in data_2019.columns if col not in excluded_columns]

# Add one node per row, keyed by the row's 'Code'
for _, row in data_2019.iterrows():
    country_name = row['Country/Territory']
    country_code = row['Code']

    # Pick the candidate column holding the largest value. Missing values are
    # ranked as 0, and ties are broken by column name (the name that sorts
    # last wins) because the key is a (value, column_name) tuple.
    max_death_disease = max(
        relevant_columns,
        key=lambda col: (row[col], col) if pd.notna(row[col]) else (0, col),
    )
    max_death_value = row[max_death_disease]

    # Add nodes
    G.add_node(country_code, name=country_name, disease=max_death_disease, deaths=max_death_value)

# Link every pair of nodes whose disease and death range both match.
#
# G is an undirected graph, so (a, b) and (b, a) are the same edge. Each
# unordered pair is therefore examined exactly once by pairing every node
# with the nodes that come after it, which removes the need for has_edge
# checks. The death range is also computed once per node rather than once
# per pair.
node_info = [
    (node, attrs['disease'], get_death_range(attrs['deaths']))
    for node, attrs in G.nodes(data=True)
]

for position, (source_node, source_disease, source_range) in enumerate(node_info):
    for target_node, target_disease, target_range in node_info[position + 1:]:
        if source_disease == target_disease and source_range == target_range:
            G.add_edge(source_node, target_node, disease=source_disease, death_range=source_range)

# Flatten the graph into plain node records for the JSON output
node_records = [
    {
        "id": node_id,
        "name": attrs['name'],
        "disease": attrs['disease'],
        "deaths": attrs['deaths'],
    }
    for node_id, attrs in G.nodes(data=True)
]

# Flatten the edges the same way, carrying over their attributes
link_records = [
    {
        "source": source,
        "target": target,
        "disease": attrs['disease'],
        "death_range": attrs['death_range'],
    }
    for source, target, attrs in G.edges(data=True)
]

network_data = {"nodes": node_records, "links": link_records}

# Serialise to an indented JSON string
json_data = json.dumps(network_data, indent=2)

# Write the JSON string to disk
with open("network_data.json", "w") as output_file:
    output_file.write(json_data)


In [None]:
import networkx as nx
import pandas as pd
import json

# Undirected graph that will hold the nodes and their links
G = nx.Graph()

# Load the complete dataset from the CSV file into a DataFrame
data = pd.read_csv("deaths_gdp_obesity_sorted.csv")

# Bucket a death count into a fixed-width range
def get_death_range(deaths, bucket_size=5000):
    """Return the lower bound of the fixed-width bucket that contains *deaths*.

    The value is floored to the nearest multiple of ``bucket_size`` using
    floor division, so with the default width 12345 maps to 10000 and any
    value from 0 to 4999 maps to 0.

    Args:
        deaths: Number to bucket.
        bucket_size: Width of each bucket. Defaults to 5000, which matches
            the previously hard-coded behaviour. Must be non-zero, otherwise
            the floor division raises ``ZeroDivisionError``.

    Returns:
        The largest multiple of ``bucket_size`` that is less than or equal to
        ``deaths``.
    """
    return (deaths // bucket_size) * bucket_size

# Create a dictionary to store data for each year
all_years_data = {}

# Columns excluded from the search for the largest value
excluded_columns = ['Country/Territory', 'Code', 'Year', 'total_deaths', 'Value', 'Obesity (%)']

# The column set is identical for every row and every year, so the candidate
# columns are selected once here instead of being rebuilt for each row.
relevant_columns = [col for col in data.columns if col not in excluded_columns]

# Build one network layer per year
for year in range(1990, 2020):  # Assuming the data spans from 1990 to 2019
    # Filter the data for the current year
    data_year = data[data['Year'] == year]

    # Add one node per row, keyed by the row's 'Code'
    for _, row in data_year.iterrows():
        country_name = row['Country/Territory']
        country_code = row['Code']

        # Pick the candidate column holding the largest value. Missing values
        # are ranked as 0, and ties are broken by column name (the name that
        # sorts last wins) because the key is a (value, column_name) tuple.
        max_death_disease = max(
            relevant_columns,
            key=lambda col: (row[col], col) if pd.notna(row[col]) else (0, col),
        )
        max_death_value = row[max_death_disease]

        # Add nodes
        G.add_node(country_code, name=country_name, disease=max_death_disease, deaths=max_death_value, year=year)

    # Link every pair of nodes whose disease and death range both match.
    # G is undirected and empty of edges at this point, so each unordered
    # pair is examined exactly once by pairing every node with the nodes that
    # come after it; no has_edge checks are needed. The death range is
    # computed once per node rather than once per pair.
    node_info = [
        (node, attrs['disease'], get_death_range(attrs['deaths']))
        for node, attrs in G.nodes(data=True)
    ]
    for position, (source_node, source_disease, source_range) in enumerate(node_info):
        for target_node, target_disease, target_range in node_info[position + 1:]:
            if source_disease == target_disease and source_range == target_range:
                G.add_edge(source_node, target_node, disease=source_disease, death_range=source_range)

    # Create a dictionary for the current year
    year_data = {
        "nodes": [
            {
                "id": node,
                "name": attrs['name'],
                "disease": attrs['disease'],
                "deaths": attrs['deaths'],
                "year": attrs['year'],
            }
            for node, attrs in G.nodes(data=True)
        ],
        "links": [
            {
                "source": source,
                "target": target,
                "disease": attrs['disease'],
                "death_range": attrs['death_range'],
            }
            for source, target, attrs in G.edges(data=True)
        ],
    }

    # Store the data for the current year in the dictionary
    all_years_data[year] = year_data

    # Clear the graph so the next year starts with no nodes or edges
    G.clear()

# Wrap the per-year layers under a single top-level "years" key
multi_layer_payload = {"years": all_years_data}

# Serialise to an indented JSON string
json_data = json.dumps(multi_layer_payload, indent=2)

# Write the JSON string to disk
with open("network_data_multi_layered.json", "w") as output_file:
    output_file.write(json_data)
