In [9]:
import pandas as pd
import json
from math import radians, sin, cos, sqrt, atan2

In [10]:
# Read the tuberculosis dataset.
# NOTE(review): this is an absolute, machine-specific path; the notebook will only
# run where this exact location exists — consider a configurable data directory.
data = pd.read_csv(r'E:\University\Semester 5\DAV\Project\Dataset.csv')

# Read the country lookup table, keep only the ISO3 code and the average
# coordinates, and give those columns shorter names.
geo_data = (
    pd.read_csv('countries.csv', quotechar='"', skipinitialspace=True)
    .drop(columns=['Country', 'Alpha-2 code', 'Numeric code'])
    .rename(columns={
        'Alpha-3 code': 'iso3',
        'Latitude (average)': 'latitude',
        'Longitude (average)': 'longitude',
    })
)

geo_data.head()


Unnamed: 0,iso3,latitude,longitude
0,AFG,33.0,65.0
1,ALA,60.116667,19.9
2,ALB,41.0,20.0
3,DZA,28.0,3.0
4,ASM,-14.3333,-170.0


In [11]:
def _strip_quotes(value):
    """Remove leading/trailing double quotes from strings; return any other value unchanged."""
    return value.strip('"') if isinstance(value, str) else value


# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map.
# Look the method up on the class (not the instance, where a column called "map"
# could shadow it) and fall back to applymap on older pandas versions.
_elementwise_name = 'map' if hasattr(pd.DataFrame, 'map') else 'applymap'
geo_data = getattr(geo_data, _elementwise_name)(_strip_quotes)

# Convert the coordinate columns to numbers; values that cannot be parsed become NaN.
geo_data['latitude'] = pd.to_numeric(geo_data['latitude'], errors='coerce')
geo_data['longitude'] = pd.to_numeric(geo_data['longitude'], errors='coerce')
geo_data

Unnamed: 0,iso3,latitude,longitude
0,AFG,33.000000,65.0
1,ALA,60.116667,19.9
2,ALB,41.000000,20.0
3,DZA,28.000000,3.0
4,ASM,-14.333300,-170.0
...,...,...,...
257,WLF,-13.300000,-176.2
258,ESH,24.500000,-13.0
259,YEM,15.000000,48.0
260,ZMB,-15.000000,30.0


In [12]:
# Attach coordinates to every dataset row whose ISO3 code appears in geo_data
# (inner join: rows without a match are dropped), then remove exact duplicate rows.
merged_data = data.merge(geo_data, on='iso3', how='inner').drop_duplicates()
merged_data.head()

Unnamed: 0,iso3,year,Tuberculosis_Deaths,Country,region,subregion,latitude,longitude
0,AFG,2000,6370.347,Afghanistan,Asia,Southern Asia,33.0,65.0
1,AFG,2001,6116.266,Afghanistan,Asia,Southern Asia,33.0,65.0
2,AFG,2002,5808.648,Afghanistan,Asia,Southern Asia,33.0,65.0
3,AFG,2003,6188.514,Afghanistan,Asia,Southern Asia,33.0,65.0
4,AFG,2004,5735.92,Afghanistan,Asia,Southern Asia,33.0,65.0


In [13]:
# Define the Haversine function to calculate distances
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometers between two points.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in degrees.

    Returns
    -------
    float
        Distance along a sphere of radius 6371 km. NaN inputs yield NaN.
    """
    R = 6371.0  # Earth radius in kilometers
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Rounding can leave `a` a hair outside [0, 1] (e.g. near-antipodal points),
    # which would make sqrt(1 - a) raise "math domain error". Clamp with plain
    # comparisons so that a NaN value still propagates instead of being replaced.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c

In [14]:
# Two countries are linked only when their haversine distance (km) is below this value.
distance_threshold = 2000


def _json_safe(value):
    """Return None for a missing value (NaN/None), otherwise the value unchanged.

    json.dump writes NaN as the bare token ``NaN``, which is not valid JSON;
    None is written as ``null`` instead.
    """
    return None if pd.isna(value) else value


def generate_graph_data(year):
    """Build the node/link dictionary for one year of ``merged_data``.

    Reads the module-level ``merged_data`` frame, ``distance_threshold`` and
    ``haversine``.

    Parameters
    ----------
    year : int
        Value matched against the ``year`` column of ``merged_data``.

    Returns
    -------
    dict
        ``{'nodes': [...], 'links': [...]}``. Each node holds the country name
        as ``id`` plus region, subregion, Tuberculosis_Deaths, latitude and
        longitude (missing values are ``None``). Each link joins two nodes
        whose distance is below ``distance_threshold``; its ``value`` is the
        distance min-max normalized over the links of that year (0 when all
        link distances are equal).
    """
    # Filter data for the specific year
    year_data = merged_data[merged_data['year'] == year]

    # One node per row of that year, identified by country name. Uniqueness of
    # the ids is not enforced here; it depends on merged_data.
    nodes = [
        {
            'id': _json_safe(row['Country']),
            'region': _json_safe(row['region']),
            'subregion': _json_safe(row['subregion']),
            'Tuberculosis_Deaths': _json_safe(row['Tuberculosis_Deaths']),
            'latitude': _json_safe(row['latitude']),
            'longitude': _json_safe(row['longitude']),
        }
        for _, row in year_data.iterrows()
    ]

    def _has_coordinates(node):
        return node['latitude'] is not None and node['longitude'] is not None

    # Create links (geographical distances between each pair of countries).
    # Nodes without coordinates cannot be measured and therefore get no links.
    links = []
    for i, first in enumerate(nodes):
        if not _has_coordinates(first):
            continue
        for second in nodes[i + 1:]:
            if not _has_coordinates(second):
                continue
            distance = haversine(first['latitude'], first['longitude'],
                                 second['latitude'], second['longitude'])
            if distance < distance_threshold:
                links.append({'source': first['id'], 'target': second['id'], 'value': distance})

    # Min-max normalize the link distances (skipped when there are no links)
    if links:
        max_distance = max(link['value'] for link in links)
        min_distance = min(link['value'] for link in links)
        has_spread = max_distance > min_distance
        span = max_distance - min_distance
        for link in links:
            # All-equal distances would divide by zero, so they collapse to 0
            link['value'] = (link['value'] - min_distance) / span if has_spread else 0

    # Construct the graph dictionary
    graph = {'nodes': nodes, 'links': links}

    return graph
        
# Export one force-graph JSON file per year, covering 2000 through 2019
# (range's upper bound is exclusive).
for year in range(2000, 2020):
    output_path = f'force_graph_{year}.json'
    graph_data = generate_graph_data(year)
    with open(output_path, 'w') as f:
        json.dump(graph_data, f, indent=4)

print("JSON files created for each year from 2000 to 2019.")


JSON files created for each year from 2000 to 2019.
