# Extract the Olympic, Population, Population Density, and GDP CSV Data

In [1]:
# Import dependencies
import pandas as pd
import json
# Let string cells render up to 400 characters before pandas truncates them.
pd.options.display.max_colwidth = 400

In [2]:
# Load the Olympic CSV from the Resources folder; row 0 of the file supplies
# the column names.
olympic_df = pd.read_csv(filepath_or_buffer='Resources/Olympics.csv', header=0)
# Show the first rows as a quick check of the load.
olympic_df.head()

Unnamed: 0,ID,Name,Sex,Age,Height,Weight,Team,NOC,Games,Year,Season,City,Sport,Event,Medal
0,1,A Dijiang,M,24.0,180.0,80.0,China,CHN,1992 Summer,1992,Summer,Barcelona,Basketball,Basketball Men's Basketball,
1,2,A Lamusi,M,23.0,170.0,60.0,China,CHN,2012 Summer,2012,Summer,London,Judo,Judo Men's Extra-Lightweight,
2,3,Gunnar Nielsen Aaby,M,24.0,,,Denmark,DEN,1920 Summer,1920,Summer,Antwerpen,Football,Football Men's Football,
3,4,Edgar Lindenau Aabye,M,34.0,,,Denmark,DEN,1900 Summer,1900,Summer,Paris,Tug-Of-War,Tug-Of-War Men's Tug-Of-War,Gold
4,5,Christine Jacoba Aaftink,F,21.0,185.0,82.0,Netherlands,NED,1988 Winter,1988,Winter,Calgary,Speed Skating,Speed Skating Women's 500 metres,


In [3]:
# Convert the DataFrame to a list of dictionaries (one per row).
# The Olympic data has missing values (e.g. Height, Weight, Medal). Left as
# NaN, json.dump would write them as the bare token NaN, which is not valid
# JSON and is rejected by strict parsers. Replace them with None so they are
# written as null instead. The cast to object stops pandas from coercing None
# back to NaN inside the numeric columns.
dict_values = (
    olympic_df
    .astype(object)
    .where(olympic_df.notna(), None)
    .to_dict(orient='records')
)

# Write the list of dictionaries to a JSON file.
with open('olympics.json', 'w') as json_file:
    json.dump(dict_values, json_file, indent=4)

print("Data has been successfully converted to JSON and saved to 'olympics.json'")

Data has been successfully converted to JSON and saved to 'olympics.json'


In [4]:
# Load the population-density CSV from the Resources folder; row 0 of the
# file supplies the column names.
density_df = pd.read_csv(filepath_or_buffer='Resources/Density_load.csv', header=0)
# Show the first rows as a quick check of the load.
density_df.head()

Unnamed: 0,Country Name,Country Code,D-Indicator Name,D-Indicator Code,Attribute.1,Year,Value
0,Aruba,ABW,Population density (people per sq. km of land area),EN.POP.DNST,D,1961,310.061111
1,Aruba,ABW,Population density (people per sq. km of land area),EN.POP.DNST,D,1962,314.9
2,Aruba,ABW,Population density (people per sq. km of land area),EN.POP.DNST,D,1963,319.305556
3,Aruba,ABW,Population density (people per sq. km of land area),EN.POP.DNST,D,1964,323.211111
4,Aruba,ABW,Population density (people per sq. km of land area),EN.POP.DNST,D,1965,326.566667


In [5]:
# Print the column dtypes and non-null counts of the density data.
density_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14901 entries, 0 to 14900
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Country Name      14901 non-null  object 
 1   Country Code      14901 non-null  object 
 2   D-Indicator Name  14901 non-null  object 
 3   D-Indicator Code  14901 non-null  object 
 4   Attribute.1       14901 non-null  object 
 5   Year              14901 non-null  int64  
 6   Value             14901 non-null  float64
dtypes: float64(1), int64(1), object(5)
memory usage: 815.0+ KB


In [6]:
# Turn every density row into its own {column: value} dictionary.
dict_values = density_df.to_dict(orient='records')

# Serialise the records with 4-space indentation and store them in
# 'density.json' in the working directory.
with open('density.json', mode='w') as json_file:
    json_file.write(json.dumps(dict_values, indent=4))

print("Data has been successfully converted to JSON and saved to 'density.json'")

Data has been successfully converted to JSON and saved to 'density.json'


In [7]:
# Load the GDP CSV from the Resources folder; row 0 of the file supplies the
# column names.
gdp_df = pd.read_csv(filepath_or_buffer='Resources/GDP_load.csv', header=0)
# Show the first rows as a quick check of the load.
gdp_df.head()

Unnamed: 0,Country Name,Country Code,G-Indicator Name,G-Indicator Code,Attribute.1,Year,Value
0,Aruba,ABW,GDP (current US$),NY.GDP.MKTP.CD,G,1986,405586592.2
1,Aruba,ABW,GDP (current US$),NY.GDP.MKTP.CD,G,1987,487709497.2
2,Aruba,ABW,GDP (current US$),NY.GDP.MKTP.CD,G,1988,596648044.7
3,Aruba,ABW,GDP (current US$),NY.GDP.MKTP.CD,G,1989,695530726.3
4,Aruba,ABW,GDP (current US$),NY.GDP.MKTP.CD,G,1990,764804469.3


In [8]:
# Turn every GDP row into its own {column: value} dictionary.
dict_values = gdp_df.to_dict(orient='records')

# Serialise the records with 4-space indentation and store them in
# 'gdp.json' in the working directory.
with open('gdp.json', mode='w') as json_file:
    json_file.write(json.dumps(dict_values, indent=4))

print("Data has been successfully converted to JSON and saved to 'gdp.json'")

Data has been successfully converted to JSON and saved to 'gdp.json'


In [9]:
# Load the population CSV from the Resources folder; row 0 of the file
# supplies the column names.
population_df = pd.read_csv(filepath_or_buffer='Resources/Population_load.csv', header=0)
# Show the first rows as a quick check of the load.
population_df.head()

Unnamed: 0,Country Name,Country Code,P-Indicator Name,P-Indicator Code,Attribute.1,Year,Value
0,Aruba,ABW,"Population, total",SP.POP.TOTL,P,1960,54608
1,Aruba,ABW,"Population, total",SP.POP.TOTL,P,1961,55811
2,Aruba,ABW,"Population, total",SP.POP.TOTL,P,1962,56682
3,Aruba,ABW,"Population, total",SP.POP.TOTL,P,1963,57475
4,Aruba,ABW,"Population, total",SP.POP.TOTL,P,1964,58178


In [10]:
# Turn every population row into its own {column: value} dictionary.
dict_values = population_df.to_dict(orient='records')

# Serialise the records with 4-space indentation and store them in
# 'population.json' in the working directory.
with open('population.json', mode='w') as json_file:
    json_file.write(json.dumps(dict_values, indent=4))

print("Data has been successfully converted to JSON and saved to 'population.json'")

Data has been successfully converted to JSON and saved to 'population.json'


In [11]:
# Read the per-indicator JSON files (density, GDP, population).
with open('density.json', 'r') as f:
    density_data = json.load(f)

with open('gdp.json', 'r') as f:
    gdp_data = json.load(f)

with open('population.json', 'r') as f:
    population_data = json.load(f)

# Columns that identify the (year, country) pair itself. They are stored once
# under their original name instead of once per dataset.
shared_columns = {'Year', 'Country Name', 'Country Code'}

# Merge the records on ('Year', 'Country Name').
# Each dataset has its own 'Value' and 'Attribute.1' columns. Copying the
# records verbatim would let the last dataset overwrite the values of the
# others, leaving a single ambiguous 'Value' per merged record. Columns that
# are specific to a dataset are therefore stored under that dataset's prefix
# ('D-Value', 'G-Value', 'P-Value', ...), in line with the existing
# 'D-Indicator Name' / 'G-Indicator Name' / 'P-Indicator Name' columns.
merged_data = {}
for prefix, data in (('D-', density_data), ('G-', gdp_data), ('P-', population_data)):
    for entry in data:
        key = (entry['Year'], entry['Country Name'])
        merged_entry = merged_data.setdefault(key, {})
        for column, value in entry.items():
            if column in shared_columns or column.startswith(prefix):
                # Shared or already-prefixed column: keep its name as is.
                merged_entry[column] = value
            else:
                merged_entry[prefix + column] = value

# Convert merged dictionary to a list of dictionaries
merged_list = list(merged_data.values())

# Write the list of dictionaries to a JSON file
with open('merged_data.json', 'w') as json_file:
    json.dump(merged_list, json_file, indent=4)

print("Data has been successfully merged and saved to 'merged_data.json'")


Data has been successfully merged and saved to 'merged_data.json'
