In [1]:
# import dependencies
import pandas as pd
import numpy as np
import pymongo
from pymongo import MongoClient

In [2]:
# Nuclear weapons inventory data.
nuke_file = pd.read_csv("data/nuke_number.csv")

# read_csv already returns a DataFrame, so it can be renamed directly.
# "Entity" becomes "Country", the label the later cells use.
nuke_df = nuke_file.rename(columns={"Entity": "Country"})

nuke_df.head()

Unnamed: 0,Country,Code,Year,Nuclear weapons inventory by country (FAS Nuclear Notebook)
0,China,CHN,1945,0
1,China,CHN,1946,0
2,China,CHN,1947,0
3,China,CHN,1948,0
4,China,CHN,1949,0


In [3]:
# GDP data.
gdp_file = pd.read_csv("data/GDP_Data_per_Country.csv")

gdp_df = (
    gdp_file
    # Normalise the column labels used by the later cells.
    .rename(columns={"country": "Country", "series": "Series", "Date": "Year"})
    # Shorten the country label; presumably so it matches the spelling used
    # in the nuclear table -- confirm against that file.
    .assign(Country=lambda frame: frame["Country"].replace({"Russian Federation": "Russia"}))
    # These two columns are not used in the rest of the notebook.
    .drop(columns=["Series", "Unit"])
)

gdp_df.head()

Unnamed: 0,Country,Year,Value
0,China,1960,1483.494071
1,China,1961,1089.958051
2,China,1962,1020.729312
3,China,1963,1098.531898
4,China,1964,1268.463747


In [4]:
# Add a "<Country> <Year>" text key to both tables; the merge cell joins on it.
for frame in (nuke_df, gdp_df):
    frame["Country_Year"] = frame["Country"] + " " + frame["Year"].astype(str)

In [5]:
# Join the two tables on the shared "Country_Year" key. The join is an inner
# join (the pandas default), so only keys present in both tables are kept.
# Columns that exist in both tables get a "_nuke" / "_gdp" suffix.
merge_df = nuke_df.merge(
    gdp_df,
    how="inner",
    on="Country_Year",
    suffixes=("_nuke", "_gdp"),
)

merge_df.head()

Unnamed: 0,Country_nuke,Code,Year_nuke,Nuclear weapons inventory by country (FAS Nuclear Notebook),Country_Year,Country_gdp,Year_gdp,Value
0,China,CHN,1960,0,China 1960,China,1960,1483.494071
1,China,CHN,1961,0,China 1961,China,1961,1089.958051
2,China,CHN,1962,0,China 1962,China,1962,1020.729312
3,China,CHN,1963,0,China 1963,China,1963,1098.531898
4,China,CHN,1964,1,China 1964,China,1964,1268.463747


In [6]:
# Tidy the merged table: drop the duplicated GDP-side key columns and give
# the remaining columns readable labels.
column_labels = {
    "Country_nuke": "Country",
    "Year_nuke": "Year",
    "Nuclear weapons inventory by country (FAS Nuclear Notebook)": "Quantity of Nuclear Weapons",
    "Value": "GDP (Current LCU)",
}

filtered_df = (
    merge_df
    .drop(columns=['Country_gdp', 'Year_gdp'])
    .rename(columns=column_labels)
)

# Round only the GDP column to two decimals; other columns are untouched.
filtered_df = filtered_df.round({"GDP (Current LCU)": 2})

filtered_df.head()

Unnamed: 0,Country,Code,Year,Quantity of Nuclear Weapons,Country_Year,GDP (Current LCU)
0,China,CHN,1960,0,China 1960,1483.49
1,China,CHN,1961,0,China 1961,1089.96
2,China,CHN,1962,0,China 1962,1020.73
3,China,CHN,1963,0,China 1963,1098.53
4,China,CHN,1964,1,China 1964,1268.46


In [7]:
# Save the cleaned table; the row index is written too (the to_csv default).
# NOTE(review): the target name has no ".csv" extension ("cleaned_nuke_csv");
# confirm whether "cleaned_nuke.csv" was intended before renaming it.
filtered_df.to_csv("data/cleaned_nuke_csv")

# Database Connection

In [8]:
# Connection string and client for the MongoDB server on localhost:27017.
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Handle to the "nukeDB" database.
db = client["nukeDB"]

# Handle to the "by_country" collection that the next cell populates.
by_country = db["by_country"]

In [9]:
# Populate the by_country collection from filtered_df.
#
# All documents are built first and then sent in one insert_many call. This
# avoids the per-row overhead of iterrows() plus one database round trip per
# document. It also means a row that fails type conversion aborts the cell
# before anything has been written.
#
# NOTE(review): this cell appends. Re-running it inserts the same documents
# again; empty the collection first if a rerun is expected.
document_columns = [
    "Country_Year",
    "Country",
    "Code",
    "Year",
    "Quantity of Nuclear Weapons",
    "GDP (Current LCU)",
]

# Cast every value to a plain Python type so the stored documents do not
# depend on the pandas/numpy dtypes of the columns.
posts = [
    {
        "Country_Year": str(country_year),
        "Country": str(country),
        "Code": str(code),
        "Year": int(year),
        "Quantity of Nuclear Weapons": int(quantity),
        "GDP (Current LCU)": float(gdp),
    }
    for country_year, country, code, year, quantity, gdp
    in filtered_df[document_columns].itertuples(index=False, name=None)
]

# insert_many rejects an empty list, so skip the call when there is no data.
if posts:
    by_country.insert_many(posts)