In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the coordinate lookup table (country columns plus US-state columns) from disk.
lat_lon_file = pd.read_csv("data/world_lat_lon.csv")

# Wrap the parsed table in its own DataFrame object; the raw read stays
# reachable as `lat_lon_file`.
geo_df = pd.DataFrame(data=lat_lon_file)
geo_df.head(n=5)

Unnamed: 0,country_code,latitude,longitude,country,usa_state_code,usa_state_latitude,usa_state_longitude,usa_state
0,AD,42.546245,1.601554,Andorra,AK,63.588753,-154.493062,Alaska
1,AE,23.424076,53.847818,United Arab Emirates,AL,32.318231,-86.902298,Alabama
2,AF,33.93911,67.709953,Afghanistan,AR,35.20105,-91.831833,Arkansas
3,AG,17.060816,-61.796428,Antigua and Barbuda,AZ,34.048928,-111.093731,Arizona
4,AI,18.220554,-63.068615,Anguilla,CA,36.778261,-119.417932,California


In [3]:
# Build the country-level coordinate table: keep only the country name and its
# latitude/longitude, under capitalised column names.
#
# The table is derived from the raw `lat_lon_file` frame instead of re-assigning
# `geo_df` from itself, so the cell is idempotent: re-running it no longer raises
# KeyError from dropping columns that a previous run already removed. Selecting
# the three wanted columns also makes a separate drop of the country-code and
# US-state columns unnecessary.
geo_df = lat_lon_file.rename(
    columns={"country": "Country", "latitude": "Latitude", "longitude": "Longitude"}
)[["Country", "Latitude", "Longitude"]]
geo_df.head()

Unnamed: 0,Country,Latitude,Longitude
0,Andorra,42.546245,1.601554
1,United Arab Emirates,23.424076,53.847818
2,Afghanistan,33.93911,67.709953
3,Antigua and Barbuda,17.060816,-61.796428
4,Anguilla,18.220554,-63.068615


In [4]:
# Nuclear weapons inventory: one row per country and year.
nuke_file = pd.read_csv("data/nuke_number.csv")

# "Entity" holds the country name; rename it so it matches the "Country"
# column used by geo_df.
nuke_df = pd.DataFrame(nuke_file).rename(columns={"Entity": "Country"})

nuke_df.head()

Unnamed: 0,Country,Code,Year,Nuclear weapons inventory by country (FAS Nuclear Notebook)
0,China,CHN,1945,0
1,China,CHN,1946,0
2,China,CHN,1947,0
3,China,CHN,1948,0
4,China,CHN,1949,0


In [7]:
# Attach each country's coordinates to its yearly inventory rows.
# This is the default inner join, so rows whose Country has no match in
# geo_df are not carried over.
merge_geo_df = nuke_df.merge(geo_df, on="Country", suffixes=("_nuke", "_geo"))
merge_geo_df.head()


Unnamed: 0,Country,Code,Year,Nuclear weapons inventory by country (FAS Nuclear Notebook),Latitude,Longitude
0,China,CHN,1945,0,35.86166,104.195397
1,China,CHN,1946,0,35.86166,104.195397
2,China,CHN,1947,0,35.86166,104.195397
3,China,CHN,1948,0,35.86166,104.195397
4,China,CHN,1949,0,35.86166,104.195397


In [8]:
# GDP per country and year.
gdp_file = pd.read_csv("data/GDP_Data_per_Country.csv")

gdp_df = (
    pd.DataFrame(gdp_file)
    # Use the same column names as the nuke/geo tables.
    .rename(columns={"country": "Country", "series": "Series", "Date": "Year"})
    # Use "Russia" as the country label instead of "Russian Federation".
    .assign(
        Country=lambda frame: frame["Country"].replace({"Russian Federation": "Russia"})
    )
    # Series and Unit are not needed downstream.
    .drop(columns=["Series", "Unit"])
)

gdp_df.head()

Unnamed: 0,Country,Year,Value
0,China,1960,1483.494071
1,China,1961,1089.958051
2,China,1962,1020.729312
3,China,1963,1098.531898
4,China,1964,1268.463747


In [10]:
# Give both tables the same "<Country> <Year>" string key (e.g. "China 1960"),
# which the next merge joins on.
for frame in (merge_geo_df, gdp_df):
    frame["Country_Year"] = frame["Country"] + " " + frame["Year"].astype(str)


In [11]:
# Combine inventory + coordinates with GDP on the shared Country_Year key.
# Country and Year exist in both inputs, so they come out suffixed
# (_geo from the left table, _gdp from the right).
merge_df = merge_geo_df.merge(gdp_df, on="Country_Year", suffixes=("_geo", "_gdp"))

merge_df.head()


Unnamed: 0,Country_geo,Code,Year_geo,Nuclear weapons inventory by country (FAS Nuclear Notebook),Latitude,Longitude,Country_Year,Country_gdp,Year_gdp,Value
0,China,CHN,1960,0,35.86166,104.195397,China 1960,China,1960,1483.494071
1,China,CHN,1961,0,35.86166,104.195397,China 1961,China,1961,1089.958051
2,China,CHN,1962,0,35.86166,104.195397,China 1962,China,1962,1020.729312
3,China,CHN,1963,0,35.86166,104.195397,China 1963,China,1963,1098.531898
4,China,CHN,1964,1,35.86166,104.195397,China 1964,China,1964,1268.463747


In [15]:
# Final tidy table: remove the duplicate join columns contributed by the GDP
# side, give the remaining columns readable names, and round GDP to two decimals.
filtered_df = (
    merge_df
    .drop(columns=["Country_gdp", "Year_gdp"])
    .rename(
        columns={
            "Country_geo": "Country",
            "Year_geo": "Year",
            "Nuclear weapons inventory by country (FAS Nuclear Notebook)": "Quantity of Nuclear Weapons",
            "Value": "GDP (Current LCU)",
        }
    )
    .assign(**{"GDP (Current LCU)": lambda frame: frame["GDP (Current LCU)"].round(2)})
)

filtered_df.head()

Unnamed: 0,Country,Code,Year,Quantity of Nuclear Weapons,Latitude,Longitude,Country_Year,GDP (Current LCU)
0,China,CHN,1960,0,35.86166,104.195397,China 1960,1483.49
1,China,CHN,1961,0,35.86166,104.195397,China 1961,1089.96
2,China,CHN,1962,0,35.86166,104.195397,China 1962,1020.73
3,China,CHN,1963,0,35.86166,104.195397,China 1963,1098.53
4,China,CHN,1964,1,35.86166,104.195397,China 1964,1268.46


In [16]:
# Persist the cleaned table; the DataFrame index is written as the first column.
# NOTE(review): the target name has no ".csv" extension ("cleaned_nuke_csv") -
# confirm whether downstream readers rely on this exact path before renaming.
filtered_df.to_csv(path_or_buf="data/cleaned_nuke_csv", index=True)

# Database Connection

In [None]:
# Imported here because no visible earlier cell brings pymongo into scope;
# without it this cell fails with NameError on a fresh kernel.
import pymongo

# Define the connection string and instantiate the client
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Select the database in Mongo
db = client.nukeDB

# Handle to the by_country collection that the cleaned rows are written to
by_country = db.by_country

In [None]:
# Build one document per row of filtered_df for the by_country collection.
# Values are cast to plain Python types so that no numpy scalar is handed to
# the MongoDB driver.
posts = [
    {
        "Country_Year": str(row["Country_Year"]),
        "Country": str(row["Country"]),
        "Code": str(row["Code"]),
        "Year": int(row["Year"]),
        "Quantity of Nuclear Weapons": int(row["Quantity of Nuclear Weapons"]),
        "GDP (Current LCU)": float(row["GDP (Current LCU)"]),
        "Latitude": float(row["Latitude"]),
        # Read from the Longitude column; this field was previously filled
        # with the row's Latitude value.
        "Longitude": float(row["Longitude"]),
    }
    for _, row in filtered_df.iterrows()
]

# Insert all documents in a single call instead of one round trip per row.
# insert_many rejects an empty document list, hence the guard.
# NOTE(review): nothing in this cell clears the collection first, so running it
# again inserts the same documents a second time.
if posts:
    by_country.insert_many(posts)