In [1]:
# import dependencies
import pandas as pd
import numpy as np
import pymongo
from pymongo import MongoClient

In [2]:
# Record the conda version used to run this notebook (shell escape; output is printed below).
!conda -V

conda 4.8.3


In [3]:
!pip freeze

appnope==0.1.0
backcall==0.2.0
certifi==2020.6.20
click==7.1.2
colorama==0.4.4
decorator==4.4.2
Flask==1.1.2
Flask-PyMongo==2.3.0
ipykernel==5.3.1
ipython==7.16.1
ipython-genutils==0.2.0
itsdangerous==1.1.0
jedi==0.17.1
Jinja2==2.11.2
jupyter-client==6.1.5
jupyter-core==4.6.3
MarkupSafe==1.1.1
parso==0.7.0
pexpect==4.8.0
pickleshare==0.7.5
prompt-toolkit==3.0.5
ptyprocess==0.6.0
Pygments==2.6.1
pymongo==3.11.0
python-dateutil==2.8.1
pywin32==228
pyzmq==19.0.1
six==1.15.0
tornado==6.0.4
traitlets==4.3.3
wcwidth==0.2.5
Werkzeug==1.0.1
wincertstore==0.2


In [4]:
# Nuclear-weapons inventory: read the raw CSV, then expose its "Entity"
# column under the name "Country".
nuke_file = pd.read_csv("data/nuke_number.csv")
nuke_df = pd.DataFrame(nuke_file).rename(columns={"Entity": "Country"})

# Preview the first rows.
nuke_df.head()

Unnamed: 0,Country,Code,Year,Nuclear weapons inventory by country (FAS Nuclear Notebook)
0,China,CHN,1945,0
1,China,CHN,1946,0
2,China,CHN,1947,0
3,China,CHN,1948,0
4,China,CHN,1949,0


In [5]:
# GDP per country: read the raw CSV and normalise it in one pipeline.
gdp_file = pd.read_csv("data/GDP_Data_per_Country.csv")

gdp_df = (
    pd.DataFrame(gdp_file)
    # Capitalise the headers and expose "Date" under the name "Year".
    .rename(columns={"country": "Country", "series": "Series", "Date": "Year"})
    # Use the short name "Russia" in place of "Russian Federation".
    .assign(Country=lambda frame: frame["Country"].replace({"Russian Federation": "Russia"}))
    # The series label and unit columns are not carried forward.
    .drop(columns=["Series", "Unit"])
)

# Preview the first rows.
gdp_df.head()

Unnamed: 0,Country,Year,Value
0,China,1960,1483.494071
1,China,1961,1089.958051
2,China,1962,1020.729312
3,China,1963,1098.531898
4,China,1964,1268.463747


In [6]:
# Add a "<Country> <Year>" text key (e.g. "China 1960") to both tables.
# The column is added to each existing frame in place.
for frame in (nuke_df, gdp_df):
    frame["Country_Year"] = frame["Country"] + " " + frame["Year"].astype(str)


In [7]:
# Join the nuclear and GDP tables on the "Country_Year" key. With the default
# (inner) join only keys present in both tables are kept; other column names
# that occur in both inputs receive the "_nuke" / "_gdp" suffix.
merge_df = nuke_df.merge(
    gdp_df,
    on="Country_Year",
    suffixes=("_nuke", "_gdp"),
)

# Preview the first rows.
merge_df.head()

Unnamed: 0,Country_nuke,Code,Year_nuke,Nuclear weapons inventory by country (FAS Nuclear Notebook),Country_Year,Country_gdp,Year_gdp,Value
0,China,CHN,1960,0,China 1960,China,1960,1483.494071
1,China,CHN,1961,0,China 1961,China,1961,1089.958051
2,China,CHN,1962,0,China 1962,China,1962,1020.729312
3,China,CHN,1963,0,China 1963,China,1963,1098.531898
4,China,CHN,1964,1,China 1964,China,1964,1268.463747


In [8]:
# Tidy the merged table: drop the duplicated GDP-side country/year columns,
# give the remaining columns readable names, and round GDP to 2 decimals.
filtered_df = (
    merge_df
    .drop(columns=["Country_gdp", "Year_gdp"])
    .rename(
        columns={
            "Country_nuke": "Country",
            "Year_nuke": "Year",
            "Nuclear weapons inventory by country (FAS Nuclear Notebook)": "Quantity of Nuclear Weapons",
            "Value": "GDP (Current LCU)",
        }
    )
    .assign(**{"GDP (Current LCU)": lambda frame: frame["GDP (Current LCU)"].round(2)})
)

# Preview the first rows.
filtered_df.head()

Unnamed: 0,Country,Code,Year,Quantity of Nuclear Weapons,Country_Year,GDP (Current LCU)
0,China,CHN,1960,0,China 1960,1483.49
1,China,CHN,1961,0,China 1961,1089.96
2,China,CHN,1962,0,China 1962,1020.73
3,China,CHN,1963,0,China 1963,1098.53
4,China,CHN,1964,1,China 1964,1268.46


In [9]:
# Write the cleaned table to disk; the row index is written as the first column.
# NOTE(review): the file name ends in "_csv" rather than ".csv" - possibly a
# typo; confirm what any downstream reader expects before renaming it.
filtered_df.to_csv(path_or_buf="data/cleaned_nuke_csv", index=True)

# Database Connection

In [10]:
# Connection string for the MongoDB server on localhost, and a client for it.
conn = 'mongodb://localhost:27017'
client = MongoClient(conn)

# Handle to the "nukeDB" database.
db = client["nukeDB"]

# Handle to the "by_country" collection inside that database.
by_country = db["by_country"]

In [11]:
# Load the cleaned table into the "by_country" collection, one document per row.
# Each value is cast to a built-in str/int/float so the documents do not carry
# pandas/NumPy scalar types into the database driver.
posts = [
    {
        "Country_Year": str(row["Country_Year"]),
        "Country": str(row["Country"]),
        "Code": str(row["Code"]),
        "Year": int(row["Year"]),
        "Quantity of Nuclear Weapons": int(row["Quantity of Nuclear Weapons"]),
        "GDP (Current LCU)": float(row["GDP (Current LCU)"]),
    }
    for _, row in filtered_df.iterrows()
]

# Replace the collection's previous contents instead of appending to them, so
# re-running this cell (or the whole notebook) does not leave duplicate
# documents behind. All documents are sent in a single insert_many() call
# rather than one insert per row.
# Nothing is touched when there are no rows: insert_many() rejects an empty
# list, and an empty frame should not wipe data that is already stored.
if posts:
    by_country.delete_many({})
    by_country.insert_many(posts)

In [12]:
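# NOTE: everything in this cell is commented out and is not executed. It refers to
# columns (zip_code, business_name, ...) and a `by_zip` collection that this notebook does not define.
# #Define an iterable list for our 'for loop' to loop through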
# unique_zip = merge_df['zip_code'].unique()

# #Instantiate for loop for populating food-truck-by-zip collection
# for row in unique_zip:
#     #Filter dataframe to only contain zip codes equal to that which our 'for loop' is currently iterating on
#     df = merge_df[merge_df['zip_code'] == row]
#     #Populate obj dictionary with information to be inserted into food-truck-by-zip collection. iloc[0] is used to display only 1 result, instead of a list of duplicate results
#     obj = {
#         'Zip-Code': str(df["zip_code"].iloc[0]),
#         "Agi": str(df["agi"].iloc[0]),
#         "Total_income": str(df["total_income"].iloc[0]),
#         "Number_of_individuals": str(df["number_of_individuals"].iloc[0]),
#         "Number_of_dependents": str(df["number_of_dependents"].iloc[0]),
#         'Food_trucks' : []
#     }
#     #Define an iterable list for our 'nested for loop' to loop through
#     unique_trucks = df['business_name'].unique()
    
#     #This nested 'for loop' breaks down food trucks by zip code
#     for y in unique_trucks:
#         #Filter dataframe to only contain business names equal to that which our 'for loop' is currently iterating on
#         fdf = df[df['business_name'] == y]
#         #This grabs all the food truck information that we want for our current given zip code
#         trucks = {
#             "business_name": str(fdf["business_name"].iloc[0]),
#             "app_type": str(fdf["app_type"].iloc[0]),
#             "date_issued": str(fdf["date_issued"].iloc[0])
#         }
#         #Append food truck data onto Food_trucks list for the given zip code
#         obj['Food_trucks'].append(trucks)
#     #Insert data into food-truck-by-zip collection, obj{} by obj{}
#     by_zip.insert_one(obj)