# Data retrieval through API 

Based on NCHS - Leading Causes of Death: United States:
This dataset presents the age-adjusted death rates for the 10 leading causes of death in the United States beginning in 1999. The total number of records is 10,886. For the project, records from 2010 through 2017 have been included, reducing the sample size to 4576 records by applying filtering and paging criteria to the endpoint.
Data are based on information from all resident death certificates filed in the 50 states and the District of Columbia using demographic and medical characteristics. Age-adjusted death rates (per 100,000 population) are based on the 2000 U.S. standard population. Populations used for computing death rates after 2010 are postcensal estimates based on the 2010 census, estimated as of July 1, 2010. Rates for census years are based on populations enumerated in the corresponding censuses. Rates for non-census years before 2010 are revised using updated intercensal population estimates and may differ from rates previously published.
Causes of death classified by the International Classification of Diseases, Tenth Revision (ICD–10) are ranked according to the number of deaths assigned to rankable causes. Cause of death statistics are based on the underlying cause of death.

In [94]:
import json
import os

import pandas as pd
import requests
from bs4 import BeautifulSoup
#from sodapy import Socrata

In [95]:
# CDC open-data endpoint for the leading-causes-of-death dataset.
# The query string keeps rows with year >= 2010 and caps the page at 4600 rows.
url = (
    "https://data.cdc.gov/resource/bi63-dtpu.json"
    "?$where=year>=2010&$limit=4600"
)

In [96]:
# Fetch the filtered records from the endpoint.
# A timeout prevents the cell from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors instead of silently parsing an
# error payload as if it were data.
response = requests.get(url, timeout=30)
response.raise_for_status()
loaded_json = response.json()
# Preview only the first few records rather than echoing the whole payload
loaded_json[:5]

[{'year': '2010',
  '_113_cause_name': 'Accidents (unintentional injuries) (V01-X59,Y85-Y86)',
  'cause_name': 'Unintentional injuries',
  'state': 'Alabama',
  'deaths': '2394',
  'aadr': '49.6'},
 {'year': '2010',
  '_113_cause_name': 'Accidents (unintentional injuries) (V01-X59,Y85-Y86)',
  'cause_name': 'Unintentional injuries',
  'state': 'Alaska',
  'deaths': '366',
  'aadr': '58.7'},
 {'year': '2010',
  '_113_cause_name': 'Accidents (unintentional injuries) (V01-X59,Y85-Y86)',
  'cause_name': 'Unintentional injuries',
  'state': 'Arizona',
  'deaths': '3018',
  'aadr': '46.7'},
 {'year': '2010',
  '_113_cause_name': 'Accidents (unintentional injuries) (V01-X59,Y85-Y86)',
  'cause_name': 'Unintentional injuries',
  'state': 'Arkansas',
  'deaths': '1461',
  'aadr': '49.4'},
 {'year': '2010',
  '_113_cause_name': 'Accidents (unintentional injuries) (V01-X59,Y85-Y86)',
  'cause_name': 'Unintentional injuries',
  'state': 'California',
  'deaths': '10435',
  'aadr': '27.8'},
 {'year

In [108]:
# Build a DataFrame from the list of JSON records (one row per record)
results_df = pd.DataFrame(data=loaded_json)
# Show the first five rows as a quick sanity check
results_df.head(n=5)

Unnamed: 0,year,_113_cause_name,cause_name,state,deaths,aadr
0,2010,"Accidents (unintentional injuries) (V01-X59,Y8...",Unintentional injuries,Alabama,2394,49.6
1,2010,"Accidents (unintentional injuries) (V01-X59,Y8...",Unintentional injuries,Alaska,366,58.7
2,2010,"Accidents (unintentional injuries) (V01-X59,Y8...",Unintentional injuries,Arizona,3018,46.7
3,2010,"Accidents (unintentional injuries) (V01-X59,Y8...",Unintentional injuries,Arkansas,1461,49.4
4,2010,"Accidents (unintentional injuries) (V01-X59,Y8...",Unintentional injuries,California,10435,27.8


In [109]:
# Number of non-null values in each column
results_df.notna().sum()

year               4576
_113_cause_name    4576
cause_name         4576
state              4576
deaths             4576
aadr               4576
dtype: int64

In [110]:
# Drop the long '_113_cause_name' label; the remaining columns are kept as-is
death_cause = results_df.drop('_113_cause_name', axis='columns')
death_cause

Unnamed: 0,year,cause_name,state,deaths,aadr
0,2010,Unintentional injuries,Alabama,2394,49.6
1,2010,Unintentional injuries,Alaska,366,58.7
2,2010,Unintentional injuries,Arizona,3018,46.7
3,2010,Unintentional injuries,Arkansas,1461,49.4
4,2010,Unintentional injuries,California,10435,27.8
...,...,...,...,...,...
4571,2017,Kidney disease,Virginia,1618,16.9
4572,2017,Kidney disease,Washington,439,5.3
4573,2017,Kidney disease,West Virginia,436,17.1
4574,2017,Kidney disease,Wisconsin,922,12.5


# Load data to MongoDB cloud

In [103]:
import pymongo

In [106]:
# Initialize PyMongo and establish the connection with the cloud cluster.
# The connection string (which embeds the user id and password) is read from
# the MONGODB_URI environment variable so credentials never live in the notebook.
# NOTE(review): the password that was previously committed in this cell should
# be rotated, since it remains visible in version history.
conn = os.environ.get('MONGODB_URI')
if not conn:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to the cluster connection "
        "string, e.g. mongodb+srv://USER:PASSWORD@cluster0-v6uxh.mongodb.net/"
        "test?retryWrites=true&w=majority"
    )
client = pymongo.MongoClient(conn)

In [112]:
# Select the target database and, within it, the collection for the records.
# Subscript access names both explicitly as strings.
db = client['death_cause_US_db']
collection = db['death_cause']

In [113]:
#Add data to the database in cluster
db.collection.insert_many(death_cause.to_dict('records'))

<pymongo.results.InsertManyResult at 0x11ad37230>

In [122]:
#Read data from cluster
records=db.collection.find()
for record in records:
    print(record)

{'_id': ObjectId('5df3cac263ebbde9a082751c'), 'year': '2010', 'cause_name': 'Unintentional injuries', 'state': 'Alabama', 'deaths': '2394', 'aadr': '49.6'}
{'_id': ObjectId('5df3cac263ebbde9a082751d'), 'year': '2010', 'cause_name': 'Unintentional injuries', 'state': 'Alaska', 'deaths': '366', 'aadr': '58.7'}
{'_id': ObjectId('5df3cac263ebbde9a082751e'), 'year': '2010', 'cause_name': 'Unintentional injuries', 'state': 'Arizona', 'deaths': '3018', 'aadr': '46.7'}
{'_id': ObjectId('5df3cac263ebbde9a082751f'), 'year': '2010', 'cause_name': 'Unintentional injuries', 'state': 'Arkansas', 'deaths': '1461', 'aadr': '49.4'}
{'_id': ObjectId('5df3cac263ebbde9a0827520'), 'year': '2010', 'cause_name': 'Unintentional injuries', 'state': 'California', 'deaths': '10435', 'aadr': '27.8'}
{'_id': ObjectId('5df3cac263ebbde9a0827521'), 'year': '2010', 'cause_name': 'Unintentional injuries', 'state': 'Colorado', 'deaths': '2106', 'aadr': '43.5'}
{'_id': ObjectId('5df3cac263ebbde9a0827522'), 'year': '2010'

{'_id': ObjectId('5df3cac263ebbde9a0827581'), 'year': '2010', 'cause_name': 'All causes', 'state': 'West Virginia', 'deaths': '21275', 'aadr': '933.6'}
{'_id': ObjectId('5df3cac263ebbde9a0827582'), 'year': '2010', 'cause_name': 'All causes', 'state': 'Wisconsin', 'deaths': '47308', 'aadr': '719.0'}
{'_id': ObjectId('5df3cac263ebbde9a0827583'), 'year': '2010', 'cause_name': 'All causes', 'state': 'Wyoming', 'deaths': '4438', 'aadr': '778.8'}
{'_id': ObjectId('5df3cac263ebbde9a0827584'), 'year': '2010', 'cause_name': "Alzheimer's disease", 'state': 'Alabama', 'deaths': '1523', 'aadr': '31.2'}
{'_id': ObjectId('5df3cac263ebbde9a0827585'), 'year': '2010', 'cause_name': "Alzheimer's disease", 'state': 'Alaska', 'deaths': '85', 'aadr': '25.9'}
{'_id': ObjectId('5df3cac263ebbde9a0827586'), 'year': '2010', 'cause_name': "Alzheimer's disease", 'state': 'Arizona', 'deaths': '2327', 'aadr': '35.3'}
{'_id': ObjectId('5df3cac263ebbde9a0827587'), 'year': '2010', 'cause_name': "Alzheimer's disease", 

{'_id': ObjectId('5df3cac263ebbde9a0827f33'), 'year': '2014', 'cause_name': 'Diabetes', 'state': 'Ohio', 'deaths': '3641', 'aadr': '25.7'}
{'_id': ObjectId('5df3cac263ebbde9a0827f34'), 'year': '2014', 'cause_name': 'Diabetes', 'state': 'Oklahoma', 'deaths': '1261', 'aadr': '29.1'}
{'_id': ObjectId('5df3cac263ebbde9a0827f35'), 'year': '2014', 'cause_name': 'Diabetes', 'state': 'Oregon', 'deaths': '1083', 'aadr': '22.4'}
{'_id': ObjectId('5df3cac263ebbde9a0827f36'), 'year': '2014', 'cause_name': 'Diabetes', 'state': 'Pennsylvania', 'deaths': '3765', 'aadr': '22.0'}
{'_id': ObjectId('5df3cac263ebbde9a0827f37'), 'year': '2014', 'cause_name': 'Diabetes', 'state': 'Rhode Island', 'deaths': '252', 'aadr': '18.3'}
{'_id': ObjectId('5df3cac263ebbde9a0827f38'), 'year': '2014', 'cause_name': 'Diabetes', 'state': 'South Carolina', 'deaths': '1239', 'aadr': '21.8'}
{'_id': ObjectId('5df3cac263ebbde9a0827f39'), 'year': '2014', 'cause_name': 'Diabetes', 'state': 'South Dakota', 'deaths': '224', 'aadr