Before running this notebook, import the dataset from a terminal opened in the directory that contains the CSV file: `mongoimport --type csv -d covid_vaccines -c vaccine_records --headerline --drop us_state_vaccinations.csv`

In [19]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import pandas as pd

In [2]:
# Create a MongoClient connected to port 27017; no host is passed, so the
# client falls back to pymongo's default host.
mongo = MongoClient(port=27017)

In [3]:
# List the databases on the server to confirm that 'covid_vaccines' exists
# (it is the target database of the mongoimport command at the top of the notebook)
print(mongo.list_database_names())

['admin', 'config', 'covid_vaccines', 'epa', 'fruits_db', 'gardenDB', 'local', 'met', 'petsitly_marketing', 'uk_food']


In [4]:
# Bind the covid_vaccines database to `db`
# (attribute-style access, equivalent to mongo['covid_vaccines'])
db = mongo.covid_vaccines

In [5]:
# List the collections in the covid_vaccines database; the import targets
# a collection named 'vaccine_records'
print(db.list_collection_names())

['vaccine_records']


In [6]:
# Pretty-print one document from the vaccine_records collection to inspect
# its field names and value types
pprint(db.vaccine_records.find_one())

{'_id': ObjectId('6581016133468c90a2cdef26'),
 'daily_vaccinations': 5906.0,
 'daily_vaccinations_per_million': 1205.0,
 'daily_vaccinations_raw': 5906.0,
 'date': '2021-01-13',
 'distributed_per_hundred': 7.73,
 'location': 'Alabama',
 'people_fully_vaccinated': 9245.0,
 'people_fully_vaccinated_per_hundred': 0.19,
 'people_vaccinated': 74792.0,
 'people_vaccinated_per_hundred': 1.53,
 'share_doses_used': 0.222,
 'total_boosters': '',
 'total_boosters_per_hundred': '',
 'total_distributed': 378975.0,
 'total_vaccinations': 84040.0,
 'total_vaccinations_per_hundred': 1.71}


In [7]:
# Keep a handle on the vaccine_records collection for the queries below
vaccine_records = db.vaccine_records

In [18]:
# Select every record dated from Jan 1, 2021 through Jan 1, 2022 (both ends inclusive).
# 'date' is stored as a 'YYYY-MM-DD' string (see the sample document printed above),
# so plain string comparison orders the values chronologically.
query = {'date': {'$gte': '2021-01-01',
                  '$lte': '2022-01-01'
                  }}

# find() returns a one-shot cursor. Materialise it into a list so that `results`
# can be indexed here AND still be read in full by later cells, even when those
# cells are re-run (an exhausted cursor would silently yield no documents).
results = list(vaccine_records.find(query))

# Use count_documents to display the number of documents in the result
print('Number of records between Jan 1, 2021 and Jan 1, 2022:', vaccine_records.count_documents(query))

# Display the first document in the results using pprint
pprint(results[0])

Number of records between Jan 1, 2021 and Jan 1, 2022: 23000
{'_id': ObjectId('6581016133468c90a2cdef26'),
 'daily_vaccinations': 5906.0,
 'daily_vaccinations_per_million': 1205.0,
 'daily_vaccinations_raw': 5906.0,
 'date': '2021-01-13',
 'distributed_per_hundred': 7.73,
 'location': 'Alabama',
 'people_fully_vaccinated': 9245.0,
 'people_fully_vaccinated_per_hundred': 0.19,
 'people_vaccinated': 74792.0,
 'people_vaccinated_per_hundred': 1.53,
 'share_doses_used': 0.222,
 'total_boosters': '',
 'total_boosters_per_hundred': '',
 'total_distributed': 378975.0,
 'total_vaccinations': 84040.0,
 'total_vaccinations_per_hundred': 1.71}


In [20]:
# Load the query results into a Pandas DataFrame
df = pd.DataFrame(results)

# Report how many rows the DataFrame holds
print('Number of rows:', df.shape[0])

# Preview the first 10 rows (rendered as the cell's output)
df.head(10)

Number of rows: 23000


Unnamed: 0,_id,date,location,total_vaccinations,total_distributed,people_vaccinated,people_fully_vaccinated_per_hundred,total_vaccinations_per_hundred,people_fully_vaccinated,people_vaccinated_per_hundred,distributed_per_hundred,daily_vaccinations_raw,daily_vaccinations,daily_vaccinations_per_million,share_doses_used,total_boosters,total_boosters_per_hundred
0,6581016133468c90a2cdef26,2021-01-13,Alabama,84040.0,378975.0,74792.0,0.19,1.71,9245.0,1.53,7.73,5906.0,5906.0,1205.0,0.222,,
1,6581016133468c90a2cdef27,2021-01-12,Alabama,78134.0,377025.0,70861.0,0.15,1.59,7270.0,1.45,7.69,,,,0.207,,
2,6581016133468c90a2cdef28,2021-01-14,Alabama,92300.0,435350.0,80480.0,,1.88,,1.64,8.88,8260.0,7083.0,1445.0,0.212,,
3,6581016133468c90a2cdef29,2021-01-15,Alabama,100567.0,444650.0,86956.0,0.28,2.05,13488.0,1.77,9.07,8267.0,7478.0,1525.0,0.226,,
4,6581016133468c90a2cdef2a,2021-01-16,Alabama,,,,,,,,,,7498.0,1529.0,,,
5,6581016133468c90a2cdef2b,2021-01-17,Alabama,,,,,,,,,,7509.0,1531.0,,,
6,6581016133468c90a2cdef2c,2021-01-18,Alabama,,,,,,,,,,7517.0,1533.0,,,
7,6581016133468c90a2cdef2d,2021-01-20,Alabama,139200.0,483275.0,121113.0,0.37,2.84,17956.0,2.47,9.86,8405.0,7880.0,1607.0,0.288,,
8,6581016133468c90a2cdef2e,2021-01-21,Alabama,165919.0,493125.0,144429.0,0.44,3.38,21345.0,2.95,10.06,26719.0,10517.0,2145.0,0.336,,
9,6581016133468c90a2cdef2f,2021-01-19,Alabama,130795.0,444650.0,114319.0,0.33,2.67,16346.0,2.33,9.07,,7523.0,1534.0,0.294,,
