Import the dataset by running the following command from a terminal, in the directory that contains the CSV file: `mongoimport --type csv -d covid_vaccines -c vaccine_records --headerline --drop us_state_vaccinations.csv`

In [1]:
# Import dependencies
import csv
import json
from pathlib import Path
from pprint import pprint

import pandas as pd
from pymongo import MongoClient

In [2]:
# Connect to the MongoDB server on this machine (default host, port 27017)
mongo = MongoClient(host='localhost', port=27017)

In [3]:
# Check that the 'covid_vaccines' database now exists on the server
database_names = mongo.list_database_names()
print(database_names)

['admin', 'config', 'covid_vaccines', 'covid_vaccines_byAge', 'epa', 'fruits_db', 'gardenDB', 'local', 'met', 'petsitly_marketing', 'uk_food']


In [4]:
# Keep a handle on the covid_vaccines database for the cells below
db = mongo.covid_vaccines

In [5]:
# List the collections held in the covid_vaccines database
collection_names = db.list_collection_names()
print(collection_names)

['vaccine_records']


In [6]:
# Pretty-print one document from vaccine_records to see its fields and value types
pprint(db['vaccine_records'].find_one())

{'_id': ObjectId('6581016133468c90a2cdef26'),
 'daily_vaccinations': 5906.0,
 'daily_vaccinations_per_million': 1205.0,
 'daily_vaccinations_raw': 5906.0,
 'date': '2021-01-13',
 'distributed_per_hundred': 7.73,
 'location': 'Alabama',
 'people_fully_vaccinated': 9245.0,
 'people_fully_vaccinated_per_hundred': 0.19,
 'people_vaccinated': 74792.0,
 'people_vaccinated_per_hundred': 1.53,
 'share_doses_used': 0.222,
 'total_boosters': '',
 'total_boosters_per_hundred': '',
 'total_distributed': 378975.0,
 'total_vaccinations': 84040.0,
 'total_vaccinations_per_hundred': 1.71}


In [7]:
# Keep a handle on the vaccine_records collection for querying
vaccine_records = db.vaccine_records

In [11]:
# Select the documents dated 2021-01-01 through 2022-01-01 (both ends inclusive).
# 'date' is stored as a 'YYYY-MM-DD' string, so the range operators compare text.
query = {
    'date': {
        '$gte': '2021-01-01',
        '$lte': '2022-01-01',
    },
}

# Report how many documents fall inside the date range
record_count = vaccine_records.count_documents(query)
print('Number of records between Jan 1, 2021 and Jan 1, 2022:', record_count)

# Open a cursor over the matching documents and pretty-print the first one
results = vaccine_records.find(query)
pprint(results[0])

Number of records between Jan 1, 2021 and Jan 1, 2022: 23000
{'_id': ObjectId('6581016133468c90a2cdef26'),
 'daily_vaccinations': 5906.0,
 'daily_vaccinations_per_million': 1205.0,
 'daily_vaccinations_raw': 5906.0,
 'date': '2021-01-13',
 'distributed_per_hundred': 7.73,
 'location': 'Alabama',
 'people_fully_vaccinated': 9245.0,
 'people_fully_vaccinated_per_hundred': 0.19,
 'people_vaccinated': 74792.0,
 'people_vaccinated_per_hundred': 1.53,
 'share_doses_used': 0.222,
 'total_boosters': '',
 'total_boosters_per_hundred': '',
 'total_distributed': 378975.0,
 'total_vaccinations': 84040.0,
 'total_vaccinations_per_hundred': 1.71}


In [12]:
# Convert the result to a Pandas DataFrame.
# The cursor is cloned before it is read: iterating a cursor exhausts it, so
# building the frame straight from `results` would produce an empty DataFrame
# the second time this cell is run. A clone starts unevaluated every time.
vaccine_df = pd.DataFrame(list(results.clone()))

# Display the number of rows in the DataFrame
print('Number of rows:', len(vaccine_df))

# Display the first 10 rows of the DataFrame
vaccine_df.head(10)

Number of rows: 23000


Unnamed: 0,_id,date,location,total_vaccinations,total_distributed,people_vaccinated,people_fully_vaccinated_per_hundred,total_vaccinations_per_hundred,people_fully_vaccinated,people_vaccinated_per_hundred,distributed_per_hundred,daily_vaccinations_raw,daily_vaccinations,daily_vaccinations_per_million,share_doses_used,total_boosters,total_boosters_per_hundred
0,6581016133468c90a2cdef26,2021-01-13,Alabama,84040.0,378975.0,74792.0,0.19,1.71,9245.0,1.53,7.73,5906.0,5906.0,1205.0,0.222,,
1,6581016133468c90a2cdef27,2021-01-12,Alabama,78134.0,377025.0,70861.0,0.15,1.59,7270.0,1.45,7.69,,,,0.207,,
2,6581016133468c90a2cdef28,2021-01-14,Alabama,92300.0,435350.0,80480.0,,1.88,,1.64,8.88,8260.0,7083.0,1445.0,0.212,,
3,6581016133468c90a2cdef29,2021-01-15,Alabama,100567.0,444650.0,86956.0,0.28,2.05,13488.0,1.77,9.07,8267.0,7478.0,1525.0,0.226,,
4,6581016133468c90a2cdef2a,2021-01-16,Alabama,,,,,,,,,,7498.0,1529.0,,,
5,6581016133468c90a2cdef2b,2021-01-17,Alabama,,,,,,,,,,7509.0,1531.0,,,
6,6581016133468c90a2cdef2c,2021-01-18,Alabama,,,,,,,,,,7517.0,1533.0,,,
7,6581016133468c90a2cdef2d,2021-01-20,Alabama,139200.0,483275.0,121113.0,0.37,2.84,17956.0,2.47,9.86,8405.0,7880.0,1607.0,0.288,,
8,6581016133468c90a2cdef2e,2021-01-21,Alabama,165919.0,493125.0,144429.0,0.44,3.38,21345.0,2.95,10.06,26719.0,10517.0,2145.0,0.336,,
9,6581016133468c90a2cdef2f,2021-01-19,Alabama,130795.0,444650.0,114319.0,0.33,2.67,16346.0,2.33,9.07,,7523.0,1534.0,0.294,,


In [13]:
# List the column labels of the vaccine DataFrame to choose which ones to keep
vaccine_df.columns

Index(['_id', 'date', 'location', 'total_vaccinations', 'total_distributed',
       'people_vaccinated', 'people_fully_vaccinated_per_hundred',
       'total_vaccinations_per_hundred', 'people_fully_vaccinated',
       'people_vaccinated_per_hundred', 'distributed_per_hundred',
       'daily_vaccinations_raw', 'daily_vaccinations',
       'daily_vaccinations_per_million', 'share_doses_used', 'total_boosters',
       'total_boosters_per_hundred'],
      dtype='object')

In [14]:
# Keep only the id, date, location and the vaccination / distribution figures
# (totals and per-hundred rates); every other column is left behind.
vaccine_columns = [
    '_id',
    'date',
    'location',
    'total_vaccinations',
    'total_distributed',
    'people_vaccinated',
    'total_vaccinations_per_hundred',
    'people_vaccinated_per_hundred',
    'distributed_per_hundred',
]
vaccine_df_clean = vaccine_df[vaccine_columns]

vaccine_df_clean.head(10)

Unnamed: 0,_id,date,location,total_vaccinations,total_distributed,people_vaccinated,total_vaccinations_per_hundred,people_vaccinated_per_hundred,distributed_per_hundred
0,6581016133468c90a2cdef26,2021-01-13,Alabama,84040.0,378975.0,74792.0,1.71,1.53,7.73
1,6581016133468c90a2cdef27,2021-01-12,Alabama,78134.0,377025.0,70861.0,1.59,1.45,7.69
2,6581016133468c90a2cdef28,2021-01-14,Alabama,92300.0,435350.0,80480.0,1.88,1.64,8.88
3,6581016133468c90a2cdef29,2021-01-15,Alabama,100567.0,444650.0,86956.0,2.05,1.77,9.07
4,6581016133468c90a2cdef2a,2021-01-16,Alabama,,,,,,
5,6581016133468c90a2cdef2b,2021-01-17,Alabama,,,,,,
6,6581016133468c90a2cdef2c,2021-01-18,Alabama,,,,,,
7,6581016133468c90a2cdef2d,2021-01-20,Alabama,139200.0,483275.0,121113.0,2.84,2.47,9.86
8,6581016133468c90a2cdef2e,2021-01-21,Alabama,165919.0,493125.0,144429.0,3.38,2.95,10.06
9,6581016133468c90a2cdef2f,2021-01-19,Alabama,130795.0,444650.0,114319.0,2.67,2.33,9.07


In [15]:
# Rename 'location' to 'state' so both DataFrames share the key used for the merge
vaccine_df_clean = vaccine_df_clean.rename({'location': 'state'}, axis='columns')
vaccine_df_clean.head()

Unnamed: 0,_id,date,state,total_vaccinations,total_distributed,people_vaccinated,total_vaccinations_per_hundred,people_vaccinated_per_hundred,distributed_per_hundred
0,6581016133468c90a2cdef26,2021-01-13,Alabama,84040.0,378975.0,74792.0,1.71,1.53,7.73
1,6581016133468c90a2cdef27,2021-01-12,Alabama,78134.0,377025.0,70861.0,1.59,1.45,7.69
2,6581016133468c90a2cdef28,2021-01-14,Alabama,92300.0,435350.0,80480.0,1.88,1.64,8.88
3,6581016133468c90a2cdef29,2021-01-15,Alabama,100567.0,444650.0,86956.0,2.05,1.77,9.07
4,6581016133468c90a2cdef2a,2021-01-16,Alabama,,,,,,


Import the second dataset by running the following command from a terminal, in the directory that contains the CSV file: `mongoimport --type csv -d covid_vaccines_byAge -c records_byAge --headerline --drop COVID-19_Reported_Impact_and_Hospital_by_State.csv`

In [16]:
# Check that the 'covid_vaccines_byAge' database now exists on the server
database_names = mongo.list_database_names()
print(database_names)

['admin', 'config', 'covid_vaccines', 'covid_vaccines_byAge', 'epa', 'fruits_db', 'gardenDB', 'local', 'met', 'petsitly_marketing', 'uk_food']


In [17]:
# Keep a handle on the covid_vaccines_byAge database for the cells below
db_byAge = mongo.covid_vaccines_byAge

In [18]:
# List the collections held in the covid_vaccines_byAge database
collection_names = db_byAge.list_collection_names()
print(collection_names)

['records_byAge']


In [19]:
# Pretty-print one document from records_byAge to see its fields and value types
pprint(db_byAge['records_byAge'].find_one())

{'_id': ObjectId('65837bc84b90140c22e41dfe'),
 'adult_icu_bed_covid_utilization': 0.083854819,
 'adult_icu_bed_covid_utilization_coverage': 58,
 'adult_icu_bed_covid_utilization_denominator': 799,
 'adult_icu_bed_covid_utilization_numerator': 67,
 'adult_icu_bed_utilization': 0.668604651,
 'adult_icu_bed_utilization_coverage': 60,
 'adult_icu_bed_utilization_denominator': 860,
 'adult_icu_bed_utilization_numerator': 575,
 'all_pediatric_inpatient_bed_occupied': 0,
 'all_pediatric_inpatient_bed_occupied_coverage': 1,
 'all_pediatric_inpatient_beds': 0,
 'all_pediatric_inpatient_beds_coverage': 1,
 'critical_staffing_shortage_anticipated_within_week_no': 1,
 'critical_staffing_shortage_anticipated_within_week_not_reported': 59,
 'critical_staffing_shortage_anticipated_within_week_yes': 0,
 'critical_staffing_shortage_today_no': 1,
 'critical_staffing_shortage_today_not_reported': 59,
 'critical_staffing_shortage_today_yes': 0,
 'date': '2021-05-10',
 'deaths_covid': 4,
 'deaths_covid_cov

In [20]:
# Keep a handle on the records_byAge collection for querying
records_byAge = db_byAge.records_byAge

In [21]:
# Select the documents dated 2021-01-01 through 2022-01-01 (both ends inclusive).
# 'date' is stored as a 'YYYY-MM-DD' string, so the range operators compare text.
query = {
    'date': {
        '$gte': '2021-01-01',
        '$lte': '2022-01-01',
    },
}

# Report how many documents fall inside the date range
record_count = records_byAge.count_documents(query)
print('Number of records between Jan 1, 2021 and Jan 1, 2022:', record_count)

# Open a cursor over the matching documents and pretty-print the first one
results_byAge = records_byAge.find(query)
pprint(results_byAge[0])

Number of records between Jan 1, 2021 and Jan 1, 2022: 19529
{'_id': ObjectId('65837bc84b90140c22e41dfe'),
 'adult_icu_bed_covid_utilization': 0.083854819,
 'adult_icu_bed_covid_utilization_coverage': 58,
 'adult_icu_bed_covid_utilization_denominator': 799,
 'adult_icu_bed_covid_utilization_numerator': 67,
 'adult_icu_bed_utilization': 0.668604651,
 'adult_icu_bed_utilization_coverage': 60,
 'adult_icu_bed_utilization_denominator': 860,
 'adult_icu_bed_utilization_numerator': 575,
 'all_pediatric_inpatient_bed_occupied': 0,
 'all_pediatric_inpatient_bed_occupied_coverage': 1,
 'all_pediatric_inpatient_beds': 0,
 'all_pediatric_inpatient_beds_coverage': 1,
 'critical_staffing_shortage_anticipated_within_week_no': 1,
 'critical_staffing_shortage_anticipated_within_week_not_reported': 59,
 'critical_staffing_shortage_anticipated_within_week_yes': 0,
 'critical_staffing_shortage_today_no': 1,
 'critical_staffing_shortage_today_not_reported': 59,
 'critical_staffing_shortage_today_yes': 0,


In [22]:
# Convert the result to a Pandas DataFrame.
# The cursor is cloned before it is read: iterating a cursor exhausts it, so
# building the frame straight from `results_byAge` would produce an empty
# DataFrame the second time this cell is run. A clone starts unevaluated every time.
age_df = pd.DataFrame(list(results_byAge.clone()))

# Display the number of rows in the DataFrame
print('Number of rows:', len(age_df))

# Display the first 10 rows of the DataFrame
age_df.head(10)

Number of rows: 19529


Unnamed: 0,_id,state,date,critical_staffing_shortage_today_yes,critical_staffing_shortage_today_no,critical_staffing_shortage_today_not_reported,critical_staffing_shortage_anticipated_within_week_yes,critical_staffing_shortage_anticipated_within_week_no,critical_staffing_shortage_anticipated_within_week_not_reported,hospital_onset_covid,...,previous_day_admission_pediatric_covid_confirmed_5_11,previous_day_admission_pediatric_covid_confirmed_5_11_coverage,previous_day_admission_pediatric_covid_confirmed_unknown,previous_day_admission_pediatric_covid_confirmed_unknown_coverage,staffed_icu_pediatric_patients_confirmed_covid,staffed_icu_pediatric_patients_confirmed_covid_coverage,staffed_pediatric_icu_bed_occupancy,staffed_pediatric_icu_bed_occupancy_coverage,total_staffed_pediatric_icu_beds,total_staffed_pediatric_icu_beds_coverage
0,65837bc84b90140c22e41dfe,Nevada,2021-05-10,0,1,59,0,1,59,25,...,,0,,0,,0,0,1,0,1
1,65837bc84b90140c22e41dff,Delaware,2021-05-09,1,13,2,1,13,2,1,...,,0,,0,,0,105,14,172,14
2,65837bc84b90140c22e41e00,Rhode Island,2021-05-09,5,9,1,5,9,1,19,...,,0,,0,,0,83,14,108,14
3,65837bc84b90140c22e41e01,Colorado,2021-05-08,5,87,13,7,85,13,17,...,,0,,0,,0,169,99,402,99
4,65837bc84b90140c22e41e02,Alaska,2021-05-07,1,23,0,1,23,0,0,...,,0,,0,,0,58,24,73,24
5,65837bc84b90140c22e41e03,Wyoming,2021-05-07,3,26,2,4,25,2,1,...,,0,,0,,0,0,7,0,7
6,65837bc84b90140c22e41e04,Hawaii,2021-05-07,1,21,4,2,20,4,1,...,,0,,0,,0,1,18,0,18
7,65837bc84b90140c22e41e05,Nebraska,2021-05-07,9,91,1,15,85,1,0,...,,0,,0,,0,0,12,0,12
8,65837bc84b90140c22e41e06,North Dakota,2021-05-05,11,39,1,9,41,1,29,...,,0,,0,,0,0,5,0,5
9,65837bc84b90140c22e41e07,Nevada,2021-05-02,0,3,59,0,3,59,24,...,,0,,0,,0,0,3,0,3


In [25]:
# Print every column label as a plain list (the DataFrame display truncates them)
print(list(age_df.columns))

['_id', 'state', 'date', 'critical_staffing_shortage_today_yes', 'critical_staffing_shortage_today_no', 'critical_staffing_shortage_today_not_reported', 'critical_staffing_shortage_anticipated_within_week_yes', 'critical_staffing_shortage_anticipated_within_week_no', 'critical_staffing_shortage_anticipated_within_week_not_reported', 'hospital_onset_covid', 'hospital_onset_covid_coverage', 'inpatient_beds', 'inpatient_beds_coverage', 'inpatient_beds_used', 'inpatient_beds_used_coverage', 'inpatient_beds_used_covid', 'inpatient_beds_used_covid_coverage', 'previous_day_admission_adult_covid_confirmed', 'previous_day_admission_adult_covid_confirmed_coverage', 'previous_day_admission_adult_covid_suspected', 'previous_day_admission_adult_covid_suspected_coverage', 'previous_day_admission_pediatric_covid_confirmed', 'previous_day_admission_pediatric_covid_confirmed_coverage', 'previous_day_admission_pediatric_covid_suspected', 'previous_day_admission_pediatric_covid_suspected_coverage', 'staf

In [26]:
# Keep only the id, state, date, the COVID bed / hospitalisation counts, the
# previous-day confirmed adult admissions per age band, and the COVID deaths.
age_columns = [
    '_id',
    'state',
    'date',
    'inpatient_beds_used_covid',
    'total_adult_patients_hospitalized_confirmed_covid',
    'total_pediatric_patients_hospitalized_confirmed_covid',
    'previous_day_admission_adult_covid_confirmed_20-29',
    'previous_day_admission_adult_covid_confirmed_30-39',
    'previous_day_admission_adult_covid_confirmed_40-49',
    'previous_day_admission_adult_covid_confirmed_50-59',
    'previous_day_admission_adult_covid_confirmed_60-69',
    'previous_day_admission_adult_covid_confirmed_70-79',
    'previous_day_admission_adult_covid_confirmed_80+',
    'deaths_covid',
]
age_df_clean = age_df[age_columns]

age_df_clean.head(10)

Unnamed: 0,_id,state,date,inpatient_beds_used_covid,total_adult_patients_hospitalized_confirmed_covid,total_pediatric_patients_hospitalized_confirmed_covid,previous_day_admission_adult_covid_confirmed_20-29,previous_day_admission_adult_covid_confirmed_30-39,previous_day_admission_adult_covid_confirmed_40-49,previous_day_admission_adult_covid_confirmed_50-59,previous_day_admission_adult_covid_confirmed_60-69,previous_day_admission_adult_covid_confirmed_70-79,previous_day_admission_adult_covid_confirmed_80+,deaths_covid
0,65837bc84b90140c22e41dfe,Nevada,2021-05-10,348,316,1,1,4,4,3,7,5,3,4
1,65837bc84b90140c22e41dff,Delaware,2021-05-09,133,106,2,3,0,1,2,4,1,2,1
2,65837bc84b90140c22e41e00,Rhode Island,2021-05-09,152,112,0,1,0,0,1,0,0,0,2
3,65837bc84b90140c22e41e01,Colorado,2021-05-08,775,704,19,12,10,14,23,26,10,9,2
4,65837bc84b90140c22e41e02,Alaska,2021-05-07,44,42,1,0,1,1,1,0,0,0,3
5,65837bc84b90140c22e41e03,Wyoming,2021-05-07,26,27,0,0,0,0,0,0,2,0,0
6,65837bc84b90140c22e41e04,Hawaii,2021-05-07,66,55,3,0,2,3,2,0,2,0,2
7,65837bc84b90140c22e41e05,Nebraska,2021-05-07,125,111,3,1,2,1,1,4,3,1,2
8,65837bc84b90140c22e41e06,North Dakota,2021-05-05,115,78,1,1,0,1,1,2,0,3,1
9,65837bc84b90140c22e41e07,Nevada,2021-05-02,383,290,3,3,5,8,8,7,5,2,3


In [29]:
# Join the vaccination and hospital DataFrames on matching date and state,
# then order the combined rows by date
merged_df = (
    vaccine_df_clean
    .merge(age_df_clean, on=["date", "state"])
    .sort_values('date')
)
merged_df.head()

Unnamed: 0,_id_x,date,state,total_vaccinations,total_distributed,people_vaccinated,total_vaccinations_per_hundred,people_vaccinated_per_hundred,distributed_per_hundred,_id_y,...,total_adult_patients_hospitalized_confirmed_covid,total_pediatric_patients_hospitalized_confirmed_covid,previous_day_admission_adult_covid_confirmed_20-29,previous_day_admission_adult_covid_confirmed_30-39,previous_day_admission_adult_covid_confirmed_40-49,previous_day_admission_adult_covid_confirmed_50-59,previous_day_admission_adult_covid_confirmed_60-69,previous_day_admission_adult_covid_confirmed_70-79,previous_day_admission_adult_covid_confirmed_80+,deaths_covid
355,6581016133468c90a2cdf26d,2021-01-12,Alaska,35838.0,141600.0,22486.0,4.9,3.07,19.36,65837bca4b90140c22e44607,...,60,0,0,0,1,1,3,2,0,0
17526,6581016233468c90a2ceba96,2021-01-12,West Virginia,103330.0,160975.0,,5.77,,8.98,65837bc94b90140c22e42e10,...,803,2,2,3,9,8,27,21,17,11
4391,6581016233468c90a2ce2ad7,2021-01-12,Illinois,347005.0,903100.0,284035.0,2.74,2.24,7.13,65837bc84b90140c22e42a8c,...,3174,20,17,22,30,60,103,96,111,56
13621,6581016233468c90a2ce8c12,2021-01-12,Puerto Rico,74559.0,277200.0,61486.0,2.33,1.93,8.68,65837bca4b90140c22e44677,...,307,7,0,3,4,2,5,6,7,10
15396,6581016233468c90a2ce9fef,2021-01-12,Texas,911461.0,1949125.0,809148.0,3.14,2.79,6.72,65837bc94b90140c22e43d30,...,14171,121,81,110,156,275,391,394,303,235


In [30]:
# Drop the two MongoDB _id columns left over from the merge.
# They are removed by label rather than by position, and missing labels are
# ignored, so re-running this cell cannot remove any other column.
merged_df = merged_df.drop(columns=['_id_x', '_id_y'], errors='ignore')

merged_df.head()

Unnamed: 0,date,state,total_vaccinations,total_distributed,people_vaccinated,total_vaccinations_per_hundred,people_vaccinated_per_hundred,distributed_per_hundred,inpatient_beds_used_covid,total_adult_patients_hospitalized_confirmed_covid,total_pediatric_patients_hospitalized_confirmed_covid,previous_day_admission_adult_covid_confirmed_20-29,previous_day_admission_adult_covid_confirmed_30-39,previous_day_admission_adult_covid_confirmed_40-49,previous_day_admission_adult_covid_confirmed_50-59,previous_day_admission_adult_covid_confirmed_60-69,previous_day_admission_adult_covid_confirmed_70-79,previous_day_admission_adult_covid_confirmed_80+,deaths_covid
355,2021-01-12,Alaska,35838.0,141600.0,22486.0,4.9,3.07,19.36,67,60,0,0,0,1,1,3,2,0,0
17526,2021-01-12,West Virginia,103330.0,160975.0,,5.77,,8.98,819,803,2,2,3,9,8,27,21,17,11
4391,2021-01-12,Illinois,347005.0,903100.0,284035.0,2.74,2.24,7.13,3729,3174,20,17,22,30,60,103,96,111,56
13621,2021-01-12,Puerto Rico,74559.0,277200.0,61486.0,2.33,1.93,8.68,410,307,7,0,3,4,2,5,6,7,10
15396,2021-01-12,Texas,911461.0,1949125.0,809148.0,3.14,2.79,6.72,14885,14171,121,81,110,156,275,391,394,303,235


In [31]:
# Pretty-print the remaining column labels to check that only the wanted ones are left
pprint(list(merged_df.columns))

['date',
 'state',
 'total_vaccinations',
 'total_distributed',
 'people_vaccinated',
 'total_vaccinations_per_hundred',
 'people_vaccinated_per_hundred',
 'distributed_per_hundred',
 'inpatient_beds_used_covid',
 'total_adult_patients_hospitalized_confirmed_covid',
 'total_pediatric_patients_hospitalized_confirmed_covid',
 'previous_day_admission_adult_covid_confirmed_20-29',
 'previous_day_admission_adult_covid_confirmed_30-39',
 'previous_day_admission_adult_covid_confirmed_40-49',
 'previous_day_admission_adult_covid_confirmed_50-59',
 'previous_day_admission_adult_covid_confirmed_60-69',
 'previous_day_admission_adult_covid_confirmed_70-79',
 'previous_day_admission_adult_covid_confirmed_80+',
 'deaths_covid']


In [32]:
# Save dataframe to csv.
# The Output folder is created first (no-op when it already exists) because
# to_csv does not create missing directories and would otherwise fail.
Path("Output").mkdir(parents=True, exist_ok=True)
merged_df.to_csv("Output/Covid19_Data.csv", index=False)

In [35]:
# Convert CSV to JSON for dashboard.
# Each CSV row becomes one JSON object keyed by column name. Values are written
# exactly as csv.DictReader yields them, i.e. as strings, with '' for empty cells.
# `with` closes each file even if reading or writing raises, and newline='' is
# what the csv module asks for when it is handed an open file.
with open('Output/Covid19_Data.csv', 'r', newline='', encoding='utf-8') as csvfile:
    # With no explicit fieldnames, DictReader takes the keys from the header
    # row, so the column list does not have to be repeated (and kept in sync) here.
    reader = csv.DictReader(csvfile)

    # Read every data row into a list
    data = list(reader)

# Use json.dump to write the entire list to the file
with open('Output/Covid19_Data.json', 'w', encoding='utf-8') as jsonfile:
    json.dump(data, jsonfile, indent=2)
