# 03 Uploading Data to Database

In [85]:
import os
from urllib.parse import quote

import couchdb
import numpy as np
import pandas as pd

## 3.1 Creating CouchDB Docker Container
- CouchDB Docker Installation:

    - docker pull couchdb
    
    - docker run -d --name mycouchdb -p 5984:5984 -e COUCHDB_USER=admin -e COUCHDB_PASSWORD=password couchdb

- Creating _users db:

    - curl -X PUT http://admin:password@127.0.0.1:5984/_users

## 3.2 Connecting to CouchDB

In [86]:
# Connection settings for the local CouchDB instance.
# Credentials come from the COUCHDB_USER / COUCHDB_PASSWORD environment
# variables (the same names passed to `docker run`) and fall back to the
# local development defaults when those variables are not set.
username = os.environ.get('COUCHDB_USER', 'admin')
password = os.environ.get('COUCHDB_PASSWORD', 'password')
db_url = 'http://127.0.0.1:5984/'
# Percent-encode the credentials so characters such as '@', ':' or '/'
# inside them cannot corrupt the URL structure.
db_auth_url = f'http://{quote(username, safe="")}:{quote(password, safe="")}@127.0.0.1:5984/'

In [87]:
# Build the client handle for the CouchDB server from the authenticated URL.
server = couchdb.Server(url=db_auth_url)
# Show the handle as the cell output.
server

<Server 'http://127.0.0.1:5984/'>

## 3.3 Creating Database

In [88]:
# Create the 'disasters' database, or open it when it already exists.
try:
    db = server.create('disasters')
except couchdb.PreconditionFailed:
    # Raised by server.create when a database with this name already exists.
    # Any other failure (wrong credentials, server unreachable, ...) is left
    # to propagate instead of being mistaken for "already exists".
    db = server['disasters']
db

<Database 'disasters'>

## 3.4 Importing Data

In [89]:
# Load the disasters-vs-GDP table; the first CSV column is used as the index.
df = pd.read_csv(
    'data/nat_disasters_vs_gdp.csv',
    index_col=0,
)
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,Year,Group,Type,Event Name,Country,ISO,Region,Continent,Total Deaths,Total Affected,Duration,GDP-1,GDP,GDP+1,GDP+2,GDP+3
0,1961,Meteorological,Storm,,Bangladesh,BGD,Southern Asia,Asia,11000.0,,0,4274894000.0,4817580000.0,5081413000.0,5319458000.0,5386055000.0
1,1961,Meteorological,Storm,,Bangladesh,BGD,Southern Asia,Asia,,,0,4274894000.0,4817580000.0,5081413000.0,5319458000.0,5386055000.0
2,1961,Meteorological,Storm,,Bangladesh,BGD,Southern Asia,Asia,266.0,,0,4274894000.0,4817580000.0,5081413000.0,5319458000.0,5386055000.0
3,1961,Meteorological,Storm,Hattie,Belize,BLZ,Central America,Americas,275.0,,0,28071890.0,29964370.0,31856920.0,33749410.0,36193830.0
4,1961,Climatological,Drought,,Canada,CAN,Northern America,Americas,,,0,40461720000.0,40934950000.0,42227450000.0,45029990000.0,49377520000.0


## 3.5 Preparing Data

In [90]:
# NaN is not valid JSON, so swap every NaN for None before the rows are
# turned into documents.
df = df.replace(to_replace={np.nan: None})

## 3.6 Upload to Database

In [91]:
# ONLY DO THIS STEP WHEN NECESSARY
# Uploads every row of df as one CouchDB document, but only while the
# database is still empty, so re-running the cell does not create duplicates.
# Rows are sent in batches through the bulk API (one HTTP request per batch)
# rather than one request per row, which took ~10min.
if db.info()['doc_count'] == 0:

    batch_size = 1000
    docs = [row.to_dict() for _, row in df.iterrows()]

    doc_ids = []

    for start in range(0, len(docs), batch_size):
        batch = docs[start:start + batch_size]
        for success, doc_id, rev_or_exc in db.update(batch):
            if not success:
                # db.update reports per-document failures in its result list
                # instead of raising; re-raise so a failed upload is not
                # silently ignored.
                raise rev_or_exc
            doc_ids.append(doc_id)

    print(len(doc_ids))
    if doc_ids:
        # An empty DataFrame yields no ids, so there is no first id to show.
        print(doc_ids[0])

else:
    print('Data already exists in database')

Data already exists in database
