## Install required python libraries

In [None]:
!pip install requests

In [None]:
!pip install psycopg2-binary

In [None]:
!pip install pandas

## Import the libraries

In [1]:
import json
import psycopg2
import requests
import time
import logging
import pandas as pd

## Initialize database constants
> Enter the `database` & `username` created in CockroachDB.

> Enter the `tablename` you wish to create.

In [2]:
# Where the CockroachDB node is reachable.
host, port = "localhost", 26257

# Database and user to connect as (both must already exist in CockroachDB).
database, username = "bank", "cpuser"

# Table used by every create / read / delete / drop helper in this notebook.
tablename = "jsontbl"

## Initialize API
> We will use `http://dummy.restapiexample.com/api/v1/employees` for demonstration purposes.

> Sample JSON output:

```json
{
    "status": "success",
    "data": [
        {
            "id": "1",
            "employee_name": "...",
            "employee_salary": "...",
            "employee_age": "...",
            "profile_image": ""
        },
        {
            "id": "2",
            "employee_name": "...",
            "employee_salary": "...",
            "employee_age": "...",
            "profile_image": ""
        },
        { 
            ... 
        }
    ]
}
```

In [3]:
# REST endpoint fetched later with requests.get(); the insert query reads the
# top-level "data" array of its JSON response.
url = "http://dummy.restapiexample.com/api/v1/employees"

## Create a connection to CockroachDB on Red Hat Marketplace

In [4]:
# Open one session using the constants from the configuration cell.
conn = psycopg2.connect(host=host, port=port, database=database, user=username)

# Run in autocommit mode so statements issued through `cur` are not left
# inside an open transaction.
conn.autocommit = True

# Module-level cursor, reused by the INSERT cell further down.
cur = conn.cursor()

## Create a table in CockroachDB

In [10]:
def create_table(conn):
    """Create the table named by the global ``tablename`` if it does not exist.

    The table has two columns: ``id`` (UUID primary key, defaulting to
    ``uuid_v4()::UUID``) and ``posts`` (JSONB).

    Args:
        conn: Open database connection exposing ``cursor()`` and ``commit()``.
    """
    # Adjacent literals are joined by the compiler into a single statement string.
    ddl_template = (
        'CREATE TABLE IF NOT EXISTS {} ( '
        '        id UUID DEFAULT uuid_v4()::UUID PRIMARY KEY, '
        '        posts JSONB '
        '        );'
    )
    with conn.cursor() as cursor:
        ddl = ddl_template.format(tablename)
        cursor.execute(ddl)
        status = cursor.statusmessage
        logging.debug("create_table(): status message: {}".format(status))
    conn.commit()

In [11]:
# Safe to re-run: the statement is CREATE TABLE IF NOT EXISTS.
create_table(conn)

## Read the contents from the table in CockroachDB

In [6]:
def print_content(conn):
    """Fetch every row of the table named by the global ``tablename``.

    Despite the name, nothing is printed; the rows are handed back to the caller.

    Args:
        conn: Open database connection exposing ``cursor()``.

    Returns:
        Whatever ``cursor.fetchall()`` yields for ``SELECT *`` on the table.
    """
    select_all = "SELECT * FROM {}"
    with conn.cursor() as cursor:
        cursor.execute(select_all.format(tablename))
        status = cursor.statusmessage
        logging.debug("print_content(): status message: {}".format(status))
        return cursor.fetchall()

## Delete the contents from the table in CockroachDB

In [7]:
def delete_table_contents(conn):
    """Remove every row from the table named by the global ``tablename``.

    The table itself is kept; only its contents are deleted.

    Args:
        conn: Open database connection exposing ``cursor()`` and ``commit()``.
    """
    delete_all = "DELETE FROM {}"
    with conn.cursor() as cursor:
        cursor.execute(delete_all.format(tablename))
        status = cursor.statusmessage
        logging.debug("delete_table_contents(): status message: {}".format(status))
    conn.commit()

## Delete the table from CockroachDB

In [8]:
def drop_table(conn):
    """Drop the table named by the global ``tablename``.

    Args:
        conn: Open database connection exposing ``cursor()`` and ``commit()``.
    """
    drop_stmt = "DROP TABLE {}"
    with conn.cursor() as cursor:
        cursor.execute(drop_stmt.format(tablename))
        status = cursor.statusmessage
        logging.debug("drop_table(): status message: {}".format(status))
    conn.commit()

## Make API call and store the response

In [12]:
# A timeout keeps the notebook from hanging forever if the API is unreachable.
req = requests.get(url, headers={"User-Agent": "Python"}, timeout=30)

# Fail fast on HTTP errors (4xx/5xx) instead of parsing an error response
# and inserting it into the database as if it were data.
req.raise_for_status()

# Decode the JSON.
resp = req.json()

# Convert the JSON to a string to send to the database.
data = json.dumps(resp)

##### We structure our query so that we extract the `data` field, and then expand that and insert each individual element into the database as a separate row.

In [13]:
# Insert into the table named in the configuration cell instead of a
# hard-coded "bank.jsontbl", so this statement targets the same table as
# create_table() / print_content() when `tablename` is changed.
# The payload itself is still passed as a bound parameter (%s).
cur.execute(
    """INSERT INTO {} (posts)
        SELECT json_array_elements(%s->'data')""".format(tablename),
    (data,),
)

## Query the results from the table

In [14]:
# Rows come from SELECT * on the (id, posts) table; the later DataFrame cell
# reads the employee fields from element [1] of each row.
result = print_content(conn)

## Close the connection to the CockroachDB on Red Hat Marketplace

In [15]:
# `result` was already fetched with fetchall(), so the DataFrame cells below
# do not need the connection. Any later cell that uses `conn` or `cur` does.
cur.close()
conn.close()

## Store the queried unstructured JSON in a pandas DataFrame

In [72]:
# Start from an empty frame that only fixes the column layout.
df = pd.DataFrame(
    columns=[
        'Employee_ID',
        'Employee_Name',
        'Employee_Salary',
        'Employee_Age',
    ]
)

In [73]:
# Collect all rows first and build the frame in one call. Appending with
# df.loc[len(df)] grows the frame one row at a time and, if this cell is
# re-run, appends every record a second time.
# Element [1] of each result row is the `posts` JSON holding the employee fields.
records = [
    [row[1]['id'], row[1]['employee_name'], row[1]['employee_salary'], row[1]['employee_age']]
    for row in result
]
df = pd.DataFrame(
    records,
    columns=['Employee_ID', 'Employee_Name', 'Employee_Salary', 'Employee_Age'],
)

In [79]:
# Preview only the first 10 rows rather than rendering the whole frame.
df.head(10)

Unnamed: 0,Employee_ID,Employee_Name,Employee_Salary,Employee_Age
0,17,Paul Byrd,725000,64
1,6,Brielle Williamson,372000,61
2,4,Cedric Kelly,433060,22
3,13,Charde Marshall,470600,36
4,6,Brielle Williamson,372000,61
5,2,Garrett Winters,170750,63
6,7,Herrod Chandler,137500,59
7,21,Jenette Caldwell,345000,30
8,16,Michael Silva,198500,66
9,11,Jena Gaines,90560,30


## Summary

We learned how to get unstructured data from a REST API, create a table in CockroachDB, and store the unstructured JSON data in that table. We also learned how to query the unstructured data from the CockroachDB table into a pandas DataFrame.

## Future Scope

The pandas dataframe can be further used to clean, cleanse and refine the data. The data can be used to build Machine Learning models as well.

## (Optional) Delete the records from the table in CockroachDB

In [None]:
# NOTE: `conn` was closed in an earlier cell; re-run the connection cell before uncommenting this line.
#delete_table_contents(conn)

## (Optional) Delete the table from CockroachDB

In [None]:
# NOTE: `conn` was closed in an earlier cell; re-run the connection cell before uncommenting this line.
#drop_table(conn)