In [2]:
import pandas as pd


In [55]:
# Load the JSON file
df = pd.read_json("CIK0001415603.json")

In [56]:
# Print the DataFrame
df.head()

Unnamed: 0,cik,entityName,facts
dei,1415603,"NUGEN HOLDINGS, INC.",{'EntityCommonStockSharesOutstanding': {'label...
us-gaap,1415603,"NUGEN HOLDINGS, INC.",{'AccountsPayableAndAccruedLiabilitiesCurrent'...


In [84]:
# Pull out the company's name and CIK

entityName = df.iloc[0,1]
entityCIK = str(df.iloc[0,0])
print('CIK: ' + entityCIK)
print('Company: ' + entityName)

CIK: 1415603
Company: NUGEN HOLDINGS, INC.


The JSON files come in two main partitions (need to find out exactly what they mean)

- DEI

- US-GAAP

General Structure of the JSON file:

- [Partition]
    - Heading Name
        - Label (more verbose heading)
        - Description
        - Units
            - Unit of Measurement
                - ...
                - Val
                - Form
                - Filed (date)
                - ...
    

## DEI Partition

In [89]:
partition = 'DEI'
dei = df.iloc[0,2]
subGroups = dei.items()

# Initialise a dataframe to hold the data
columnNames = ['company', 'CIK', 'headingFull', 'units', 'filedDate', 'value']
masterDf = pd.DataFrame(columns=columnNames)

# DEI is broken down into subgroups
for group in subGroups:
    # Each subgroup has a heading, detailed description, and unit of measurement
    row = {}
    heading = group[0]
    row['company'] = entityName
    row['CIK'] = entityCIK
    row['headingFull'] = group[1]['label']
    desc = group[1]['description']
    
    # The units component contains the actual measurement and the associated metadata (in the 'records' list)
    units = group[1]['units']
    for unit, records in units.items():
        print(heading)
        row['units'] = unit
        # We have a record for every filing date
        for record in records:
            if record['form'] == '10-Q':
                row['filedDate'] = record['filed']
                row['value'] = record['val']
                
                # Append the row to the main dataframe
                nextIdx = len(masterDf)
                masterDf.loc[nextIdx] = row

masterDf.head()

EntityCommonStockSharesOutstanding
EntityPublicFloat


Unnamed: 0,company,CIK,headingFull,units,filedDate,value
0,"NUGEN HOLDINGS, INC.",1415603,"Entity Common Stock, Shares Outstanding",shares,2011-08-19,56966564
1,"NUGEN HOLDINGS, INC.",1415603,"Entity Common Stock, Shares Outstanding",shares,2012-02-16,57081702
2,"NUGEN HOLDINGS, INC.",1415603,"Entity Common Stock, Shares Outstanding",shares,2012-05-14,57081702


## US-GAAP Partition

In [23]:
partition = 'US-GAAP'
usGaap = df.iloc[1,2]
subGroups = usGaap.items()

for group in subGroups:
    heading = group[0]
    headingFull = group[1]['label']
    desc = group[1]['description']
    units = group[1]['units']
    if 'USD' in units.keys():
        pass

for unit in units['USD']:
    print(unit)


{'end': '2011-12-31', 'val': 3500, 'accn': '0001144204-12-009609', 'fy': 2012, 'fp': 'Q1', 'form': '10-Q', 'filed': '2012-02-16', 'frame': 'CY2011Q4I'}
{'end': '2012-03-31', 'val': 3500, 'accn': '0001144204-12-028547', 'fy': 2012, 'fp': 'Q2', 'form': '10-Q', 'filed': '2012-05-14', 'frame': 'CY2012Q1I'}
