# Crowdfunding data sets

[The Crowdfunding Offerings Data Sets](https://www.sec.gov/dera/data/crowdfunding-offerings-data-sets) provide the structured data from crowdfunding offering statements, updates, annual reports, and terminations filed with the Commission. 

In particular, I was interested in the files "FORM_C_ISSUER_INFORMATION", "FORM_C_DISCLOSURE", and "FORM_C_SUBMISSION". I used the Pandas library to merge these tables, taking *accession_number* as the key column, and then to concatenate the results so that all the data ends up inside a single Excel document.

In [1]:
# import our libraries
import requests
import time
import datetime
import pandas as pd
import numpy as np

## Merge dataframes by *ACCESSION_NUMBER*

The code below picks up all "FORM_C_ISSUER_INFORMATION", "FORM_C_DISCLOSURE", and "FORM_C_SUBMISSION" files and merges them using *accession_number* as the key column. It then adds two more columns, *Year* and *Quarter*, and saves the result as a new Excel file.

In [67]:
# Form C tables shipped in every quarterly data-set folder, in merge order.
FORM_C_TABLES = ('FORM_C_ISSUER_INFORMATION', 'FORM_C_DISCLOSURE', 'FORM_C_SUBMISSION')


def quarters_in_year(year):
    """Return the quarters of *year* that this notebook processes.

    The data sets start with the second quarter of 2016, and at the time of
    writing only the first quarter of 2022 had been published; every other
    year is processed in full.
    """
    if year == 2016:
        return [2, 3, 4]
    if year == 2022:
        return [1]
    return [1, 2, 3, 4]


def merge_quarter_tables(folder, year, quarter):
    """Read the three Form C tables in *folder* and merge them into one frame.

    The tables are joined on ``ACCESSION_NUMBER`` with ``pd.merge`` (default
    join type), then constant ``Year`` and ``Quarter`` columns are appended.

    Raises:
        FileNotFoundError: if any of the three ``.tsv`` files is missing.
    """
    # Read all three files first so a missing file is detected before merging.
    tables = [pd.read_csv(folder + '/' + name + '.tsv', sep='\t')
              for name in FORM_C_TABLES]

    merged = tables[0]
    for table in tables[1:]:
        merged = pd.merge(merged, table, on='ACCESSION_NUMBER')

    merged['Year'] = year
    merged['Quarter'] = 'Q' + str(quarter)
    return merged


# Every (year, quarter) pair to process, in chronological order.
periods = [(y, q) for y in range(2016, 2023) for q in quarters_in_year(y)]

for period_year, period_quarter in periods:
    string = str(period_year) + 'Q' + str(period_quarter) + '_cf'

    # Only the reads are guarded: a missing input ends the run (as before),
    # but the period that stopped it is now reported instead of being hidden.
    try:
        df_all = merge_quarter_tables('CrowdFunding/' + string,
                                      period_year, period_quarter)
    except FileNotFoundError as missing:
        print('Stopped at ' + string + ': ' + str(missing))
        break

    # The index is written as well (pandas default); the yearly concatenation
    # step deletes the resulting 'Unnamed: 0' column after reading the file.
    df_all.to_excel('CrowdFunding_merged/' + string + '.xlsx')

print('Task complete')

Task complete


## Concatenate them quarterly by year

Now that the three major files are merged, I can concatenate the quarterly files of each year into a single yearly file.

In [68]:
# Stack the merged quarterly workbooks of each year into one yearly workbook.
year = list(range(2016, 2023))

for current_year in year:
    # 2016 starts at Q2 and 2022 only has Q1; all other years have four quarters.
    if current_year == 2016:
        quarters = (2, 3, 4)
    elif current_year == 2022:
        quarters = (1,)
    else:
        quarters = (1, 2, 3, 4)

    # Load every quarter of the year, dropping the saved index column.
    frames = []
    for quarter in quarters:
        string = f'{current_year}Q{quarter}_cf'
        frame = pd.read_excel(f'CrowdFunding_merged/{string}.xlsx')
        del frame['Unnamed: 0']
        frames.append(frame)

    # Append the quarters one after another, renumbering the rows each time.
    df_whole = frames[0]
    for frame in frames[1:]:
        df_whole = pd.concat([df_whole, frame], axis=0, ignore_index=True)

    df_whole.to_excel(f'CrowdFunding_merged/{current_year}_cf.xlsx', index=False)

print('Task complete')

Task complete


## Concatenate all into one

Finally, concatenate the yearly files into a single .xlsx file.

In [69]:
# Combine the yearly workbooks (2016 through 2022) into one workbook.
year = list(range(2016, 2023))

# Read all yearly files first, oldest to newest.
yearly_frames = [pd.read_excel(f'CrowdFunding_merged/{single_year}_cf.xlsx')
                 for single_year in year]

# Append the years one after another, renumbering the rows each time.
df_whole = yearly_frames[0]
for yearly_frame in yearly_frames[1:]:
    df_whole = pd.concat([df_whole, yearly_frame], axis=0, ignore_index=True)

df_whole.to_excel('CrowdFunding_merged/2016-2022_cf.xlsx', index=False)

print('Task complete')

Task complete
