In [1]:
import pandas as pd
import numpy as np
class MetadataCreator:
    """Builds field-metadata tables and writes them out as CSV files."""

    @staticmethod
    def create_df(data):
        """Build a metadata DataFrame from an iterable of field descriptions.

        Args:
            data: Iterable of indexable rows. Items 0, 1 and 2 of each row are
                used as field name, field type and nullable flag; any further
                items are ignored.

        Returns:
            pandas.DataFrame with the columns ``field_name``, ``field_type``
            and ``nullable`` and one row per entry of ``data`` (no rows when
            ``data`` is empty).
        """
        columns = ['field_name', 'field_type', 'nullable']

        # Collect all rows first and build the frame once: DataFrame.append was
        # removed in pandas 2.0, and appending in a loop copies the frame per row.
        records = [(row[0], row[1], row[2]) for row in data]

        # dtype=object stores the values as given instead of inferring a dtype
        # per column.
        return pd.DataFrame(records, columns=columns, dtype=object)

    @classmethod
    def to_csv(cls, data, path):
        """Build the metadata table for ``data`` and write it as a CSV file.

        Args:
            data: Rows accepted by ``create_df``.
            path: Location of the file below the ``./data`` base directory. It
                is appended to the base directory after a ``/`` separator.

        The file is written without the index column. Nothing here creates the
        target directory.
        """
        df = cls.create_df(data)
        base_path = './data'
        df.to_csv(f'{base_path}/{path}', index=False)

In [2]:
def stock_price_metadata_ontology():
    """Write the stock_price ontology field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['open_date', 'date', False],
        ['open_price', 'double', False],
        ['high_price', 'double', False],
        ['low_price', 'double', False],
        ['close_price', 'double', False],
        ['adj_close_price', 'double', False],
        # NOTE(review): confirm 'int16' is wide enough here (a 16-bit signed int tops out at 32,767).
        ['volume', 'int16', False],
        ['ticker_symbol', 'string', False],
        ['stock_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/stock_price/ontology/stock_price_metadata_ontology.csv')
# Generate the stock_price ontology metadata CSV.
stock_price_metadata_ontology()

In [3]:
def news_metadata_ontology():
    """Write the news ontology field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.

    NOTE(review): a function with this same name, rows and target path is
    defined again later in this file; one of the two copies can likely go.
    """
    fields = [
        ['news_id', 'string', False],
        ['title', 'string', False],
        ['content', 'string', False],
        ['published_date', 'date', False],
        ['publisher', 'string', False],
        ['mentioned_tickers', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/news/ontology/news_metadata_ontology.csv')

# Generate the news ontology metadata CSV.
news_metadata_ontology()

In [4]:
def news_metadata_processed():
    """Write the news processed field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['news_id', 'string', False],
        ['title', 'string', False],
        ['content', 'string', False],
        ['published_date', 'date', False],
        ['publisher', 'string', False],
        ['mentioned_tickers', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/news/processed/news_metadata_processed.csv')

# Generate the news processed metadata CSV.
news_metadata_processed()

In [5]:
def comp_details_metadata_processed():
    """Write the company_details processed field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['ticker_symbol', 'string', False],
        ['company_name', 'string', False],
        ['website_link', 'string', True],
        # NOTE(review): confirm 'int16' is wide enough here (a 16-bit signed int tops out at 32,767).
        ['num_employees', 'int16', True],
        ['description', 'string', True],
        ['telephone', 'string', True],
        ['sector', 'string', True],
        ['industry', 'string', True],
        ['country', 'string', True],
        ['state', 'string', True],
        ['city', 'string', True],
        ['shareholders_name', 'string', False],
        ['percentage', 'double', False],
    ]

    MetadataCreator.to_csv(fields, '/company_details/processed/company_details_metadata_processed.csv')
# Generate the company_details processed metadata CSV.
comp_details_metadata_processed()

In [6]:
def comp_details_metadata_ontology():
    """Write the company_details ontology field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['ticker_symbol', 'string', False],
        ['company_name', 'string', False],
        ['website_link', 'string', True],
        # NOTE(review): confirm 'int16' is wide enough here (a 16-bit signed int tops out at 32,767).
        ['num_employees', 'int16', True],
        ['description', 'string', True],
        ['telephone', 'string', True],
        ['sector', 'string', True],
        ['industry', 'string', True],
        ['country', 'string', True],
        ['state', 'string', True],
        ['city', 'string', True],
        ['shareholders_name', 'string', False],
        ['percentage', 'double', False],
    ]

    MetadataCreator.to_csv(fields, '/company_details/ontology/company_details_metadata_ontology.csv')
# Generate the company_details ontology metadata CSV.
comp_details_metadata_ontology()

In [7]:
def yahoo_comp_details_clean():
    """Write the yahoo_comp_details clean field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['ticker_symbol_yh', 'string', False],
        ['company_name_yh', 'string', False],
        ['telephone_yh', 'string', True],
        ['country_yh', 'string', True],
        ['state_yh', 'string', True],
        ['city_yh', 'string', True],
        ['sector_yh', 'string', True],
        ['industry_yh', 'string', True],
        # NOTE(review): confirm 'int16' is wide enough here (a 16-bit signed int tops out at 32,767).
        ['num_employees_yh', 'int16', True],
        ['description_yh', 'string', True],
        ['website_link_yh', 'string', True],
    ]

    MetadataCreator.to_csv(fields, '/yahoo_comp_details/clean/yahoo_comp_details_metadata_clean.csv')
# Generate the yahoo_comp_details clean metadata CSV.
yahoo_comp_details_clean()

In [8]:
def state_codes_metadata_raw():
    """Write the state_codes raw field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['State_name', 'string', True],
        ['Abbrev', 'string', True],
        ['Code', 'string', True],
    ]

    MetadataCreator.to_csv(fields, '/state_codes/raw/state_codes_metadata_raw.csv')
# Generate the state_codes raw metadata CSV.
state_codes_metadata_raw()

In [9]:
def yahoo_comp_details_metadata_raw():
    """Write the yahoo_comp_details raw field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['ticker', 'string', True],
        ['company_name', 'string', True],
        ['address', 'string', True],
        ['telephone', 'string', True],
        ['website_url', 'string', True],
        ['industry', 'string', True],
        ['sector', 'string', True],
        ['no_employees', 'string', True],
        ['description', 'string', True],
    ]

    MetadataCreator.to_csv(fields, '/yahoo_comp_details/raw/yahoo_comp_details_metadata_raw.csv')
# Generate the yahoo_comp_details raw metadata CSV.
yahoo_comp_details_metadata_raw()

In [10]:
def nasdaq_comp_details_metadata_raw():
    """Write the nasdaq_comp_details raw field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['ticker', 'string', True],
        ['company_name', 'string', True],
        ['description', 'string', True],
        ['industry', 'string', True],
        ['sector', 'string', True],
        ['telephone', 'string', True],
        ['address', 'string', True],
    ]

    MetadataCreator.to_csv(fields, '/nasdaq_comp_details/raw/nasdaq_comp_details_metadata_raw.csv')
# Generate the nasdaq_comp_details raw metadata CSV.
nasdaq_comp_details_metadata_raw()

In [11]:
def nasdaq_comp_details_metadata_clean():
    """Write the nasdaq_comp_details clean field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['ticker_symbol_nq', 'string', False],
        ['company_name_nq', 'string', False],
        ['description_nq', 'string', True],
        ['industry_nq', 'string', True],
        ['sector_nq', 'string', True],
        ['telephone_nq', 'string', True],
        ['country_nq', 'string', True],
        ['state_nq', 'string', True],
        ['city_nq', 'string', True],
        ['website_link_nq', 'string', True],
        # NOTE(review): confirm 'int16' is wide enough here (a 16-bit signed int tops out at 32,767).
        ['num_employees_nq', 'int16', True],
    ]

    MetadataCreator.to_csv(fields, '/nasdaq_comp_details/clean/nasdaq_comp_details_metadata_clean.csv')
# Generate the nasdaq_comp_details clean metadata CSV.
nasdaq_comp_details_metadata_clean()

In [12]:
def bi_comp_details_metadata_raw():
    """Write the bi_comp_details raw field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['address', 'string', True],
        ['company_name', 'string', True],
        ['description', 'string', True],
        ['fax', 'string', True],
        ['shareholders', 'string', True],
        ['postoffice_box', 'string', True],
        ['telephone', 'string', True],
        ['ticker', 'string', True],
        ['website', 'string', True],
    ]

    # NOTE(review): the other files written in this notebook are named
    # '<source>_metadata_<stage>.csv'; this one has no '_metadata_' part.
    # Confirm whether consumers expect this exact name before changing it.
    MetadataCreator.to_csv(fields, '/bi_comp_details/raw/bi_comp_details_raw.csv')
# Generate the bi_comp_details raw metadata CSV.
bi_comp_details_metadata_raw()

In [13]:
def shareholders_ddl():
    """Write the shareholders ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['shareholder_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/company_details/ddl/shareholders_metadata_ddl.csv')
# Generate the shareholders ddl metadata CSV.
shareholders_ddl()

In [14]:
def country_ddl():
    """Write the country ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['country_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/company_details/ddl/country_metadata_ddl.csv')
# Generate the country ddl metadata CSV.
country_ddl()

In [15]:
def bi_comp_details_metadata_clean():
    """Write the bi_comp_details clean field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['company_name_bi', 'string', False],
        ['ticker_symbol_bi', 'string', False],
        ['description_bi', 'string', True],
        ['telephone_bi', 'string', True],
        ['website_link_bi', 'string', True],
        ['shareholders_name_bi', 'string', True],
        ['percentage_bi', 'double', True],
        ['city_bi', 'string', True],
        ['country_bi', 'string', True],
        ['state_bi', 'string', True],
        # NOTE(review): confirm 'int16' is wide enough here (a 16-bit signed int tops out at 32,767).
        ['num_employees_bi', 'int16', True],
    ]

    MetadataCreator.to_csv(fields, '/bi_comp_details/clean/bi_comp_details_metadata_clean.csv')
# Generate the bi_comp_details clean metadata CSV.
bi_comp_details_metadata_clean()

In [16]:
def state_ddl():
    """Write the state ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['state_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/company_details/ddl/state_metadata_ddl.csv')
# Generate the state ddl metadata CSV.
state_ddl()

In [17]:
def city_ddl():
    """Write the city ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['city_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/company_details/ddl/city_metadata_ddl.csv')
# Generate the city ddl metadata CSV.
city_ddl()

In [18]:
def industry_ddl():
    """Write the industry ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['industry_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/company_details/ddl/industry_metadata_ddl.csv')
# Generate the industry ddl metadata CSV.
industry_ddl()

In [19]:
def sector_ddl():
    """Write the sector ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['sector_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/company_details/ddl/sector_metadata_ddl.csv')
# Generate the sector ddl metadata CSV.
sector_ddl()

In [20]:
def comp_details_ddl():
    """Write the company_details ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['company_id', 'string', False],
        ['company_name', 'string', False],
        ['website_link', 'string', False],
        # NOTE(review): confirm 'int16' is wide enough here (a 16-bit signed int tops out at 32,767).
        ['num_employees', 'int16', False],
        ['description', 'string', False],
        ['telephone', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/company_details/ddl/company_details_metadata_ddl.csv')
# Generate the company_details ddl metadata CSV.
comp_details_ddl()

In [21]:
def company_IN_INDUSTRY_industry_metadata():
    """Write the company_IN_INDUSTRY_industry ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['company_id', 'string', False],
        ['industry_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/company_details/ddl/company_IN_INDUSTRY_industry_metadata_ddl.csv')

In [22]:
# Generate the company_IN_INDUSTRY_industry ddl metadata CSV.
company_IN_INDUSTRY_industry_metadata()

In [23]:
def industry_MEMBER_OF_sector_metadata():
    """Write the industry_MEMBER_OF_sector ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['industry_id', 'string', False],
        ['sector_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/company_details/ddl/industry_MEMBER_OF_sector_metadata_ddl.csv')

In [24]:
# Generate the industry_MEMBER_OF_sector ddl metadata CSV.
industry_MEMBER_OF_sector_metadata()

In [25]:
def company_LOCALIZED_IN_city_metadata():
    """Write the company_LOCALIZED_IN_city ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['company_id', 'string', False],
        ['city_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/company_details/ddl/company_LOCALIZED_IN_city_metadata_ddl.csv')

In [26]:
# Generate the company_LOCALIZED_IN_city ddl metadata CSV.
company_LOCALIZED_IN_city_metadata()

In [27]:
def city_IS_IN_state_metadata():
    """Write the city_IS_IN_state ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['city_id', 'string', False],
        ['state_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/company_details/ddl/city_IS_IN_state_metadata_ddl.csv')

In [28]:
# Generate the city_IS_IN_state ddl metadata CSV.
city_IS_IN_state_metadata()

In [29]:
def city_IS_IN_country_metadata():
    """Write the city_IS_IN_country ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['city_id', 'string', False],
        ['country_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/company_details/ddl/city_IS_IN_country_metadata_ddl.csv')

In [30]:
# Generate the city_IS_IN_country ddl metadata CSV.
city_IS_IN_country_metadata()

In [31]:
def state_PART_OF_country_metadata():
    """Write the state_PART_OF_country ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['state_id', 'string', False],
        ['country_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/company_details/ddl/state_PART_OF_country_metadata_ddl.csv')

In [32]:
# Generate the state_PART_OF_country ddl metadata CSV.
state_PART_OF_country_metadata()

In [33]:
def shareholder_HAS_SHARES_IN_company_metadata():
    """Write the shareholder_HAS_SHARES_IN_company ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['shareholder_id', 'string', False],
        ['company_id', 'string', False],
        ['percentage', 'double', False],
    ]

    MetadataCreator.to_csv(fields, '/company_details/ddl/shareholder_HAS_SHARES_IN_company_metadata_ddl.csv')

In [34]:
# Generate the shareholder_HAS_SHARES_IN_company ddl metadata CSV.
shareholder_HAS_SHARES_IN_company_metadata()

In [35]:
def nasdaq_news_metadata():
    """Write the nasdaq_news raw field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['title', 'string', True],
        ['content', 'string', True],
        ['contributor', 'string', True],
        ['published_date', 'string', True],
        ['mentioned_tickers', 'string', True],
    ]

    MetadataCreator.to_csv(fields, '/nasdaq_news/raw/nasdaq_news_metadata_raw.csv')

In [36]:
# Generate the nasdaq_news raw metadata CSV.
nasdaq_news_metadata()

In [37]:
def nasdaq_news_metadata_clean():
    """Write the nasdaq_news clean field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['title', 'string', False],
        ['content', 'string', False],
        ['publisher', 'string', False],
        ['published_date', 'date', False],
        ['mentioned_tickers', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/nasdaq_news/clean/nasdaq_news_metadata_clean.csv')

In [38]:
# Generate the nasdaq_news clean metadata CSV.
nasdaq_news_metadata_clean()

In [39]:
def bi_news_metadata():
    """Write the bi_news raw field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['title', 'string', True],
        ['content', 'string', True],
        ['published_date', 'string', True],
        ['publisher', 'string', True],
        ['ticker_symbol', 'string', True],
    ]

    MetadataCreator.to_csv(fields, '/bi_news/raw/bi_news_metadata_raw.csv')

In [40]:
# Generate the bi_news raw metadata CSV.
bi_news_metadata()

In [41]:
def bi_news_metadata_clean():
    """Write the bi_news clean field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['title', 'string', False],
        ['content', 'string', False],
        ['published_date', 'date', False],
        ['publisher', 'string', False],
        ['mentioned_tickers', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/bi_news/clean/bi_news_metadata_clean.csv')

In [42]:
# Generate the bi_news clean metadata CSV.
bi_news_metadata_clean()

In [43]:
def news_metadata_ontology():
    """Write the news ontology field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.

    NOTE(review): a function with this same name, rows and target path is
    already defined earlier in this file; this definition replaces it when the
    cell runs. One of the two copies can likely go.
    """
    fields = [
        ['news_id', 'string', False],
        ['title', 'string', False],
        ['content', 'string', False],
        ['published_date', 'date', False],
        ['publisher', 'string', False],
        ['mentioned_tickers', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/news/ontology/news_metadata_ontology.csv')

In [44]:
# Generate the news ontology metadata CSV.
news_metadata_ontology()

In [45]:
def yahoo_price_metadata():
    """Write the yahoo_stock_price raw field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['open_date', 'string', True],
        ['open_price', 'string', True],
        ['high_price', 'string', True],
        ['low_price', 'string', True],
        ['close_price', 'string', True],
        ['adj_close_price', 'string', True],
        ['volume', 'string', True],
        ['ticker_symbol', 'string', True],
    ]

    MetadataCreator.to_csv(fields, '/yahoo_stock_price/raw/yahoo_price_metadata_raw.csv')

In [46]:
# Generate the yahoo_stock_price raw metadata CSV.
yahoo_price_metadata()

In [47]:
def yahoo_price_metadata_clean():
    """Write the yahoo_stock_price clean field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['open_date', 'date', False],
        ['open_price', 'double', False],
        ['high_price', 'double', False],
        ['low_price', 'double', False],
        ['close_price', 'double', False],
        ['adj_close_price', 'double', False],
        # NOTE(review): confirm 'int16' is wide enough here (a 16-bit signed int tops out at 32,767).
        ['volume', 'int16', False],
        ['ticker_symbol', 'string', False],
        ['stock_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/yahoo_stock_price/clean/yahoo_price_metadata_clean.csv')

In [48]:
# Generate the yahoo_stock_price clean metadata CSV.
yahoo_price_metadata_clean()

In [49]:
def publisher_metadata_ddl():
    """Write the publisher ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['publisher_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/news/ddl/publisher_metadata_ddl.csv')

In [50]:
# Generate the publisher ddl metadata CSV.
publisher_metadata_ddl()

In [69]:
def news_metadata_ddl():
    """Write the news ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['news_id', 'string', False],
        ['title', 'string', False],
        ['content', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/news/ddl/news_metadata_ddl.csv')

In [70]:
# Generate the news ddl metadata CSV.
news_metadata_ddl()

In [53]:
def date_metadata_ddl():
    """Write the date ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['date_id', 'date', False],
    ]

    MetadataCreator.to_csv(fields, '/news/ddl/date_metadata_ddl.csv')

In [54]:
# Generate the date ddl metadata CSV.
date_metadata_ddl()

In [55]:
def stock_metadata_ddl():
    """Write the stock ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['open_price', 'double', False],
        ['high_price', 'double', False],
        ['low_price', 'double', False],
        ['close_price', 'double', False],
        ['adj_close_price', 'double', False],
        # NOTE(review): confirm 'int16' is wide enough here (a 16-bit signed int tops out at 32,767).
        ['volume', 'int16', False],
        ['stock_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/stock_price/ddl/stock_metadata_ddl.csv')

In [56]:
# Generate the stock ddl metadata CSV.
stock_metadata_ddl()

In [57]:
def stock_IS_VALUED_FOR_company_metadata_ddl():
    """Write the stock_IS_VALUED_FOR_company ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['stock_id', 'string', False],
        ['company_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/stock_price/ddl/stock_IS_VALUED_FOR_company_metadata_ddl.csv')

In [58]:
# Generate the stock_IS_VALUED_FOR_company ddl metadata CSV.
stock_IS_VALUED_FOR_company_metadata_ddl()

In [59]:
def publisher_PUBLISHES_news_ddl():
    """Write the publisher_PUBLISHES_news ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['publisher_id', 'string', False],
        ['news_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/news/ddl/publisher_PUBLISHES_news_metadata_ddl.csv')

In [60]:
# Generate the publisher_PUBLISHES_news ddl metadata CSV.
publisher_PUBLISHES_news_ddl()

In [61]:
def news_CONCERNS_company_ddl():
    """Write the news_CONCERNS_company ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['news_id', 'string', False],
        ['company_id', 'string', False],
    ]

    MetadataCreator.to_csv(fields, '/news/ddl/news_CONCERNS_company_metadata_ddl.csv')

In [62]:
# Generate the news_CONCERNS_company ddl metadata CSV.
news_CONCERNS_company_ddl()

In [63]:
def news_IS_ISSUED_ON_date_ddl():
    """Write the news_IS_ISSUED_ON_date ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['news_id', 'string', False],
        ['date_id', 'date', False],
    ]

    MetadataCreator.to_csv(fields, '/news/ddl/news_IS_ISSUED_ON_date_metadata_ddl.csv')

In [64]:
# Generate the news_IS_ISSUED_ON_date ddl metadata CSV.
news_IS_ISSUED_ON_date_ddl()

In [65]:
def stock_IS_VALUED_ON_date_ddl():
    """Write the stock_IS_VALUED_ON_date ddl field-metadata CSV through ``MetadataCreator.to_csv``.

    Rows are ``[field_name, field_type, nullable]``.
    """
    fields = [
        ['stock_id', 'string', False],
        ['date_id', 'date', False],
    ]

    MetadataCreator.to_csv(fields, '/stock_price/ddl/stock_IS_VALUED_ON_date_metadata_ddl.csv')

In [66]:
# Generate the stock_IS_VALUED_ON_date ddl metadata CSV.
stock_IS_VALUED_ON_date_ddl()