In [1]:
# Import dependencies
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.orm import declarative_base
from sqlalchemy import Column, Integer, String, Float, ForeignKey, select
from sqlalchemy.orm import relationship

## Create the NASDAQ database

In [2]:
# Create the NADAQ database and connect to it
engine = create_engine('sqlite:///nasdaq.sqlite')

# Sets an object to utilize the default declarative base
Base = declarative_base()

In [3]:
# Create classes
    
class Sector(Base):
    __tablename__ = 'sector'
    sector_id = Column(Integer, primary_key=True, unique=True)
    sector = Column(String(500))
    ticker_sector = relationship("Metadata", back_populates="sectors")
    
class Industry(Base):
    __tablename__ = 'industry'
    industry_id = Column(Integer, primary_key=True, unique=True)
    industry = Column(String(500))
    ticker_industry = relationship("Metadata", back_populates="industries")
    
class AssetType(Base):
    __tablename__ = 'asset_type'
    asset_type_id = Column(Integer, primary_key=True, unique=True)
    asset_type = Column(String(50))
    ticker_asset_types = relationship("Metadata", back_populates="asset_types")
    
class Country(Base):
    __tablename__ = 'country'
    country_id = Column(Integer, primary_key=True, unique=True)
    country = Column(String(50))
    ticker_countries = relationship("Metadata", back_populates="countries")
    
class Currency(Base):
    __tablename__ = 'currency'
    currency_id = Column(Integer, primary_key=True, unique=True)
    country = Column(String(3))
    ticker_currencies = relationship("Metadata", back_populates="currencies")
           
class Metadata(Base):
    __tablename__ = 'metadata'
    ticker_id = Column(Integer, primary_key=True, unique=True)
    sector_id = Column(Integer, ForeignKey("sector.sector_id"))
    industry_id = Column(Integer, ForeignKey("industry.industry_id"))
    asset_type_id = Column(Integer, ForeignKey("asset_type.asset_type_id"))
    country_id = Column(Integer, ForeignKey("country.country_id"))
    currency_id = Column(Integer, ForeignKey("currency.currency_id"))
    symbol = Column(String(5))
    name = Column(String(50))
    description = Column(String(5000))
    market_capitalization = Column(Integer)
    ebitda = Column(Integer)
    book_value = Column(Float)
    dividend_per_share = Column(Float)
    dividend_yield = Column(Float)
    figures = relationship("Data", back_populates="ticker")
    sectors = relationship("Sector", back_populates="ticker_sector")
    industries = relationship("Industry", back_populates="ticker_industry")
    asset_types = relationship("AssetType", back_populates="ticker_asset_types")
    countries = relationship("Country", back_populates="ticker_countries")
    currencies = relationship("Currency", back_populates="ticker_currencies")
    
class Data(Base):
    __tablename__ = 'data'
    id = Column(Integer, primary_key=True, unique=True)
    ticker_id = Column(Integer, ForeignKey("metadata.ticker_id"))
    date = Column(String(10))
    open = Column(Float)
    high = Column(Float)
    low = Column(Float)
    close = Column(Float)
    volume = Column(Integer)
    ticker = relationship("Metadata", back_populates="figures")
    

In [4]:
# Upload the classes to the database through the engine
Base.metadata.create_all(engine)

## Inspect the NASDAQ database

In [5]:
# Import the inspector dependency
from sqlalchemy import inspect

# Create the inspector and connect it to the database engine
inspector = inspect(engine)

In [6]:
# Check tables exist by getting their names
inspector.get_table_names()

['asset_type', 'country', 'currency', 'data', 'industry', 'metadata', 'sector']

In [7]:
# Check columns and data types in the metadata table
columns = inspector.get_columns('metadata')
for column in columns:
    print(column["name"], column["type"])

ticker_id INTEGER
sector_id INTEGER
industry_id INTEGER
asset_type_id INTEGER
country_id INTEGER
currency_id INTEGER
symbol VARCHAR(5)
name VARCHAR(50)
description VARCHAR(5000)
market_capitalization INTEGER
ebitda INTEGER
book_value FLOAT
dividend_per_share FLOAT
dividend_yield FLOAT


In [8]:
# Check columns and data types in the data table
columns = inspector.get_columns('data')
for column in columns:
    print(column["name"], column["type"])

id INTEGER
ticker_id INTEGER
date VARCHAR(10)
open FLOAT
high FLOAT
low FLOAT
close FLOAT
volume INTEGER


In [9]:
# Check columns and data types in the sector table
columns = inspector.get_columns('sector')
for column in columns:
    print(column["name"], column["type"])

sector_id INTEGER
sector VARCHAR(500)


In [10]:
# Check columns and data types in the industry table
columns = inspector.get_columns('industry')
for column in columns:
    print(column["name"], column["type"])

industry_id INTEGER
industry VARCHAR(500)


In [11]:
# Check columns and data types in the asset type
columns = inspector.get_columns('asset_type')
for column in columns:
    print(column["name"], column["type"])

asset_type_id INTEGER
asset_type VARCHAR(50)


In [12]:
# Check columns and data types in the country table
columns = inspector.get_columns('country')
for column in columns:
    print(column["name"], column["type"])

country_id INTEGER
country VARCHAR(50)


In [13]:
# Check columns and data types in the currency table
columns = inspector.get_columns('currency')
for column in columns:
    print(column["name"], column["type"])

currency_id INTEGER
country VARCHAR(3)


## Import data into the database

In [14]:
# Import pandas dependency
import pandas as pd

In [15]:
# Import the sector csv
sector_table = pd.read_csv("clean_db_data/clean_sector.csv", encoding='UTF-8')

# Upload dataframe to the sector table in the database
sector_table.to_sql("sector", engine, if_exists="replace", index=False)

6

In [16]:
# Import the industry csv
industry_table = pd.read_csv("clean_db_data/clean_industry.csv", encoding='UTF-8')

# Upload dataframe to the industry table in the database
industry_table.to_sql("industry", engine, if_exists="replace", index=False)

51

In [17]:
# Import the asset_type csv
asset_type_table = pd.read_csv("clean_db_data/clean_asset_type.csv", encoding='UTF-8')

# Upload dataframe to the asset type table in the database
asset_type_table.to_sql("asset_type", engine, if_exists="replace", index=False)

1

In [18]:
# Import the country csv
country_table = pd.read_csv("clean_db_data/clean_country.csv", encoding='UTF-8')

# Upload dataframe to the country table in the database
country_table.to_sql("country", engine, if_exists="replace", index=False)

2

In [19]:
# Import the currency csv
currency_table = pd.read_csv("clean_db_data/clean_currency.csv", encoding='UTF-8')

# Upload dataframe to the currency table in the database
currency_table.to_sql("currency", engine, if_exists="replace", index=False)

1

In [20]:
# Import csv file with all the metadata and turn into dataframe
meta_table = pd.read_csv("clean_db_data/clean_metadata.csv", encoding='UTF-8')

# Upload dataframe to the metadata table in the database
meta_table.to_sql("metadata", engine, if_exists="replace", index=False)

101

In [21]:
# Import csv file with all the trading data and turn into dataframe 
data_table = pd.read_csv("clean_db_data/clean_trade_data.csv", encoding='UTF-8')

# Upload dataframe to the data table in the database
data_table.to_sql("data", engine, if_exists="replace", index=False)

5940

## Run basic queries to check data imported correctly

In [22]:
# Import the Session dependency and connet through engine
from sqlalchemy.orm import Session
session = Session(bind=engine)

In [23]:
# Get ticker symbols for each ticker in the Metadata table
tickers = session.query(Metadata)
for ticker in tickers:
    print(ticker.symbol)

AAPL
MSFT
AMZN
NVDA
META
GOOGL
GOOG
TSLA
AVGO
ADBE
COST
PEP
CSCO
NFLX
CMCSA
AMD
TMUS
INTC
INTU
TXN
AMGN
AMAT
HON
QCOM
BKNG
SBUX
ISRG
ADP
MDLZ
GILD
LRCX
REGN
ADI
VRTX
MU
PANW
MELI
SNPS
KLAC
PYPL
CDNS
CHTR
MAR
CSX
ABNB
MNST
ORLY
PDD
ASML
NXPI
WDAY
CTAS
FTNT
LULU
MRVL
ADSK
KDP
ODFL
PCAR
PAYX
MCHP
CPRT
ON
MRNA
ROST
KHC
DXCM
AZN
EXC
AEP
IDXX
SGEN
CRWD
BIIB
BKR
TTD
CTSH
VRSK
CEG
CSGP
EA
GFS
XEL
TEAM
FAST
GEHC
DDOG
FANG
WBD
ANSS
DLTR
ALGN
ILMN
ZS
EBAY
WBA
ZM
SIRI
ENPH
JD
LCID


In [24]:
# Get volume for each ticker in the Data table
trades = session.query(Data)
for trade in trades:
    print(trade.volume)

500216459
1323817340
996368613
1297863403
1275052503
967580718
1520461315
1307294493
1443652725
1675731304
1724948219
1868381344
2083968147
1509735435
1447364668
1749297959
2400305601
1683547838
2171827429
1628570227
2106513962
2443227128
1688864233
1565079040
1797948421
1462773381
1916751489
1606114354
1711935110
1889956694
2650845211
1825486961
2239366098
2319687808
2122724412
2895317580
3886793083
1184207050
755162226
810900890
701660022
816530808
1570331732
755223231
734044103
598871365
448922253
621478768
547408488
683515746
473957094
515218768
739456573
506117812
650981384
472540723
828099179
898917007
961321947
789748068
678972040
64463937
108585952
104767910
120963450
187447041
97111201
111132717
163767205
105781865
117913042
159631558
105821779
123813078
148999823
115460410
169944463
205905262
83596791
125456828
146524079
114583159
132225519
170888359
67084066
125459692
152429056
119020938
196270335
233049259
69671582
134295111
97460918
142160150
17282900
51215704
61177763
108

## Run 'join' queriy to check table relationships set up correctly

In [25]:
# Create query to join Metadata and Data tables on ticker_id,
# and return data from both tables
open = select(Metadata.symbol, Industry.industry, Data.volume).select_from(Metadata)\
        .join(Data, Metadata.ticker_id == Data.ticker_id).join(Industry, Metadata.industry_id == Industry.industry_id)

results = session.execute(open)

for row in results:
    print(row)

('AAPL', 'ELECTRONIC COMPUTERS', 448922253)
('AAPL', 'ELECTRONIC COMPUTERS', 472540723)
('AAPL', 'ELECTRONIC COMPUTERS', 473957094)
('AAPL', 'ELECTRONIC COMPUTERS', 500216459)
('AAPL', 'ELECTRONIC COMPUTERS', 506117812)
('AAPL', 'ELECTRONIC COMPUTERS', 515218768)
('AAPL', 'ELECTRONIC COMPUTERS', 547408488)
('AAPL', 'ELECTRONIC COMPUTERS', 598871365)
('AAPL', 'ELECTRONIC COMPUTERS', 621478768)
('AAPL', 'ELECTRONIC COMPUTERS', 650981384)
('AAPL', 'ELECTRONIC COMPUTERS', 678972040)
('AAPL', 'ELECTRONIC COMPUTERS', 683515746)
('AAPL', 'ELECTRONIC COMPUTERS', 701660022)
('AAPL', 'ELECTRONIC COMPUTERS', 734044103)
('AAPL', 'ELECTRONIC COMPUTERS', 739456573)
('AAPL', 'ELECTRONIC COMPUTERS', 755162226)
('AAPL', 'ELECTRONIC COMPUTERS', 755223231)
('AAPL', 'ELECTRONIC COMPUTERS', 789748068)
('AAPL', 'ELECTRONIC COMPUTERS', 810900890)
('AAPL', 'ELECTRONIC COMPUTERS', 816530808)
('AAPL', 'ELECTRONIC COMPUTERS', 828099179)
('AAPL', 'ELECTRONIC COMPUTERS', 898917007)
('AAPL', 'ELECTRONIC COMPUTERS',

In [26]:
# Close the session
session.close()