In [29]:
#import dependencies
from sqlalchemy import create_engine, Column, Integer, String, Float, ForeignKey, Date
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session
import pandas as pd
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import func


In [30]:
# Step 1: Read CSV Files into DataFrames
industry_groups_df = pd.read_csv('Resources/Industry_Groups.csv')
top_ten_df = pd.read_csv('Resources/top_ten_with_industryID.csv')
top_ten_historic_df = pd.read_csv('Resources/top_ten_historic_5y.csv')
fundamental_df_clean = pd.read_csv('Resources/02_ASX_Fundamental_Final_Clean.csv')
industry_groups_df.head()

Unnamed: 0,industry_name,industry_id
0,Automobiles & Components,1
1,Banks,2
2,Capital Goods,3
3,Class Pend,4
4,Commercial & Professional Services,5


In [31]:
# #trying to see if i had any null values in 'Date' column of top_ten_historic_df
print(top_ten_historic_df['Ticker'].isnull().sum())

0


In [32]:
industry_groups_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   industry_name  27 non-null     object
 1   industry_id    27 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 564.0+ bytes


In [33]:
fundamental_df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 259 entries, 0 to 258
Data columns (total 29 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   lastPrice          259 non-null    float64
 1   Change             259 non-null    object 
 2   Bid_Ask            259 non-null    object 
 3   volumePerDay       259 non-null    int64  
 4   volume4wAvg        259 non-null    int64  
 5   Open               245 non-null    float64
 6   dayRange           259 non-null    object 
 7   prevClose          259 non-null    object 
 8   lastTrade          259 non-null    object 
 9   oneWeek            259 non-null    object 
 10  oneMonth           259 non-null    object 
 11  YTD2023            259 non-null    object 
 12  oneYear            259 non-null    object 
 13  vsSectorOneYr      259 non-null    object 
 14  vsASX200OneYr      259 non-null    object 
 15  marketCap          259 non-null    float64
 16  ASXRank            259 non

In [34]:
#convert datatype to date
top_ten_historic_df['Date'] = pd.to_datetime(top_ten_historic_df['Date'])


In [35]:
top_ten_historic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60881 entries, 0 to 60880
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Ticker     60881 non-null  object        
 1   Date       60881 non-null  datetime64[ns]
 2   Open       60881 non-null  float64       
 3   High       60881 non-null  float64       
 4   Low        60881 non-null  float64       
 5   Close      60881 non-null  float64       
 6   Adj Close  60881 non-null  float64       
 7   Volume     60881 non-null  int64         
dtypes: datetime64[ns](1), float64(5), int64(1), object(1)
memory usage: 3.7+ MB


In [36]:
top_ten_historic_df['Date'] = pd.to_datetime(top_ten_historic_df['Date']).apply(lambda x: x.date())

In [56]:
#print(top_ten_historic_df['Date'].unique())

In [37]:
industry_groups_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   industry_name  27 non-null     object
 1   industry_id    27 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 564.0+ bytes


In [41]:
# Define Database Schema
Base = declarative_base()

class IndustryGroups(Base):
    __tablename__ = 'industry_groups'
    
    industry_name = Column(String)
    industry_id = Column(Integer, primary_key=True)
    

class TopTen(Base):
    __tablename__ = 'top_ten'
    
    ticker = Column(String, primary_key=True)
    company_name = Column(String)
    market_cap = Column(Float)
    industry_id = Column(Integer, ForeignKey('industry_groups.industry_id'))

class TopTenHistoric(Base):
    __tablename__ = 'top_ten_historic'
    
    id = Column (Integer, primary_key=True, autoincrement=True)
    Ticker = Column(String, ForeignKey('top_ten.ticker'))
    Date = Column(Date)
    Open = Column(Float)
    High = Column(Float)
    Low = Column(Float)
    Close = Column(Float)
    Adj_Close = Column('Adj Close', Float)
    Volume = Column(Integer)
    
class fundamental(Base):
    __tablename__ = 'fundamental'
    
    lastPrice = Column(Float)         
    Change = Column(String)       
    Bid_Ask = Column(String)           
    volumePerDay = Column(Integer)         
    volume4wAvg = Column(Integer)          
    Open = Column(Float)              
    dayRange = Column(String)              
    prevClose = Column(String)        
    lastTrade = Column(String)            
    oneWeek = Column(String)             
    oneMonth = Column(String)           
    YTD2023 = Column(String)            
    oneYear = Column(String)            
    vsSectorOneYr = Column(String)         
    vsASX200OneYr = Column(String)         
    marketCap = Column(Float)          
    ASXRank = Column(String)            
    sectorRank = Column(String)         
    sharesIssued = Column(Float)       
    Sector = Column(String)               
    similarCompanies = Column(String)     
    EPS = Column(Float)                 
    DPS = Column(Float)                
    bookValuePerShare = Column(Float)    
    Breakdown = Column(String)          
    Recommendation = Column(String)       
    lastUpdated = Column(String)         
    Ticker = Column(String,primary_key=True)            
    PE_Ratio = Column(Float) 
        
#Connect to the Database and Create Tables
# Replace 'sqlite:///asx.db' with your actual database connection string
engine = create_engine('sqlite:///Resources/top_ten_asx.db')
Base.metadata.create_all(engine)



In [42]:
# from sqlalchemy.orm import sessionmaker

# Insert DataFrames into Database
Session = sessionmaker(bind=engine)
session = Session()

# Insert IndustryGroups
session.bulk_insert_mappings(IndustryGroups, industry_groups_df.to_dict(orient="records"))

# Insert TopTen
session.bulk_insert_mappings(TopTen, top_ten_df.to_dict(orient="records"))

# Insert TopTenHistoric
session.bulk_insert_mappings(TopTenHistoric, top_ten_historic_df.to_dict(orient="records"))

# Insert ASX_Fundamental
session.bulk_insert_mappings(Fundamental, fundamental_df_clean.to_dict(orient="records"))




IntegrityError: (sqlite3.IntegrityError) UNIQUE constraint failed: industry_groups.industry_id
[SQL: INSERT INTO industry_groups (industry_name, industry_id) VALUES (?, ?)]
[parameters: (('Automobiles & Components', 1), ('Banks', 2), ('Capital Goods', 3), ('Class Pend', 4), ('Commercial & Professional Services', 5), ('Consumer Discretionary Distribution & Retail', 6), ('Consumer Durables & Apparel', 7), ('Consumer Services', 8)  ... displaying 10 of 27 total bound parameter sets ...  ('Transportation', 26), ('Utilities', 27))]
(Background on this error at: https://sqlalche.me/e/14/gkpj)

In [18]:
# #commit changes
#session.commit()

In [None]:
# #Query the Database and Create DataFrame
# # Example: Querying the TopTen table and creating a DataFrame
# top_ten_query = session.query(TopTen).all()
# asx = pd.DataFrame([{
#     'ticker': record.ticker,
#     'company_name': record.company_name,
#     'market_cap': record.market_cap,
#     'industry_id': record.industry_id
# } for record in top_ten_query])

# print(asx)


In [19]:
#import dependencies for inspector
from sqlalchemy import inspect

In [20]:
# Instantiate an Inspector with the engine
inspector = inspect(engine)

In [21]:
# Get a list of table names
table_names = inspector.get_table_names()
print(table_names)

['fundamental', 'industry_groups', 'top_ten', 'top_ten_historic']


In [22]:
#Get columns of top_ten_historic
columns = inspector.get_columns('top_ten')
for column in columns:
    print(column['name'])

ticker
company_name
market_cap
industry_id


In [23]:
#reflect database into ORM classes 
Base = automap_base()
Base.prepare(autoload_with=engine)
Base.classes.keys()

['fundamental', 'industry_groups', 'top_ten', 'top_ten_historic']

In [24]:
#save a reference to the top_ten table as 'top_ten'
top_ten = Base.classes.top_ten

In [25]:
#create a database session object
session = Session(bind =engine)

In [26]:
#list all of the tickers found in the top_ten table 
session.query(top_ten.ticker).all()

[('360',),
 ('4DS',),
 ('A2M',),
 ('AAC',),
 ('ABB',),
 ('ABV',),
 ('AD8',),
 ('AFI',),
 ('AFP',),
 ('AGL',),
 ('AHL',),
 ('AIA',),
 ('AIZ',),
 ('AKP',),
 ('ALD',),
 ('ALL',),
 ('ALQ',),
 ('ALU',),
 ('ALX',),
 ('AMC',),
 ('AMP',),
 ('ANN',),
 ('ANZ',),
 ('APA',),
 ('APE',),
 ('APM',),
 ('ARB',),
 ('ARG',),
 ('ASX',),
 ('AUB',),
 ('AUI',),
 ('AVH',),
 ('AVJ',),
 ('AX1',),
 ('AXE',),
 ('AZJ',),
 ('BAP',),
 ('BEN',),
 ('BFL',),
 ('BGA',),
 ('BGP',),
 ('BHP',),
 ('BIO',),
 ('BKI',),
 ('BLG',),
 ('BOQ',),
 ('BPT',),
 ('BRG',),
 ('BXB',),
 ('BXN',),
 ('CAR',),
 ('CBA',),
 ('CBO',),
 ('CCO',),
 ('CDA',),
 ('CEN',),
 ('CGC',),
 ('CGF',),
 ('CHC',),
 ('CNU',),
 ('COH',),
 ('COL',),
 ('CPU',),
 ('CSL',),
 ('CTD',),
 ('CUV',),
 ('CVW',),
 ('CWP',),
 ('CWY',),
 ('D2O',),
 ('DBI',),
 ('DDR',),
 ('DDT',),
 ('DHG',),
 ('DMP',),
 ('DOW',),
 ('DTL',),
 ('DTZ',),
 ('DXS',),
 ('DY6',),
 ('EBO',),
 ('EDV',),
 ('ELD',),
 ('ERD',),
 ('EVT',),
 ('EXL',),
 ('FBU',),
 ('FLT',),
 ('FMG',),
 ('FPH',),
 ('FRI',),