In [2]:
# Import the necessary libraries
import sys
import os
import pandas as pd
import warnings

# Add the src folder to the path
sys.path.append(os.path.abspath('../..'))
warnings.filterwarnings("ignore")

# Import the classes
from src.data_tool.data_providers.connect_wrds import get_wrds_data, get_security_daily
from src.data_tool.data_providers.yahoo_finance import yahoo_finance
from src.data_tool.data_providers.polygon import polygon
from src.data_tool.data_providers.financial_datasets import FinancialDatasets
from src.database_tool.connect_db import ConnectDB
from src.database_tool.create_table import TableCreator

# Initialize the classes
# One client object per data provider imported above.
# NOTE(review): `yf` and `pl` are also the conventional import aliases for
# yfinance and polars; avoid importing those libraries under the same names
# in this notebook, or these clients will be shadowed.
yf = yahoo_finance()
pl = polygon()
fd = FinancialDatasets()

# `db` is the handle the later cells call (execute_sql, show_tables, read_table).
db =ConnectDB()
# presumably a SQLAlchemy engine — TODO confirm against ConnectDB.get_engine
engine = db.get_engine()


FINANCIAL_DATASETS_API_KEY is set
POLYGON_API_KEY is set
DB_USER and DB_PASSWORD are set
DB_USER and DB_PASSWORD are set
DB_USER and DB_PASSWORD are set


In [3]:
def get_database_size(db_instance, database_name):
    """
    Report the combined data + index footprint of one schema, in megabytes.

    Parameters
    ----------
    db_instance : object
        Must expose ``execute_sql(sql)``; the value it returns must provide
        ``fetchall()`` and ``keys()``.
    database_name : str
        Schema name matched against ``information_schema.TABLES``. It is
        interpolated directly into the SQL text, so pass trusted values only.

    Returns
    -------
    pandas.DataFrame
        One row per schema returned by the query, with column labels taken
        from the result's ``keys()`` (the query aliases them as
        ``database_name`` and ``database_size_mb``).
    """
    query = f"""
    SELECT table_schema "database_name", 
           sum(data_length + index_length) / 1024 / 1024 "database_size_mb" 
    FROM information_schema.TABLES 
    WHERE table_schema = '{database_name}'
    GROUP BY table_schema;
    """
    cursor = db_instance.execute_sql(query)
    # Rows are fetched before the column labels, as a single frame is built from both.
    rows = cursor.fetchall()
    column_labels = cursor.keys()
    return pd.DataFrame(rows, columns=column_labels)

# Schema to inspect; replace with your database name if different.
database_name = 'Elite_Traders'
database_size_df = get_database_size(db, database_name)
# Bare last expression instead of print(): Jupyter renders the frame with
# its rich table display rather than plain text.
database_size_df

   database_name database_size_mb
0  Elite_Traders       4.50000000


In [4]:
def get_table_sizes(db_instance):
    """
    Retrieves the size of each table in the database.

    Runs ``SHOW TABLE STATUS`` through ``db_instance.execute_sql`` and keeps
    the size-related columns, adding a total and megabyte conversions.

    Parameters
    ----------
    db_instance : object
        Must expose ``execute_sql(sql)``; the value it returns must provide
        ``fetchall()`` and ``keys()``.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``Name``, ``Data_length``, ``Index_length``,
        ``Data_free``, ``Total_size`` (``Data_length + Index_length``), then
        ``Data_length_MB``, ``Index_length_MB``, ``Data_free_MB`` and
        ``Total_size_MB`` (each source column divided by 1024 twice).
    """
    sql_statement = "SHOW TABLE STATUS"
    result = db_instance.execute_sql(sql_statement)
    df = pd.DataFrame(result.fetchall(), columns=result.keys())
    # Explicit copy: adding columns to a bare column-subset of `df` is the
    # pattern pandas flags with SettingWithCopyWarning. The notebook silences
    # warnings globally, so the problem would otherwise go unnoticed.
    size_df = df[['Name', 'Data_length', 'Index_length', 'Data_free']].copy()
    size_df['Total_size'] = size_df['Data_length'] + size_df['Index_length']
    # Derive the *_MB columns in the same order the original listed them.
    for column in ('Data_length', 'Index_length', 'Data_free', 'Total_size'):
        size_df[f'{column}_MB'] = size_df[column] / 1024 / 1024
    return size_df

table_sizes_df = get_table_sizes(db)
# Bare last expression instead of print(): the frame is wide, and the plain
# text form wraps its columns across several blocks; the rich display keeps
# each table on one row.
table_sizes_df

                         Name  Data_length  Index_length  Data_free  \
0      capital_structure_debt        16384             0          0   
1    capital_structure_equity        16384             0          0   
2   capital_structure_summary        16384             0          0   
3                company_news      1589248        147456    4194304   
4           financial_metrics        16384             0          0   
5            financial_ratios        16384             0          0   
6         fundamentals_annual        16384             0          0   
7      fundamentals_quarterly        49152             0          0   
8                 identifiers        16384             0          0   
9               insider_trade       163840             0          0   
10                      price        16384             0          0   
11             security_daily      2637824             0    4194304   

    Total_size  Data_length_MB  Index_length_MB  Data_free_MB  Total_size_MB

In [5]:
# List the tables currently present in the connected database.
db.show_tables()

Unnamed: 0,Tables
0,capital_structure_debt
1,capital_structure_equity
2,capital_structure_summary
3,company_news
4,financial_metrics
5,financial_ratios
6,fundamentals_annual
7,fundamentals_quarterly
8,identifiers
9,insider_trade


In [5]:
# Create the project's tables through TableCreator.
# NOTE(review): the recorded output suggests tables that already exist are
# reported rather than recreated — confirm in TableCreator.create_all_tables.
tc = TableCreator()
tc.create_all_tables()

DB_USER and DB_PASSWORD are set
Price table already exists
Financial Metrics table already exists
Insider Trade table already exists
Company News table already exists
All tables created successfully


In [12]:
# List the tables again; placed after create_all_tables(), presumably as a
# post-creation check.
# NOTE(review): the execution counts in this notebook are out of order, so
# the recorded outputs may not reflect a top-to-bottom run — Restart & Run All
# before relying on them.
db.show_tables()

Unnamed: 0,Tables
0,capital_structure_debt
1,capital_structure_equity
2,capital_structure_summary
3,company_news
4,financial_metrics
5,financial_ratios
6,fundamentals_annual
7,fundamentals_quarterly
8,identifiers
9,insider_trade


In [7]:
# Read the security_daily table through the db helper and preview the first rows.
security_daily = db.read_table('security_daily')
security_daily.head()

Unnamed: 0,ticker,datadate,trading_volume,close_price,high_price,low_price,open_price,outstanding_shares
0,ZM,2025-04-15,1916910.0,72.4,73.13,71.785,71.965,262754000.0
1,SNOW,2025-04-15,2380513.0,146.54,147.2,144.2,144.71,334100000.0
2,QCOM,2025-04-15,6324074.0,138.6,140.835,138.03,139.46,1106000000.0
3,AVGO,2025-04-15,16883740.0,178.95,182.2644,178.08,179.725,4701949000.0
4,AMZN,2025-04-15,43204810.0,179.59,182.35,177.9331,181.41,10597730000.0


In [9]:
# Count missing values per column of security_daily.
security_daily.isnull().sum()

ticker                0
datadate              0
trading_volume        0
close_price           0
high_price            0
low_price             0
open_price            0
outstanding_shares    1
dtype: int64

In [11]:
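# Destructive: uncommenting and running this line drops the security_daily table.
# The "dropped successfully" output recorded below presumably comes from an
# earlier run, made before the line was commented out.
#db.drop_table('security_daily')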

Table security_daily dropped successfully


In [7]:
# Read the company_news table through the db helper.
news = db.read_table('company_news')

In [8]:
# Preview the last rows of the loaded company_news data.
news.tail()

Unnamed: 0,id,polygon_id,ticker,title,author,publisher,published_utc,article_url,tickers,description,keywords,insights
323,1763,63d96733bd3b5c9c79e15e573382c463ace3114441a151...,NVDA,"Trump's ""Liberation Day"" Tariffs Pummel the ""M...",The Motley Fool,The Motley Fool,2025-04-06 22:20:00,https://www.fool.com/investing/2025/04/06/trum...,"[""GOOG"", ""GOOGL"", ""AAPL"", ""AMZN"", ""META"", ""MSF...",The article discusses the impact of Trump's ne...,"[""import tariffs"", ""tech giants"", ""long-term i...","[{""ticker"": ""GOOG"", ""sentiment"": ""neutral"", ""s..."
324,1764,4588973ba2767bf95d88ad990674ddd6ed05d5b6d4be2b...,NVDA,Editorial: How to Navigate a Market Crash,Thomas Monteiro,Investing.com,2025-04-07 09:35:00,https://www.investing.com/analysis/editorial-h...,"[""AAPL"", ""NVDA"", ""MNST"", ""CSCO""]",The article discusses how to navigate the curr...,"[""market crash"", ""stock market"", ""investing st...","[{""ticker"": ""AAPL"", ""sentiment"": ""positive"", ""..."
325,1765,60e29f352a88db666a8bc27fe4ece5856a70cf3b34f64e...,NVDA,The Smartest Artificial Intelligence (AI) ETF ...,Ryan Vanzo,The Motley Fool,2025-04-07 09:39:00,https://www.fool.com/investing/2025/04/07/smar...,"[""NVDA"", ""AMZN"", ""BAC"", ""BACpB"", ""BACpE"", ""BAC...",The article discusses the rapid growth of the ...,"[""artificial intelligence"", ""AI ETF"", ""investm...","[{""ticker"": ""NVDA"", ""sentiment"": ""positive"", ""..."
326,1766,bc9e47eff8f55f2566cb7055fc6bb2bd7b71d12977adf4...,NVDA,Why Lam Research Stock Crushed the Market Today,Eric Volkman,The Motley Fool,2025-04-07 22:28:00,https://www.fool.com/investing/2025/04/07/why-...,"[""LRCX"", ""NVDA"", ""AVGO"", ""CDNS""]",Bank of America believes that semiconductor co...,"[""semiconductors"", ""tariffs"", ""stock recommend...","[{""ticker"": ""LRCX"", ""sentiment"": ""positive"", ""..."
327,1767,3cc68f186f8a6d3ce9f0a773a50c6f1d00eff354bfe67b...,NVDA,Why Nvidia Stock Jumped in a Wildly Volatile D...,Keith Noonan,The Motley Fool,2025-04-07 22:46:26,https://www.fool.com/investing/2025/04/07/why-...,"[""NVDA""]",Nvidia stock saw significant volatility on Mon...,"[""Nvidia"", ""stock market"", ""tariffs"", ""volatil...","[{""ticker"": ""NVDA"", ""sentiment"": ""positive"", ""..."


In [9]:
# Destructive: uncommenting and running this line drops the company_news table.
#db.drop_table('company_news')