In [1]:
import sqlite3
from typing import Optional

import pandas as pd

## Create the dataframe which contains all the historical fundamental data of a stock

In [2]:
# Symbol whose fundamentals are loaded. It is bound into the query as a
# parameter so it is written once instead of being repeated in every join.
SYMBOL = 'META'

# Create stocks fundamental df
# The connection is intentionally left open: later cells reuse `conn`.
conn = sqlite3.connect('../app/database/ibd.db')

# One row per fiscal period: each income statement row of SYMBOL joined to the
# balance sheet and cash flow rows with the same symbol and fiscal_date_ending.
query = '''
SELECT income_statement.*, balance_sheet.*, cash_flow.*
FROM income_statement
INNER JOIN balance_sheet
ON income_statement.fiscal_date_ending = balance_sheet.fiscal_date_ending AND balance_sheet.symbol = income_statement.symbol
INNER JOIN cash_flow
ON income_statement.fiscal_date_ending = cash_flow.fiscal_date_ending AND cash_flow.symbol = income_statement.symbol
WHERE income_statement.symbol = ?
'''
stock_df = pd.read_sql(query, conn, params=(SYMBOL,))

# Drop columns with duplicated names
# (the three tables are selected with `*`, so shared column names appear more
# than once; only the first occurrence of each name is kept).
stock_df = stock_df.loc[:, ~stock_df.columns.duplicated()]


In [3]:
# Preview the first five rows of the joined fundamentals frame.
stock_df.head(n=5)

Unnamed: 0,symbol,fiscal_date_ending,reported_currency,gross_profit,total_revenue,cost_of_revenue,cost_of_goods_and_services_sold,operating_income,selling_general_and_administrative,research_and_development,...,dividend_payout,dividend_payout_common_stock,dividend_payout_preferred_stock,proceeds_from_issuance_of_common_stock,proceeds_from_issuance_of_long_term_debt_and_capital_securities_net,proceeds_from_issuance_of_preferred_stock,proceeds_from_repurchase_of_equity,proceeds_from_sale_of_treasury_stock,change_in_cash_and_cash_equivalents,change_in_exchange_rate
0,META,2023-06-30,USD,25939000000.0,31884000000.0,5945000000.0,5945000000.0,9392000000.0,7318000000.0,9344000000.0,...,,,,,0.0,,-898000000.0,,17398000000.0,
1,META,2023-03-31,USD,22483000000.0,28591000000.0,6108000000.0,6108000000.0,7227000000.0,5929000000.0,9381000000.0,...,,,,,0.0,,-9365000000.0,,-3261000000.0,
2,META,2022-12-31,USD,23584000000.0,31920000000.0,8336000000.0,8336000000.0,6399000000.0,7659000000.0,9771000000.0,...,,,,0.0,0.0,,-6863000000.0,,-80000000.0,
3,META,2022-09-30,USD,21994000000.0,27710000000.0,5716000000.0,5716000000.0,5664000000.0,7164000000.0,9170000000.0,...,,,,0.0,9921000000.0,,-6354000000.0,,2138000000.0,
4,META,2022-06-30,USD,23626000000.0,28818000000.0,5192000000.0,5192000000.0,8358000000.0,6582000000.0,8690000000.0,...,,,,0.0,,,-5233000000.0,,-1326000000.0,


## Create dataframe that contains interest rates historical data

In [4]:
# Load every stored observation of the 'Interest_Rate' indicator.
query = '''
SELECT  *
FROM economic_indicator_time_series
WHERE indicator_name = 'Interest_Rate'
'''

# Reuses the sqlite connection opened when the stock fundamentals were loaded.
interest_rate_df = pd.read_sql(sql=query, con=conn)
interest_rate_df

Unnamed: 0,indicator_name,value,unit,registered_date,registered_date_ts
0,Interest_Rate,5.12,percent,01-07-2023,1688158800
1,Interest_Rate,5.08,percent,01-06-2023,1685566800
2,Interest_Rate,5.06,percent,01-05-2023,1682888400
3,Interest_Rate,4.83,percent,01-04-2023,1680296400
4,Interest_Rate,4.65,percent,01-03-2023,1677621600
...,...,...,...,...,...
824,Interest_Rate,0.83,percent,01-11-1954,-478663200
825,Interest_Rate,0.85,percent,01-10-1954,-481341600
826,Interest_Rate,1.07,percent,01-09-1954,-483933600
827,Interest_Rate,1.22,percent,01-08-1954,-486612000


## Create a dataframe that contains historical 10-year treasury yield data

In [5]:
# Load every stored observation of the 'Treasury_Yield' indicator.
query = '''
SELECT  *
FROM economic_indicator_time_series
WHERE indicator_name = 'Treasury_Yield'
'''

# Reuses the sqlite connection opened when the stock fundamentals were loaded.
treasury_yield_df = pd.read_sql(sql=query, con=conn)
treasury_yield_df

Unnamed: 0,indicator_name,value,unit,registered_date,registered_date_ts
0,Treasury_Yield,3.90,percent,01-07-2023,1688158800
1,Treasury_Yield,3.75,percent,01-06-2023,1685566800
2,Treasury_Yield,3.57,percent,01-05-2023,1682888400
3,Treasury_Yield,3.46,percent,01-04-2023,1680296400
4,Treasury_Yield,3.66,percent,01-03-2023,1677621600
...,...,...,...,...,...
839,Treasury_Yield,2.95,percent,01-08-1953,-518148000
840,Treasury_Yield,2.93,percent,01-07-1953,-520826400
841,Treasury_Yield,3.11,percent,01-06-1953,-523418400
842,Treasury_Yield,3.05,percent,01-05-1953,-526096800


## Create dataframe that contains global commodities index historical data

In [6]:
# Load every stored observation of the 'Global_Commodities_Index' indicator.
query = '''
SELECT  *
FROM economic_indicator_time_series
WHERE indicator_name = 'Global_Commodities_Index'
'''

# Reuses the sqlite connection opened when the stock fundamentals were loaded.
commodities_index_df = pd.read_sql(sql=query, con=conn)
commodities_index_df

Unnamed: 0,indicator_name,value,unit,registered_date,registered_date_ts
0,Global_Commodities_Index,154.695420,index 2016=100,01-06-2023,1685566800
1,Global_Commodities_Index,157.292251,index 2016=100,01-05-2023,1682888400
2,Global_Commodities_Index,170.972547,index 2016=100,01-04-2023,1680296400
3,Global_Commodities_Index,168.294185,index 2016=100,01-03-2023,1677621600
4,Global_Commodities_Index,174.564801,index 2016=100,01-02-2023,1675202400
...,...,...,...,...,...
241,Global_Commodities_Index,63.065977,index 2016=100,01-05-2003,1051736400
242,Global_Commodities_Index,61.887199,index 2016=100,01-04-2003,1049144400
243,Global_Commodities_Index,65.782768,index 2016=100,01-03-2003,1046469600
244,Global_Commodities_Index,70.229514,index 2016=100,01-02-2003,1044050400


## Create dataframe that contains Unemployment historical data

In [7]:
# Load every stored observation of the 'Unemployment' indicator.
query = '''
SELECT  *
FROM economic_indicator_time_series
WHERE indicator_name = 'Unemployment'
'''

# Reuses the sqlite connection opened when the stock fundamentals were loaded.
unemployment_df = pd.read_sql(sql=query, con=conn)
unemployment_df

Unnamed: 0,indicator_name,value,unit,registered_date,registered_date_ts
0,Unemployment,3.5,percent,01-07-2023,1688158800
1,Unemployment,3.6,percent,01-06-2023,1685566800
2,Unemployment,3.7,percent,01-05-2023,1682888400
3,Unemployment,3.4,percent,01-04-2023,1680296400
4,Unemployment,3.5,percent,01-03-2023,1677621600
...,...,...,...,...,...
902,Unemployment,3.5,percent,01-05-1948,-683863200
903,Unemployment,3.9,percent,01-04-1948,-686455200
904,Unemployment,4.0,percent,01-03-1948,-689133600
905,Unemployment,3.8,percent,01-02-1948,-691639200


## Create a dataframe that contains historical yearly inflation data

In [9]:
# Load every stored observation of the 'Inflation' indicator, ordered by
# registered_date_ts descending (unlike the other indicator queries, which
# do not specify an order).
query = '''
SELECT  *
FROM economic_indicator_time_series
WHERE indicator_name = 'Inflation'
ORDER BY registered_date_ts DESC
'''

# Reuses the sqlite connection opened when the stock fundamentals were loaded.
inflation_df = pd.read_sql(sql=query, con=conn)
inflation_df

Unnamed: 0,indicator_name,value,unit,registered_date,registered_date_ts
0,Inflation,4.642850,percent,01-01-2023,1693083600
1,Inflation,8.002800,percent,01-01-2022,1640988000
2,Inflation,4.697859,percent,01-01-2021,1609452000
3,Inflation,1.233584,percent,01-01-2020,1577829600
4,Inflation,1.812210,percent,01-01-2019,1546293600
...,...,...,...,...,...
59,Inflation,1.278912,percent,01-01-1964,-189396000
60,Inflation,1.239669,percent,01-01-1963,-220932000
61,Inflation,1.198773,percent,01-01-1962,-252468000
62,Inflation,1.070724,percent,01-01-1961,-284004000


## Helper function to calculate the average value of a given time series over a given timeframe

In [10]:
def calculate_time_series_avg_value(start_date: str, time_series_df: pd.DataFrame, months: int = 3) -> Optional[float]:
    """
    Average the ``value`` column of a time series over a window starting at ``start_date``.

    The window runs from ``start_date`` to ``start_date`` + ``months`` calendar
    months, both ends inclusive. Rows are selected by comparing their
    ``registered_date_ts`` (epoch seconds) against the window bounds.

    Args:
        start_date: Window start, in any form ``pd.Timestamp`` accepts
            (e.g. ``'2022-06-30'``).
        time_series_df: Frame with ``registered_date_ts`` and ``value`` columns.
        months: Window length in calendar months.

    Returns:
        The mean of ``value`` inside the window, or ``None`` when no row falls
        inside it.
    """
    window_start = pd.Timestamp(start_date)
    window_end = window_start + pd.DateOffset(months=months)

    # NOTE(review): a tz-naive Timestamp converts to epoch seconds as if it were
    # UTC; confirm registered_date_ts was produced with the same convention.
    registered_ts = time_series_df['registered_date_ts']
    in_window = (registered_ts >= window_start.timestamp()) & (registered_ts <= window_end.timestamp())
    window_df = time_series_df[in_window]

    if window_df.empty:
        return None

    return window_df['value'].mean()

In [12]:
# Spot-check the helper: average interest rate over the 3 months starting 2022-06-30.
calculate_time_series_avg_value('2022-06-30', interest_rate_df, months=3)

2.19

## Helper function to return the inflation value for the year of a given date

In [36]:
def get_inflation_value_by_date(date_string: str, inflation_df: pd.DataFrame) -> Optional[float]:
    """
    Look up the inflation value registered for the calendar year of ``date_string``.

    Args:
        date_string: Date formatted as ``YYYY-MM-DD``; only its year is used.
        inflation_df: Frame with a ``registered_date`` column formatted as
            ``DD-MM-YYYY`` and a ``value`` column. The frame is not modified.

    Returns:
        The ``value`` of the first row whose ``registered_date`` falls in the
        same year, or ``None`` when no row matches, the date is missing or
        malformed, or a required column is absent.
    """
    try:
        target_date = pd.to_datetime(date_string, format='%Y-%m-%d')
        if pd.isna(target_date):
            # Missing date (None / NaN / NaT): there is no year to look up.
            return None

        # Parse into a local Series instead of writing a helper column onto the
        # caller's frame, so repeated calls (e.g. through .apply) have no side effects.
        registered_years = pd.to_datetime(
            inflation_df['registered_date'], format='%d-%m-%Y'
        ).dt.year
        matching_values = inflation_df.loc[registered_years == target_date.year, 'value']
    except (ValueError, KeyError):
        # ValueError: a date does not match the expected format.
        # KeyError: a required column is absent from inflation_df.
        return None

    if matching_values.empty:
        return None
    return matching_values.iloc[0]

In [39]:
# Spot-check the helper: only the year of the date is used for the lookup.
result = get_inflation_value_by_date(
    date_string='2023-12-31',
    inflation_df=inflation_df,
)
print("Result is:", result)

Result is: 4.64285


## Create new columns with average values of economic indicators and inflation of given year

In [40]:
# Work on a copy so the economic-indicator columns are not added to stock_df.
temp_stock_df = stock_df.copy()

# Average interest rate over the 3 months starting at each fiscal_date_ending.
temp_stock_df['avg_interest_rate'] = temp_stock_df['fiscal_date_ending'].apply(
    lambda fiscal_date: calculate_time_series_avg_value(
        fiscal_date, time_series_df=interest_rate_df, months=3
    )
)

In [41]:
# Average treasury yield over the 3 months starting at each fiscal_date_ending.
temp_stock_df['avg_treasury_yield'] = temp_stock_df['fiscal_date_ending'].apply(
    lambda fiscal_date: calculate_time_series_avg_value(
        fiscal_date, time_series_df=treasury_yield_df, months=3
    )
)

In [42]:
# Average unemployment rate over the 3 months starting at each fiscal_date_ending.
temp_stock_df['avg_unemployment_rate'] = temp_stock_df['fiscal_date_ending'].apply(
    lambda fiscal_date: calculate_time_series_avg_value(
        fiscal_date, time_series_df=unemployment_df, months=3
    )
)

In [43]:
# Average commodities index value over the 3 months starting at each fiscal_date_ending.
temp_stock_df['avg_global_commodities_index_value'] = temp_stock_df['fiscal_date_ending'].apply(
    lambda fiscal_date: calculate_time_series_avg_value(
        fiscal_date, time_series_df=commodities_index_df, months=3
    )
)

In [44]:
# Inflation value registered for the calendar year of each fiscal_date_ending.
temp_stock_df['inflation'] = temp_stock_df['fiscal_date_ending'].apply(
    lambda fiscal_date: get_inflation_value_by_date(
        fiscal_date, inflation_df=inflation_df
    )
)

In [45]:
# Preview the first five rows, now including the indicator columns added above.
temp_stock_df.head(n=5)

Unnamed: 0,symbol,fiscal_date_ending,reported_currency,gross_profit,total_revenue,cost_of_revenue,cost_of_goods_and_services_sold,operating_income,selling_general_and_administrative,research_and_development,...,proceeds_from_issuance_of_preferred_stock,proceeds_from_repurchase_of_equity,proceeds_from_sale_of_treasury_stock,change_in_cash_and_cash_equivalents,change_in_exchange_rate,avg_interest_rate,avg_treasury_yield,avg_unemployment_rate,avg_global_commodities_index_value,inflation
0,META,2023-06-30,USD,25939000000.0,31884000000.0,5945000000.0,5945000000.0,9392000000.0,7318000000.0,9344000000.0,...,,-898000000.0,,17398000000.0,,5.12,3.9,3.5,,4.64285
1,META,2023-03-31,USD,22483000000.0,28591000000.0,6108000000.0,6108000000.0,7227000000.0,5929000000.0,9381000000.0,...,,-9365000000.0,,-3261000000.0,,4.99,3.593333,3.566667,160.986739,4.64285
2,META,2022-12-31,USD,23584000000.0,31920000000.0,8336000000.0,8336000000.0,6399000000.0,7659000000.0,9771000000.0,...,,-6863000000.0,,-80000000.0,,4.516667,3.646667,3.5,175.165088,8.0028
3,META,2022-09-30,USD,21994000000.0,27710000000.0,5716000000.0,5716000000.0,5664000000.0,7164000000.0,9170000000.0,...,,-6354000000.0,,2138000000.0,,3.653333,3.83,3.6,192.598952,8.0028
4,META,2022-06-30,USD,23626000000.0,28818000000.0,5192000000.0,5192000000.0,8358000000.0,6582000000.0,8690000000.0,...,,-5233000000.0,,-1326000000.0,,2.19,3.106667,3.566667,230.461208,8.0028


In [46]:
# Show the first ten commodities index rows to cross-check the averaged column by hand.
commodities_index_df.head(n=10)

Unnamed: 0,indicator_name,value,unit,registered_date,registered_date_ts
0,Global_Commodities_Index,154.69542,index 2016=100,01-06-2023,1685566800
1,Global_Commodities_Index,157.292251,index 2016=100,01-05-2023,1682888400
2,Global_Commodities_Index,170.972547,index 2016=100,01-04-2023,1680296400
3,Global_Commodities_Index,168.294185,index 2016=100,01-03-2023,1677621600
4,Global_Commodities_Index,174.564801,index 2016=100,01-02-2023,1675202400
5,Global_Commodities_Index,182.636276,index 2016=100,01-01-2023,1672524000
6,Global_Commodities_Index,194.348353,index 2016=100,01-12-2022,1669845600
7,Global_Commodities_Index,192.082166,index 2016=100,01-11-2022,1667253600
8,Global_Commodities_Index,191.366336,index 2016=100,01-10-2022,1664571600
9,Global_Commodities_Index,220.018633,index 2016=100,01-09-2022,1661979600


In [47]:
# Hand check of avg_global_commodities_index_value for fiscal_date_ending
# 2023-03-31: mean of the 01-06-2023, 01-05-2023 and 01-04-2023 index values.
manual_total = 154.695420 + 157.292251 + 170.972547
manual_total / 3

160.9867393333333