In [1]:
import pandas as pd
import sqlite3

In [2]:
# Open the SQLite database that the queries below read from; `conn` is a
# notebook-level global used by the helper functions defined later.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# will not run on another machine without editing it.
conn = sqlite3.connect('/Users/orestis/MyProjects/InvestorAPI/app/database/ibd.db')

In [3]:
# Load every row of the economic-indicator table, ordered by the integer
# `registered_date_ts` column, into one DataFrame.
query = "SELECT * FROM economic_indicator_time_series ORDER BY registered_date_ts"

economic_indicators_df = pd.read_sql(query, conn)

In [4]:
# Print the column names, dtypes and non-null counts of the loaded table.
economic_indicators_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6500 entries, 0 to 6499
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   indicator_name      6500 non-null   object 
 1   value               6500 non-null   float64
 2   unit                6500 non-null   object 
 3   registered_date     6500 non-null   object 
 4   registered_date_ts  6500 non-null   int64  
dtypes: float64(1), int64(1), object(3)
memory usage: 254.0+ KB


In [35]:
import datetime as dt

def get_indicator_time_series_df_by_name(indicator_name: str, connection=None) -> pd.DataFrame:
    """Load the time series of a single economic indicator.

    Selects every row of ``economic_indicator_time_series`` whose
    ``indicator_name`` equals the given name, ordered by ``registered_date_ts``.

    Args:
        indicator_name: Exact indicator name to select, e.g. ``'Interest_Rate'``.
            It is passed to the database as a bound parameter, so names
            containing quotes are matched literally.
        connection: Optional database connection to query. When omitted, the
            notebook-level ``conn`` is used. The query uses the ``?``
            placeholder style of ``sqlite3``.

    Returns:
        A DataFrame with the table's columns, where ``value`` is renamed to
        ``indicator_value``, plus integer ``year`` and ``month`` columns parsed
        from ``registered_date`` (formatted ``DD-MM-YYYY``).

    Raises:
        ValueError: If a ``registered_date`` value does not match ``DD-MM-YYYY``.
    """
    if connection is None:
        connection = conn  # fall back to the notebook-level connection

    # Bind the name as a parameter instead of interpolating it into the SQL
    # text: this avoids SQL injection and broken queries on names with quotes.
    query = (
        "SELECT * FROM economic_indicator_time_series "
        "WHERE indicator_name = ? "
        "ORDER BY registered_date_ts"
    )
    df = pd.read_sql(query, connection, params=(indicator_name,))

    # Parse the date column once, then derive both calendar parts from it.
    registered = pd.to_datetime(df['registered_date'], format="%d-%m-%Y")
    return df.assign(
        # Cast explicitly so the merge keys stay int64 regardless of pandas version.
        year=registered.dt.year.astype('int64'),
        month=registered.dt.month.astype('int64'),
    ).rename(columns={'value': 'indicator_value'})

In [36]:
# Fetch the 'Interest_Rate' indicator series with its year/month columns.
interest_rate_df = get_indicator_time_series_df_by_name('Interest_Rate')

In [37]:
# Show the last rows, i.e. the largest registered_date_ts values.
interest_rate_df.tail()

Unnamed: 0,indicator_name,indicator_value,unit,registered_date,registered_date_ts,year,month
826,Interest_Rate,5.06,percent,01-05-2023,1682888400,2023,5
827,Interest_Rate,5.08,percent,01-06-2023,1685566800,2023,6
828,Interest_Rate,5.12,percent,01-07-2023,1688158800,2023,7
829,Interest_Rate,5.33,percent,01-08-2023,1690837200,2023,8
830,Interest_Rate,5.33,percent,01-09-2023,1693515600,2023,9


In [34]:
def get_sector_time_series_df_by_name(sector_name: str, connection=None) -> pd.DataFrame:
    """Load the monthly average close price of all stocks in one sector.

    Joins ``stock_time_series`` to ``stock_overview`` on ``symbol``, keeps the
    rows whose ``sector`` equals the given name, and averages ``close_price``
    per ``MM-YYYY`` group taken from ``registered_date``.

    Args:
        sector_name: Exact sector name to select, e.g. ``'TECHNOLOGY'``. It is
            passed to the database as a bound parameter, so names containing
            quotes are matched literally.
        connection: Optional database connection to query. When omitted, the
            notebook-level ``conn`` is used. The query uses the ``?``
            placeholder style of ``sqlite3``.

    Returns:
        A DataFrame with columns ``sector_avg_price``, ``month_year``,
        ``registered_date``, ``year`` and ``month``, one row per month.

    Raises:
        ValueError: If a ``registered_date`` value does not match ``DD-MM-YYYY``.
    """
    if connection is None:
        connection = conn  # fall back to the notebook-level connection

    # The sector name is bound as a parameter instead of being interpolated
    # into the SQL text: this avoids SQL injection and broken queries on names
    # with quotes.
    #
    # NOTE: `registered_date` (and the `registered_date_ts` used for ordering)
    # is not aggregated, so SQLite takes it from an arbitrary row of each month
    # group. Only its month and year, which are constant within a group, are
    # relied on below.
    query = '''
        SELECT
            AVG(sts.close_price) as sector_avg_price,
            substr(sts.registered_date, 4, 7) AS month_year,
            sts.registered_date
        FROM stock_time_series as sts
        INNER JOIN stock_overview as so
        ON sts.symbol = so.symbol
        WHERE so.sector = ?
        GROUP BY month_year
        ORDER BY sts.registered_date_ts ASC
    '''
    df = pd.read_sql(query, connection, params=(sector_name,))

    # Parse the date column once, then derive both calendar parts from it.
    registered = pd.to_datetime(df['registered_date'], format="%d-%m-%Y")
    return df.assign(
        # Cast explicitly so the merge keys stay int64 regardless of pandas version.
        year=registered.dt.year.astype('int64'),
        month=registered.dt.month.astype('int64'),
    )

In [38]:
# Fetch the monthly average close price for the 'TECHNOLOGY' sector.
tech_sector_time_series_df = get_sector_time_series_df_by_name('TECHNOLOGY')

In [39]:
# Show the last rows of the monthly sector series.
tech_sector_time_series_df.tail()

Unnamed: 0,sector_avg_price,month_year,registered_date,year,month
282,64.114936,06-2023,30-06-2023,2023,6
283,67.501632,07-2023,31-07-2023,2023,7
284,65.413177,08-2023,31-08-2023,2023,8
285,62.146369,09-2023,29-09-2023,2023,9
286,76.897073,10-2023,04-10-2023,2023,10


In [40]:
# Align the two series by calendar month. pd.merge defaults to an inner join,
# so only (year, month) pairs present in both frames are kept; the overlapping
# `registered_date` columns get the `_x` / `_y` suffixes.
merged_df = pd.merge(tech_sector_time_series_df, interest_rate_df, on=['year', 'month'])

In [41]:
# Show the first rows of the merged frame to check the join result.
merged_df.head()

Unnamed: 0,sector_avg_price,month_year,registered_date_x,year,month,indicator_name,indicator_value,unit,registered_date_y,registered_date_ts
0,47.304441,12-1999,31-12-1999,1999,12,Interest_Rate,5.3,percent,01-12-1999,943999200
1,42.676656,01-2000,31-01-2000,2000,1,Interest_Rate,5.45,percent,01-01-2000,946677600
2,49.756055,02-2000,29-02-2000,2000,2,Interest_Rate,5.73,percent,01-02-2000,949356000
3,44.379935,03-2000,31-03-2000,2000,3,Interest_Rate,5.85,percent,01-03-2000,951861600
4,38.815606,04-2000,28-04-2000,2000,4,Interest_Rate,6.02,percent,01-04-2000,954536400


In [43]:
# Correlation between the sector's monthly average price and the indicator
# value (Series.corr uses the Pearson method by default).
correlation = merged_df['sector_avg_price'].corr(merged_df['indicator_value'])

In [44]:
# Display the correlation coefficient.
correlation

-0.19029136502355604

In [45]:
def get_correlation_between_sector_and_economic_indicator(sector_name, indicator_name):
    """Correlate a sector's monthly average price with an economic indicator.

    Loads both series with the notebook's helper functions, inner-joins them
    on their ``year`` and ``month`` columns, and returns the correlation
    between ``sector_avg_price`` and ``indicator_value`` (``Series.corr``
    with its default method).
    """
    indicator_series = get_indicator_time_series_df_by_name(indicator_name)
    sector_series = get_sector_time_series_df_by_name(sector_name)

    # Keep only the months for which both a sector price and an indicator exist.
    joined = sector_series.merge(indicator_series, on=['year', 'month'])

    prices = joined['sector_avg_price']
    indicator_values = joined['indicator_value']
    return prices.corr(indicator_values)