# 0. Configs and Imports

## 0.1. Imports

In [2]:
# Import for directory creation
from src.utils.file_operators import create_directory_structure

# Imports for data fetch
from src.data_collectors.yahoo_fin_collector import fetch_data

# Imports for loading config
from src.utils.file_operators import load_yaml

# Imports for Correlation Calculation
from src.computation.correlation import calculate_pairwise_corrrelation

# Imports for Valuation Calculation
from src.computation.valuation import raw_valuation_score_computation

## 0.2. Base file path, Configuration Filepath & Configuration Dictionary

In [3]:
# Root folder that is passed to `create_directory_structure` in the directory-creation cell.
# NOTE(review): machine-specific absolute path — adjust before running on another machine.
base_file_path = "C:/Users/sharv/Documents/Sharvil/Projects/AutoWeight/data"

In [4]:
# Location of the YAML configuration file; it is loaded into `config` and also
# handed to every pipeline call as `config_path`.
# NOTE(review): machine-specific absolute path — adjust before running on another machine.
conf_path = "C:/Users/sharv/Documents/Sharvil/Projects/AutoWeight/config-sd.yaml"

In [5]:
# Load the configuration file; later cells index the result as nested mappings
# (e.g. config['config']['data']).
config = load_yaml(conf_path)

## 0.3. Create Directory Structure (Optional: Run once only)

In [6]:
# Create the base data directory and its sub-directories under `base_file_path`.
# With verbose=True the run below reports each path it created.
create_directory_structure(path=base_file_path, verbose=True)

[38;5;195mINFO: Creating Base Directory[0m
[1m[92mSUCCESS: Base Directory path created at: C:\Users\sharv\Documents\Sharvil\Projects\AutoWeight\data[0m
[38;5;195mINFO: Creating Sub Directories[0m
[1m[92mSUCCESS: Sub Directory path created at: C:\Users\sharv\Documents\Sharvil\Projects\AutoWeight\data\01_raw[0m

[1m[92mSUCCESS: Sub Directory path created at: C:\Users\sharv\Documents\Sharvil\Projects\AutoWeight\data\02_processed[0m

[1m[92mSUCCESS: Sub Directory path created at: C:\Users\sharv\Documents\Sharvil\Projects\AutoWeight\data\03_analysis[0m

[1m[92mSUCCESS: Created all directories successfully![0m



# 1. Data Fetch

## 1.1. Validate the configurations for data fetch

In [7]:
# Read (not print) the data-fetch settings from the loaded configuration;
# the next cell displays them.
data_cfg = config['config']['data']

# Tickers listed in the config
ticker_list = data_cfg['tickers']

# Evaluation window (in days) and the volatility indicator symbol
window, volatility_indicator = data_cfg['window_in_days'], data_cfg['vix']

In [8]:
# Echo the data-fetch settings so they can be checked before the pipeline runs.
print("The tickers in the config list are:")

# Number the tickers from 1 for display.
for position, symbol in enumerate(ticker_list, start=1):
    print(f"    {position}. {symbol}")

print(f"The window of data to evaluate is `{window}` days")

print(f"The volatility indicator used is `{volatility_indicator}`")

The tickers in the config list are:
    1. HDFCSML250.NS
    2. HDFCGOLD.NS
    3. HDFCNEXT50.NS
The window of data to evaluate is `900` days
The volatility indicator used is `^INDIAVIX`


## 1.2. Run the `fetch_data` pipeline

In [9]:
# Run the data-fetch pipeline, driven by the YAML config at `conf_path`.
# Per the log below it downloads and processes each ticker, then saves the data.
# NOTE(review): presumably save_data=True enables the save step and verbose=True
# the log lines; the return type is not visible here — confirm in fetch_data.
ohlcv_data = fetch_data(
    config_path=conf_path,
    save_data=True, 
    verbose=True
)

[38;5;195mINFO: Downloading data for ticker: HDFCSML250.NS[0m
[1m[92mSUCCESS: Successfully downloaded data for: HDFCSML250.NS[0m
[38;5;195mINFO: Processing data for ticker: HDFCSML250.NS[0m
[1m[92mSUCCESS: Successfully processed data for ticker: HDFCSML250.NS[0m

[38;5;195mINFO: Downloading data for ticker: HDFCGOLD.NS[0m
[1m[92mSUCCESS: Successfully downloaded data for: HDFCGOLD.NS[0m
[38;5;195mINFO: Processing data for ticker: HDFCGOLD.NS[0m
[1m[92mSUCCESS: Successfully processed data for ticker: HDFCGOLD.NS[0m

[38;5;195mINFO: Downloading data for ticker: HDFCNEXT50.NS[0m
[1m[92mSUCCESS: Successfully downloaded data for: HDFCNEXT50.NS[0m
[38;5;195mINFO: Processing data for ticker: HDFCNEXT50.NS[0m
[1m[92mSUCCESS: Successfully processed data for ticker: HDFCNEXT50.NS[0m

[38;5;195mINFO: Saving data for all tickers:[0m
[38;5;195mINFO:    1. HDFCSML250.NS[0m
[38;5;195mINFO:    2. HDFCGOLD.NS[0m
[38;5;195mINFO:    3. HDFCNEXT50.NS[0m
[1m[92mSUCCES

# 2. Calculate Correlation

## 2.1. Validate the correlation parameters

In [10]:
# Correlation settings from the config; the next cell reads its 'method',
# 'min_observations', 'optimization_strategy', 'metrics' and 'filter' entries.
correlation_params = config['config']['parameters']['correlation']

In [11]:
# Echo the correlation settings so they can be checked before the pipeline runs.
print(f"The method used for calculating correlation is: {correlation_params['method']}")
print(f"Minimum samples needed for correlation are: {correlation_params['min_observations']}")
print(f"Optimization strategy for filtering pairs: {correlation_params['optimization_strategy']}")
print("The price metrics defined inn the configs are:")
# Number the metrics from 1 for display.
for position, metric in enumerate(correlation_params['metrics'], start=1):
    print(f"    {position}. {metric}")

# Report the enabled filters exactly once.
# The previous version used three independent `if` statements with the `else`
# attached only to the last one, so:
#   - with both filters on, the header and each filter were printed twice;
#   - with only the inverse threshold on, "No filters applied" was printed
#     right after listing that filter.
filter_cfg = correlation_params['filter']
top_n_enabled = filter_cfg['filter_n_pairs']
inverse_enabled = filter_cfg['filter_inverse_threshold']

if top_n_enabled or inverse_enabled:
    print("The filters applied are:")
    if top_n_enabled:
        print(f"    - Top Pairs set at: {filter_cfg['top_n_pairs']}")
    if inverse_enabled:
        print(f"    - Inverse threshold set at: {filter_cfg['inverse_threshold']}")
else:
    print("No filters applied")


The method used for calculating correlation is: pearson
Minimum samples needed for correlation are: 60
Optimization strategy for filtering pairs: negative
The price metrics defined inn the configs are:
    1. open
    2. high
    3. low
    4. close
    5. adj_close
The filters applied are:
    - Top Pairs set at: 10


## 2.2. Run the `calculate_pairwise_corrrelation` pipeline

In [12]:
# Run the pairwise-correlation pipeline, driven by the YAML config at `conf_path`.
# The log below shows it working through the configured price metrics one by one.
# NOTE(review): the return type is not visible here — confirm in
# calculate_pairwise_corrrelation.
# (The name was previously assigned twice in one chained statement; once is enough.)
correlation_results = calculate_pairwise_corrrelation(
    config_path=conf_path,
    save_data=True,
    verbose=True
)

[38;5;195mINFO:    Starting Correlation Pipeline for open metric of data[0m
[38;5;195mINFO:    Calculating PCT Change for open metric of data[0m
[38;5;195mINFO:    Calculating Correlation for PCT Change for open metric of data[0m
[38;5;195mINFO:    Adjusting Correlation based on optimization strategy for open metric of data[0m
[1m[38;2;255;165;0mWARN:   Number of filtered pairs are less than the `top_n_pairs` config set at 10. Saving all pairs[0m
[1m[92mSUCCESS:    Correlation Pipeline for open metric of dataframes, are saved.[0m
[1m[92mSUCCESS:    Correlation Pipeline for open metric of data finished successfully[0m

[38;5;195mINFO:    Starting Correlation Pipeline for high metric of data[0m
[38;5;195mINFO:    Calculating PCT Change for high metric of data[0m
[38;5;195mINFO:    Calculating Correlation for PCT Change for high metric of data[0m
[38;5;195mINFO:    Adjusting Correlation based on optimization strategy for high metric of data[0m
[1m[38;2;255;165;0

# 3. Calculate Valuation of Securities

## 3.1. Validate Valuation Config

In [13]:
# Valuation settings from the config; the next cell reads its 'method',
# 'window_in_days', 'metric' and 'apply_bounding' entries.
valuation_params = config['config']['parameters']['valuation']

In [14]:
# Echo the valuation settings so they can be checked before the pipeline runs.
valuation_summary = (
    f"The method used for calculating valuation is: {valuation_params['method']}",
    f"Window size for computing rolling metrics is: {valuation_params['window_in_days']}",
    f"Price metric used for valuation is: {valuation_params['metric']}",
)
print(*valuation_summary, sep="\n")

# Only mention bounding when the config switches it on.
if valuation_params['apply_bounding']:
    print("Bounding of tanh will be applied on the valuation score")

The method used for calculating valuation is: log_price_z_score
Window size for computing rolling metrics is: 400
Price metric used for valuation is: adj_close
Bounding of tanh will be applied on the valuation score


In [15]:
# Run the valuation pipeline, driven by the YAML config at `conf_path`.
# Per the log below it computes a valuation score for one log-price column per ticker.
# NOTE(review): presumably save_data=True persists the scores; the return type is
# not visible here — confirm in raw_valuation_score_computation.
valuation_results = raw_valuation_score_computation(
    config_path=conf_path,
    save_data=True,
    verbose=True
)

[38;5;195mINFO: Starting Valuation Pipeline.[0m
[38;5;195mINFO: Using price metric adj_close[0m
[38;5;195mINFO: Computing Valuation scores:[0m
[38;5;195mINFO:     Computing valuation score for column: log_price_HDFCSML250.NS[0m
[38;5;195mINFO:     Computing valuation score for column: log_price_HDFCGOLD.NS[0m
[38;5;195mINFO:     Computing valuation score for column: log_price_HDFCNEXT50.NS[0m
[1m[92mSUCCESS: Valuation Pipeline Run Successfully.[0m
