In [49]:
import os
from pytse_client.download import download_financial_indexes
import pandas as pd
import jalali_pandas
import math
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import export_graphviz 
import warnings
warnings.filterwarnings("ignore")

# Locate the dataset folder under the current user's Desktop.
desktop = os.path.join(os.path.expanduser('~'), 'Desktop')
bourse_view = os.path.join(desktop, "sector_slc", "bourse_view")
dataset_path = os.path.join(bourse_view, "dataset")

# Build the file path with os.path.join so the separator matches the host OS
# (a hard-coded "\\" only resolves on Windows).
dataset_df = pd.read_csv(os.path.join(dataset_path, "dataset.csv"))
# "Unnamed: 0" is dropped right after loading.
# NOTE(review): presumably the index column written by an earlier to_csv — confirm.
dataset_df = dataset_df.drop("Unnamed: 0", axis=1)

# Distinct sector labels, in order of first appearance in the dataset.
list_of_sectors = dataset_df.sector.unique().tolist()

# Overwrite the lagged P/E columns with their spread over the market P/E of the same lag.
dataset_df["P/E-ttm_1_lag"] = dataset_df["P/E-ttm_1_lag"] - dataset_df["market_pe_1_lag"]
dataset_df["P/E-ttm_2_lag"] = dataset_df["P/E-ttm_2_lag"] - dataset_df["market_pe_2_lag"]

# Lagged quarterly returns in excess of the USD return of the same lag.
dataset_df["quarterly_return_1_lag_excess_usd"] = dataset_df["quarterly_return_1_lag"] - dataset_df["quarterly_usd_return_1_lag"]
dataset_df["quarterly_return_2_lag_excess_usd"] = dataset_df["quarterly_return_2_lag"] - dataset_df["quarterly_usd_return_2_lag"]

# Lagged quarterly returns in excess of the index return of the same lag.
dataset_df["quarterly_return_1_lag_excess_index"] = dataset_df["quarterly_return_1_lag"] - dataset_df["index_quarterly_return_1_lag"]
dataset_df["quarterly_return_2_lag_excess_index"] = dataset_df["quarterly_return_2_lag"] - dataset_df["index_quarterly_return_2_lag"]


In [140]:
# Period to predict: the single-quarter cell below scores exactly this
# (year, quarter), and the walk-forward loop starts from it.
# NOTE(review): the year is presumably in the Jalali calendar — confirm.
year_to_predict, quarter_to_predict = 1396, 1

In [141]:
# Predictors fed to the tree, the column it learns to predict, and how many
# sectors go into each leg of the long/short portfolio.
list_of_features = [
    "P/E-ttm_1_lag", "P/E-ttm_2_lag",
    "quarterly_return_1_lag", "quarterly_return_2_lag",
    "quarterly_return_1_lag_excess_usd", "quarterly_return_2_lag_excess_usd",
    "quarterly_return_1_lag_excess_index", "quarterly_return_2_lag_excess_index",
]
target = "quarter_excess_return"
n_sectors_per_leg = 3

# All rows of the quarter being predicted.
quarter_rows = dataset_df[(dataset_df.quarter == quarter_to_predict) & (dataset_df.year == year_to_predict)]

# Train on every row stored before the first row of the target quarter.
# Raises IndexError if the dataset has no row for that quarter.
# NOTE(review): this only keeps future data out of the training set if
# dataset_df is ordered chronologically and still carries the default
# 0..n-1 index (label == position) — confirm.
first_occurence = quarter_rows.index[0]
train_set = dataset_df.iloc[:first_occurence]

regressor = DecisionTreeRegressor(random_state=0, max_depth=5, criterion="absolute_error",
                                  min_samples_split=300, min_samples_leaf=150)
X = train_set[list_of_features]
y = train_set[target]
regressor.fit(X, y)

# One row per sector (the first one if a sector is duplicated), arranged in
# list_of_sectors order so the ranking sees the same input order as a
# sector-by-sector loop would. Sectors without a row in this quarter are
# skipped instead of aborting the cell with an IndexError.
sector_rows = quarter_rows.drop_duplicates(subset="sector").set_index("sector")
sector_rows = sector_rows.loc[[sector for sector in list_of_sectors if sector in sector_rows.index]]

# Predict all sectors in one call, from a DataFrame that carries the same
# feature names the model was fitted with. Returns are scaled by 100.
result_df = pd.DataFrame({
    "sector": sector_rows.index.to_numpy(),
    "predicted_excess_return": regressor.predict(sector_rows[list_of_features]) * 100,
    "realized_excess_return": sector_rows[target].to_numpy() * 100,
})

# Rank sectors from highest to lowest predicted excess return.
# NOTE(review): the tree predicts one value per leaf, so many sectors tie;
# which of the tied sectors end up in a leg depends on the sort's tie order.
result_df = result_df.sort_values(by="predicted_excess_return", ascending=False).reset_index(drop=True)

# Long leg = mean realized return of the top-ranked sectors, short leg = of the bottom-ranked ones.
long_leg_return = result_df.realized_excess_return.iloc[:n_sectors_per_leg].mean()
short_leg_return = result_df.realized_excess_return.iloc[-n_sectors_per_leg:].mean()

# Index return taken from the last sector row of the quarter.
# NOTE(review): presumably identical across sectors within a quarter — confirm.
index_return = sector_rows["index_quarterly_return"].iloc[-1] * 100

print(long_leg_return,short_leg_return)
result_df

10.772290475934057 3.209886266947507


Unnamed: 0,sector,predicted_excess_return,realized_excess_return
0,چاپ و نشر,5.619662,20.825402
1,محصولات پاک کننده,2.272923,-0.44115
2,شیرینیجات,2.272923,11.932619
3,مواد شیمیایی-متنوع,1.502638,2.035065
4,تولید کود و ترکیبات نیتروژن,1.502638,-13.979326
5,تولید فلزات گرانبهای غیراهن,1.502638,-10.041248
6,کانی های فلزی,1.502638,-14.019904
7,کاشی و سرامیک,1.502638,0.673579
8,سیمان، اهک و گچ,1.502638,-2.878142
9,محصولات کشاورزی,1.502638,9.383446


In [142]:
# Last (year, quarter) included in the walk-forward evaluation.
last_year = 1401
last_year_quarter = 3

list_of_features = [
    "P/E-ttm_1_lag", "P/E-ttm_2_lag",
    "quarterly_return_1_lag", "quarterly_return_2_lag",
    "quarterly_return_1_lag_excess_usd", "quarterly_return_2_lag_excess_usd",
    "quarterly_return_1_lag_excess_index", "quarterly_return_2_lag_excess_index",
]
target = "quarter_excess_return"
n_sectors_per_leg = 3


def fit_and_rank_sectors(data, sectors, features, target_col, year, quarter):
    """Fit a tree on all rows before (year, quarter) and rank that quarter's sectors.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``year``, ``quarter``, ``sector``, ``index_quarterly_return``,
        every column in ``features`` and ``target_col``.
        NOTE(review): rows are assumed to be in chronological order with the
        default 0..n-1 index; otherwise the training slice can include
        later quarters — confirm.
    sectors : list
        Sector labels to score, in the order they are fed to the ranking.
    features : list of str
        Predictor column names.
    target_col : str
        Name of the column the tree is trained to predict.
    year, quarter : int
        Period to predict.

    Returns
    -------
    tuple
        ``(result_df, regressor, index_return)`` where ``result_df`` has the
        columns ``sector``, ``predicted_excess_return`` and
        ``realized_excess_return`` (both scaled by 100) sorted by prediction in
        descending order, ``regressor`` is the fitted tree, and
        ``index_return`` is ``index_quarterly_return * 100`` of the last
        scored sector row.

    Raises
    ------
    IndexError
        If ``data`` has no row for ``(year, quarter)``.
    """
    quarter_rows = data[(data.quarter == quarter) & (data.year == year)]

    # Everything stored before the first row of the target quarter is training data.
    first_occurence = quarter_rows.index[0]
    train_set = data.iloc[:first_occurence]

    model = DecisionTreeRegressor(random_state=0, max_depth=5, criterion="absolute_error",
                                  min_samples_split=300, min_samples_leaf=150)
    model.fit(train_set[features], train_set[target_col])

    # One row per sector (first one on duplicates), in the order of `sectors`.
    # Sectors with no row in this quarter are skipped rather than raising.
    sector_rows = quarter_rows.drop_duplicates(subset="sector").set_index("sector")
    sector_rows = sector_rows.loc[[sector for sector in sectors if sector in sector_rows.index]]

    # Single vectorised predict on a DataFrame that keeps the fitted feature names.
    ranking = pd.DataFrame({
        "sector": sector_rows.index.to_numpy(),
        "predicted_excess_return": model.predict(sector_rows[features]) * 100,
        "realized_excess_return": sector_rows[target_col].to_numpy() * 100,
    })
    ranking = ranking.sort_values(by="predicted_excess_return", ascending=False).reset_index(drop=True)

    quarter_index_return = sector_rows["index_quarterly_return"].iloc[-1] * 100
    return ranking, model, quarter_index_return


performance_records = []
for year in range(year_to_predict, last_year + 1):
    # The first year starts at quarter_to_predict and the last year stops at
    # last_year_quarter; both bounds apply when the two years coincide, so the
    # start quarter is honoured in that case as well.
    first_quarter = quarter_to_predict if year == year_to_predict else 1
    final_quarter = last_year_quarter if year == last_year else 4

    for quarter in range(first_quarter, final_quarter + 1):
        # These names are rebound on every iteration, so after the loop they
        # describe the last evaluated quarter.
        result_df, regressor, index_return = fit_and_rank_sectors(
            dataset_df, list_of_sectors, list_of_features, target, year, quarter
        )
        long_leg_return = result_df.realized_excess_return.iloc[:n_sectors_per_leg].mean()
        short_leg_return = result_df.realized_excess_return.iloc[-n_sectors_per_leg:].mean()
        performance_records.append({
            "year": year,
            "quarter": quarter,
            "long_leg_return": long_leg_return,
            "short_leg_return": short_leg_return,
        })

# Build the frame once from the collected records; year and quarter stay integers.
performance_df = pd.DataFrame(
    performance_records,
    columns=["year", "quarter", "long_leg_return", "short_leg_return"],
)

In [143]:
# Display the per-quarter long-leg and short-leg returns collected by the walk-forward loop.
performance_df

Unnamed: 0,year,quarter,long_leg_return,short_leg_return
0,1396.0,1.0,10.77229,3.209886
1,1396.0,2.0,-7.823025,26.519346
2,1396.0,3.0,4.592911,-18.552917
3,1396.0,4.0,0.079338,-13.829183
4,1397.0,1.0,5.666512,-12.260593
5,1397.0,2.0,6.591425,-5.41314
6,1397.0,3.0,2.315362,8.496984
7,1397.0,4.0,2.111471,-3.333252
8,1398.0,1.0,4.578093,16.51201
9,1398.0,2.0,15.722861,146.327541


In [144]:
# Number of evaluated quarters in which the long leg's return was above zero.
(performance_df["long_leg_return"] > 0).sum()

14