In [60]:
import pandas as pd
import numpy as np
import pickle

from datetime import datetime, date
from tqdm.auto import tqdm


# Setup

retrieve data

In [61]:
import pandas as pd
import pickle
from datetime import datetime

ticker = 'U11.SI'
plus_target = 13

# Load data
mcesdf = pd.read_csv(f'data/{ticker}/Tp{plus_target}/Tp{plus_target}_mcesdf.csv')
ftraindf = pd.read_csv(f'data/{ticker}/ftraindf.csv')
pred_results = pd.read_csv(f'data/{ticker}/train/Tp{plus_target}_train_clustermembership.csv')

# Load cluster interpretation details
with open(f'/home/priya/Desktop/fyp/Src alwin/Src/data/{ticker}/cluster_details.pkl', 'rb') as handle:
    cluster_details = pickle.load(handle)

# Convert 'Date' in ftraindf to datetime.date
ftraindf['Date'] = pd.to_datetime(ftraindf['Date']).dt.date

# If pred_results doesn't contain date, align using ftraindf
if len(pred_results) <= len(ftraindf):
    pred_results['Date'] = ftraindf['Date'].iloc[:len(pred_results)].values
else:
    raise ValueError("Mismatch in row count between ftraindf and pred_results")

# Now pred_results has a proper Date column
print(pred_results[['Date']].head())


         Date
0  2000-01-21
1  2000-01-24
2  2000-01-25
3  2000-01-26
4  2000-01-27


format data

In [62]:
ftraindf = ftraindf[ftraindf['Date'] >= date(2013, 1, 1)]
pred_results = pred_results[pred_results['Date'] >= date(2013, 1, 1)]

In [63]:
ftraindf = ftraindf.drop(['Date'], axis = 1).reset_index(drop = True)
pred_results = pred_results.drop(['Date', 'pc_pred'], axis = 1).reset_index(drop = True)

In [64]:
pred_results

Unnamed: 0.1,Unnamed: 0,y_Tp13_PriceChg_c0,y_Tp13_PriceChg_c1,y_Tp13_PriceChg_c2,y_Tp13_PriceChg_c3,y_Tp13_PriceChg_c4,y_Tp13_PriceChg_c5,y_Tp13_PriceChg_c6
0,3305,0.025752,0.015982,0.284012,0.528466,0.139746,0.009322,-0.003280
1,3306,0.001262,0.002500,0.225787,0.542786,0.233658,-0.005050,-0.000943
2,3307,0.114254,0.224235,0.353752,0.091075,0.142901,0.007501,0.066282
3,3308,0.036085,0.035791,0.343069,0.472719,0.137547,-0.049248,0.024038
4,3309,0.000050,-0.033973,0.246241,0.421255,0.314916,0.016991,0.034519
...,...,...,...,...,...,...,...,...
1248,4553,-0.014901,-0.007506,0.232095,0.616197,0.194765,-0.003984,-0.016664
1249,4554,0.018491,0.014299,0.318038,0.440066,0.201101,0.000745,0.007261
1250,4555,-0.042225,-0.074640,0.062175,-0.014042,0.171204,0.664514,0.233013
1251,4556,0.152115,0.135384,0.450654,0.103704,0.217214,-0.025583,-0.033488


list of selected features

In [65]:
selected = list(mcesdf.head(6)['cols'])

column dictionary per feature

In [66]:
column_dictionary = {}

for feature in selected: 
    column_dictionary[feature] = list(cluster_details[feature].keys())

# Main Body

Variables

In [67]:
plus_target = 13
fuzzy_rule_base = [{}] 
hebbian_weights = np.array(0)
target = f'y_Tp{plus_target}_PriceChg'
decaying_factor = 0.98

Main Body

In [68]:


count = 0 

for row in tqdm(range(len(ftraindf))): 
    
    rule = {} 
    
    instance_cols = [] 
    
    for feature in selected: 
        cols = column_dictionary[feature]
        max_x_cluster = ftraindf[cols].iloc[row].idxmax()
        rule[feature] = max_x_cluster
    
    minA_value = ftraindf[list(rule.values())].iloc[row].min() 
    
    # dealing with y 
    max_y_cluster = pred_results.iloc[row].idxmax() 
    maxC_value = pred_results.iloc[row].max() 
    rule[target] = max_y_cluster
    
    # hebbian weight
    weight = minA_value * maxC_value
    
    
    
    # update weights (yay)
    
    # check if rule exists
    if rule in fuzzy_rule_base: 
        
        # identify index 
        index = fuzzy_rule_base.index(rule)
        updated_weight = hebbian_weights[index] + weight
        
        # applying decaying factor        
        hebbian_weights = hebbian_weights*decaying_factor
        
        # update instance weight 
        hebbian_weights[index] = updated_weight
    
    else: 
        
        # applying decaying factor 
        hebbian_weights = hebbian_weights*decaying_factor
        
        # get instance weight 
        updated_weight = weight
        
        # append rule & respective weight 
        fuzzy_rule_base.append(rule)
        hebbian_weights = np.append(hebbian_weights, updated_weight)
    
#     count += 1
#     if count == 10: break

# delete the dummy elements
fuzzy_rule_base = fuzzy_rule_base[1:]
hebbian_weights = np.delete(hebbian_weights, [0])

hebbian_data = {
    'rule': fuzzy_rule_base, 
    'weight': hebbian_weights
}

hebbian = pd.DataFrame(hebbian_data)
hebbian = hebbian.sort_values(['weight'], ascending = False).reset_index(drop = True)
hebbian


  0%|          | 0/1253 [00:00<?, ?it/s]

Unnamed: 0,rule,weight
0,"{'x_Tm1_PRoc1': 'x_Tm1_PRoc1_c2', 'x_Tm11_PRoc...",1.978627e+04
1,"{'x_Tm1_PRoc1': 'x_Tm1_PRoc1_c2', 'x_Tm11_PRoc...",9.313416e+03
2,"{'x_Tm1_PRoc1': 'x_Tm1_PRoc1_c2', 'x_Tm11_PRoc...",7.795837e+03
3,"{'x_Tm1_PRoc1': 'x_Tm1_PRoc1_c2', 'x_Tm11_PRoc...",6.877351e+03
4,"{'x_Tm1_PRoc1': 'x_Tm1_PRoc1_c2', 'x_Tm11_PRoc...",6.851234e+03
...,...,...
249,"{'x_Tm1_PRoc1': 'x_Tm1_PRoc1_c3', 'x_Tm11_PRoc...",8.760749e-08
250,"{'x_Tm1_PRoc1': 'x_Tm1_PRoc1_c2', 'x_Tm11_PRoc...",8.083163e-08
251,"{'x_Tm1_PRoc1': 'x_Tm1_PRoc1_c2', 'x_Tm11_PRoc...",4.485817e-08
252,"{'x_Tm1_PRoc1': 'x_Tm1_PRoc1_c3', 'x_Tm11_PRoc...",3.694018e-08


In [69]:
hebbian.to_csv(f'{ticker}Tp{plus_target}.csv')

In [70]:
hebbian['rule'][0]

{'x_Tm1_PRoc1': 'x_Tm1_PRoc1_c2',
 'x_Tm11_PRoc1': 'x_Tm11_PRoc1_c2',
 'x_Tm2_VolChg': 'x_Tm2_VolChg_c1',
 'x_Tm12_VolChg': 'x_Tm12_VolChg_c0',
 'x_Tm2_PriceChg': 'x_Tm2_PriceChg_c2',
 'x_Tm5_PriceChg': 'x_Tm5_PriceChg_c2',
 'y_Tp13_PriceChg': 'Unnamed: 0'}