In [1]:
import pandas as pd
import numpy as np
import pickle

from datetime import datetime, date
from tqdm.auto import tqdm


# Setup

retrieve data

In [2]:
# Run configuration: instrument symbol and the look-ahead horizon used in file names.
ticker = 'xom'
plus_target = 13

# Inputs produced by earlier pipeline steps (paths are relative to the notebook).
mcesdf = pd.read_csv(f'data/mces/Tp{plus_target}_mcesdf.csv')
ftraindf = pd.read_csv('data/ftraindf.csv')
pred_results = pd.read_csv(f'data/train/Tp{plus_target}_train_clustermembership.csv')

# SECURITY: pickle.load can execute arbitrary code while deserialising.
# Only load this file if it was produced by a trusted step of this project.
with open('data/cluster_details.pkl', 'rb') as handle:
    cluster_details = pickle.load(handle)

# Parse the ISO 'YYYY-MM-DD' strings in one vectorised pass instead of a
# row-wise apply. `.dt.date` keeps the values as `datetime.date` objects, so
# later comparisons against `date(...)` behave exactly as before.
ftraindf['Date'] = pd.to_datetime(ftraindf['Date'], format='%Y-%m-%d').dt.date
pred_results['Date'] = pd.to_datetime(pred_results['Date'], format='%Y-%m-%d').dt.date

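Format data: keep only rows dated 2013-01-01 or later, then drop the columns that are no longer needed and reset the index.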

In [3]:
# Keep only the observations dated 2013-01-01 or later, in both frames.
ftraindf = ftraindf.loc[ftraindf['Date'] >= date(2013, 1, 1)]
pred_results = pred_results.loc[pred_results['Date'] >= date(2013, 1, 1)]

In [4]:
# Remove the helper columns and renumber the rows from 0 so that both frames
# can be addressed by the same positional row number afterwards.
# Note: re-running this cell on its own fails, because the columns are gone.
ftraindf = ftraindf.drop(columns=['Date']).reset_index(drop=True)
pred_results = pred_results.drop(columns=['Date', 'pc_pred']).reset_index(drop=True)

In [5]:
# Display the filtered prediction frame (one membership column per target cluster).
pred_results

Unnamed: 0,y_Tp13_PriceChg_c0,y_Tp13_PriceChg_c1,y_Tp13_PriceChg_c2,y_Tp13_PriceChg_c3,y_Tp13_PriceChg_c4,y_Tp13_PriceChg_c5
0,0.066004,0.499098,0.378958,0.050244,0.005078,0.0
1,0.029474,0.373417,0.489010,0.088186,0.017522,0.0
2,0.031983,0.349128,0.479501,0.108863,0.027972,0.0
3,0.022537,0.338522,0.485837,0.120602,0.030395,0.0
4,0.046368,0.439806,0.438562,0.058434,0.015281,0.0
...,...,...,...,...,...,...
1254,0.036429,0.351750,0.453714,0.124107,0.031050,0.0
1255,0.019589,0.319711,0.428176,0.176650,0.052131,0.0
1256,0.007850,0.219208,0.446027,0.249908,0.072312,0.0
1257,0.011971,0.267855,0.441333,0.213400,0.061305,0.0


List of selected features: the entries of `mcesdf['cols']` in its first six rows.

In [6]:
# Feature names taken from the first six rows of the 'cols' column.
selected = list(mcesdf['cols'].iloc[:6])

Column dictionary: map each selected feature to the keys of its entry in `cluster_details`.

In [7]:
# For every selected feature, collect the keys stored under it in
# `cluster_details`; they are used later as column names of `ftraindf`.
column_dictionary = {
    feature: list(cluster_details[feature].keys())
    for feature in selected
}

# Main Body

Variables

In [12]:
# Look-ahead horizon and the key under which a rule stores its target cluster.
plus_target = 13
target = f'y_Tp{plus_target}_PriceChg'

# Multiplier applied to the stored weights each time a row is processed.
decaying_factor = 0.98

# Both containers are seeded with a placeholder element at position 0
# (an empty rule and a zero weight).
hebbian_weights = np.array(0)
fuzzy_rule_base = [{}]

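Build the fuzzy rule base: derive one rule per training row and accumulate its Hebbian weight, decaying the stored weights at every step.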

In [13]:


def build_hebbian_rule_base(memberships, predictions, feature_columns, target_name, decay):
    """Derive fuzzy rules row by row and accumulate decayed Hebbian weights.

    For every row (rows of both frames are paired by position):

    * each feature contributes the label of its column with the largest value
      in `memberships` (the rule's antecedent for that feature);
    * the rule's consequent is the label of the largest column in `predictions`;
    * the row's weight is ``min(antecedent maxima) * max(prediction row)``.

    Weight bookkeeping per row: if the rule was seen before, its new weight is
    its stored (not yet decayed) weight plus the row's weight; otherwise the
    rule is appended with the row's weight. All other stored weights are
    multiplied by `decay`.

    Parameters
    ----------
    memberships : pandas.DataFrame
        Frame holding the columns listed in `feature_columns`.
    predictions : pandas.DataFrame
        Frame whose columns are the candidate consequents; same length as
        `memberships`.
    feature_columns : dict[str, list[str]]
        Feature name -> columns of `memberships` belonging to that feature.
        Iteration order defines the key order inside each rule.
    target_name : str
        Key under which the consequent label is stored in each rule.
    decay : float
        Multiplier applied to the stored weights at every row.

    Returns
    -------
    (list[dict], numpy.ndarray)
        Rules in order of first appearance and their final weights.

    Raises
    ------
    ValueError
        If the two frames differ in length or no feature is supplied.
    """
    n_rows = len(memberships)
    if n_rows != len(predictions):
        raise ValueError(
            f'memberships has {n_rows} rows but predictions has {len(predictions)}; '
            'rows are paired by position and must line up'
        )
    if not feature_columns:
        raise ValueError('feature_columns must contain at least one feature')
    if n_rows == 0:
        return [], np.zeros(0, dtype=float)

    features = list(feature_columns)

    # Vectorised replacement for the per-row `frame[cols].iloc[row]` lookups:
    # compute every row's winning column and its value once per feature.
    antecedent_labels = pd.DataFrame(
        {name: memberships[cols].idxmax(axis=1).to_numpy()
         for name, cols in feature_columns.items()}
    )
    # The value at the idxmax column is the row maximum, so the smallest
    # antecedent membership is the min over features of the per-feature max.
    firing_strength = pd.DataFrame(
        {name: memberships[cols].max(axis=1).to_numpy()
         for name, cols in feature_columns.items()}
    ).min(axis=1).to_numpy()

    consequent_labels = predictions.idxmax(axis=1).to_numpy()
    consequent_strength = predictions.max(axis=1).to_numpy()
    row_weights = firing_strength * consequent_strength

    rules = []                       # rule dicts, in order of first appearance
    position_of = {}                 # rule values tuple -> index into `rules`
    weights = np.zeros(n_rows, dtype=float)  # at most one rule per row
    n_rules = 0

    rows = zip(
        antecedent_labels.itertuples(index=False, name=None),
        consequent_labels,
        row_weights,
    )
    for antecedent, consequent, weight in tqdm(rows, total=n_rows):
        rule = dict(zip(features, antecedent))
        rule[target_name] = consequent
        # Every rule has the same keys, so its values identify it uniquely.
        key = tuple(rule.values())

        position = position_of.get(key)
        if position is None:
            # New rule: decay the existing ones, then store it undecayed.
            weights[:n_rules] *= decay
            position_of[key] = n_rules
            rules.append(rule)
            weights[n_rules] = weight
            n_rules += 1
        else:
            # Known rule: reinforce from its pre-decay weight, decay the rest.
            reinforced = weights[position] + weight
            weights[:n_rules] *= decay
            weights[position] = reinforced

    return rules, weights[:n_rules].copy()


# The rule base is rebuilt from scratch here, so re-running this cell always
# gives the same result regardless of what earlier runs left in the kernel.
fuzzy_rule_base, hebbian_weights = build_hebbian_rule_base(
    ftraindf,
    pred_results,
    {feature: column_dictionary[feature] for feature in selected},
    target,
    decaying_factor,
)

hebbian_data = {
    'rule': fuzzy_rule_base,
    'weight': hebbian_weights
}

# Strongest rules first.
hebbian = pd.DataFrame(hebbian_data)
hebbian = hebbian.sort_values(['weight'], ascending = False).reset_index(drop = True)
hebbian


  0%|          | 0/1259 [00:00<?, ?it/s]

Unnamed: 0,rule,weight
0,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm2...",6.845523e+00
1,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c4', 'x_Tm2...",1.500664e+00
2,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm2...",1.198813e+00
3,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm2...",1.052767e+00
4,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm2...",1.029090e+00
...,...,...
211,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm2...",9.883042e-12
212,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c2', 'x_Tm2...",4.604061e-12
213,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm2...",4.595795e-12
214,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm2...",2.741476e-12


In [14]:
# Write the sorted rule table to the working directory. The row index is
# written as the first (unnamed) column because `index` is left at its default.
hebbian.to_csv(f'Tp{plus_target}.csv')