In [1]:
import pandas as pd
import numpy as np
import pickle

from datetime import datetime, date
from tqdm.auto import tqdm


# Setup

Retrieve data: load the CSV inputs and the pickled cluster details, then parse the `Date` columns.

In [2]:
# Load the inputs for the Tp{plus_target} target: the `mcesdf` table, the
# training frame and the per-row cluster-membership predictions.
ticker = 'ms'
plus_target = 13

mcesdf = pd.read_csv(f'data/mces/Tp{plus_target}_mcesdf.csv')
ftraindf = pd.read_csv('data/ftraindf.csv')
pred_results = pd.read_csv(f'data/train/Tp{plus_target}_train_clustermembership.csv')

# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load this file if it comes from a trusted source.
with open('data/cluster_details.pkl', 'rb') as handle:
    cluster_details = pickle.load(handle)

# Parse the ISO-formatted date strings in one vectorized pass (instead of a
# row-wise apply + strptime) and keep plain `datetime.date` objects so the
# columns can be compared against `date(...)` values.
ftraindf['Date'] = pd.to_datetime(ftraindf['Date'], format='%Y-%m-%d').dt.date
pred_results['Date'] = pd.to_datetime(pred_results['Date'], format='%Y-%m-%d').dt.date

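Format data: keep rows dated 2013-01-01 or later, then drop the `Date` (and `pc_pred`) columns and reset the row index.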

In [3]:
# Restrict both frames to observations dated 1 January 2013 or later.
ftraindf = ftraindf.loc[ftraindf['Date'] >= date(2013, 1, 1)]
pred_results = pred_results.loc[pred_results['Date'] >= date(2013, 1, 1)]

In [4]:
# Remove the columns that are no longer needed after the date filter and
# renumber the rows from 0 in both frames.
ftraindf = ftraindf.drop(columns=['Date']).reset_index(drop=True)
pred_results = pred_results.drop(columns=['Date', 'pc_pred']).reset_index(drop=True)

In [5]:
# Display the prediction frame as it stands at this point in the notebook
# (one membership column per target cluster, one row per observation).
pred_results

Unnamed: 0,y_Tp13_PriceChg_c0,y_Tp13_PriceChg_c1,y_Tp13_PriceChg_c2,y_Tp13_PriceChg_c3,y_Tp13_PriceChg_c4,y_Tp13_PriceChg_c5,y_Tp13_PriceChg_c6
0,0.000201,0.012405,0.311965,0.551324,0.122438,0.000820,0.000000
1,0.000000,0.007669,0.193896,0.546503,0.247502,0.002692,0.000664
2,0.000000,0.007738,0.237279,0.564950,0.187699,0.001331,0.000144
3,0.000000,0.004448,0.170400,0.582561,0.239383,0.002153,0.000518
4,0.000295,0.009774,0.322438,0.569597,0.097038,0.000442,0.000000
...,...,...,...,...,...,...,...
1254,0.001554,0.034759,0.402485,0.484064,0.076096,0.000309,0.000000
1255,0.001549,0.033696,0.396113,0.475309,0.091925,0.000445,0.000000
1256,0.001723,0.035972,0.395852,0.479660,0.085245,0.000487,0.000000
1257,0.001909,0.039640,0.397043,0.477848,0.082031,0.000445,0.000000


List of selected features: the first six entries of the `cols` column in `mcesdf`.

In [6]:
# Take the first six entries of the `cols` column as the features to build rules from.
selected = list(mcesdf['cols'].iloc[:6])

Column dictionary: for each selected feature, the list of its cluster names (the keys of `cluster_details[feature]`).

In [7]:
# Map every selected feature to the list of keys stored for it in
# `cluster_details`; these keys are later used as column names of `ftraindf`.
column_dictionary = {
    feature: list(cluster_details[feature].keys())
    for feature in selected
}

# Main Body

Variables

In [8]:
# Target column prefix, derived from the forecast offset.
plus_target = 13
target = f'y_Tp{plus_target}_PriceChg'

# Multiplier applied to every stored weight each time a row is processed.
decaying_factor = 0.98

# Rule list and weight array, each seeded with one placeholder entry
# (an empty rule and a zero weight).
hebbian_weights = np.array(0)
fuzzy_rule_base = [dict()]

Build the fuzzy rule base and accumulate the Hebbian weight of each rule

In [9]:


# Build the fuzzy rule base and the Hebbian weight of every rule.
#
# For each row:
#   * antecedent: for every selected feature, the cluster column with the
#     highest value in `ftraindf`;
#   * consequent: the column of `pred_results` with the highest value;
#   * weight of the row: (smallest of the winning antecedent values)
#     * (winning consequent value).
# Before a row's weight is stored, all existing weights are multiplied by
# `decaying_factor`; a rule that was seen before keeps its pre-decay weight
# plus the new row weight.
#
# The rule base and weights are initialised here (rather than reusing state
# left by an earlier cell) so that re-running this cell on its own always
# starts from an empty rule base and produces the same result.
fuzzy_rule_base = []
hebbian_weights = np.empty(0, dtype=float)
rule_position = {}  # tuple of rule labels -> index in fuzzy_rule_base / hebbian_weights

n_rows = len(ftraindf)
if len(pred_results) < n_rows:
    # Rows are matched by position, so every training row needs a prediction row.
    raise IndexError(
        f'pred_results has {len(pred_results)} rows but ftraindf has {n_rows}'
    )

# Antecedent side, computed once for all rows: winning cluster label per
# feature, and the smallest of the winning values across the features.
antecedent_labels = pd.DataFrame(
    {feature: ftraindf[column_dictionary[feature]].idxmax(axis=1) for feature in selected},
    index=ftraindf.index,
).to_numpy()
antecedent_strength = pd.DataFrame(
    {feature: ftraindf[column_dictionary[feature]].max(axis=1) for feature in selected},
    index=ftraindf.index,
).min(axis=1).to_numpy()

# Consequent side: winning target column and its value, matched by position.
consequents = pred_results.iloc[:n_rows]
consequent_labels = consequents.idxmax(axis=1).to_numpy()
consequent_strength = consequents.max(axis=1).to_numpy()

row_weights = antecedent_strength * consequent_strength

for row in tqdm(range(n_rows)):

    rule = dict(zip(selected, antecedent_labels[row]))
    rule[target] = consequent_labels[row]
    weight = row_weights[row]

    # The keys are the same for every rule, so the tuple of labels identifies it.
    key = tuple(rule.values())
    index = rule_position.get(key)

    if index is not None:
        # Known rule: read its weight before the decay, decay everything,
        # then store the un-decayed weight plus the new contribution.
        updated_weight = hebbian_weights[index] + weight
        hebbian_weights *= decaying_factor
        hebbian_weights[index] = updated_weight
    else:
        # New rule: decay the existing weights, then register the rule
        # with this row's weight.
        hebbian_weights *= decaying_factor
        rule_position[key] = len(fuzzy_rule_base)
        fuzzy_rule_base.append(rule)
        hebbian_weights = np.append(hebbian_weights, weight)

hebbian_data = {
    'rule': fuzzy_rule_base,
    'weight': hebbian_weights
}

# One row per rule, strongest rule first.
hebbian = pd.DataFrame(hebbian_data)
hebbian = hebbian.sort_values(['weight'], ascending = False).reset_index(drop = True)
hebbian


  0%|          | 0/1259 [00:00<?, ?it/s]

Unnamed: 0,rule,weight
0,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",6.728894e+01
1,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",7.903054e-01
2,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",2.423265e-01
3,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",2.047599e-01
4,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",1.140776e-01
...,...,...
74,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",1.464999e-11
75,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",7.919811e-12
76,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",6.385002e-12
77,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",5.099959e-12


In [10]:
# Write the sorted rule table to Tp{plus_target}.csv in the working directory.
# The `rule` column holds dicts, so each one is written as its string form.
hebbian.to_csv(f'Tp{plus_target}.csv')

In [11]:
# Inspect the rule in the first row of the weight-sorted table.
hebbian.loc[0, 'rule']

{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3',
 'x_Tm4_PriceChg': 'x_Tm4_PriceChg_c3',
 'x_Tm7_PRoc1': 'x_Tm7_PRoc1_c2',
 'x_Tm5_PRoc1': 'x_Tm5_PRoc1_c2',
 'x_Tm3_PRoc1': 'x_Tm3_PRoc1_c2',
 'x_Tm3_PriceChg': 'x_Tm3_PriceChg_c3',
 'y_Tp13_PriceChg': 'y_Tp13_PriceChg_c3'}