In [1]:
import pandas as pd
import numpy as np
import pickle

from datetime import datetime, date
from tqdm.auto import tqdm


# Setup

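Retrieve data: load `mcesdf`, the training features (`ftraindf`), the training cluster-membership predictions (`pred_results`) and the pickled `cluster_details`, then parse the `Date` columns.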

In [2]:
# NOTE(review): `ticker` is not referenced by any of the cells shown here.
ticker = 'adbe'
# Value substituted into the `Tp…` file names below.
plus_target = 13

# Input tables, read relative to the notebook's working directory.
mcesdf = pd.read_csv(f'data/mces/Tp{plus_target}_mcesdf.csv')
ftraindf = pd.read_csv('data/ftraindf.csv')
pred_results = pd.read_csv(f'data/train/Tp{plus_target}_train_clustermembership.csv')

# SECURITY: pickle.load can execute arbitrary code while deserialising;
# only load this file if it comes from a trusted source.
with open('data/cluster_details.pkl', 'rb') as handle:
    cluster_details = pickle.load(handle)

# Parse the 'YYYY-MM-DD' strings into datetime.date objects in one vectorised
# pass per frame (instead of a Python-level strptime call for every row).
ftraindf['Date'] = pd.to_datetime(ftraindf['Date'], format='%Y-%m-%d').dt.date
pred_results['Date'] = pd.to_datetime(pred_results['Date'], format='%Y-%m-%d').dt.date

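Format data: keep only rows dated 2013-01-01 or later, then drop the columns that are no longer needed (`Date`, and `pc_pred` in `pred_results`) and reset the index.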

In [3]:
# Restrict both frames to rows whose Date is 2013-01-01 or later.
ftraindf = ftraindf.loc[lambda frame: frame['Date'] >= date(2013, 1, 1)]
pred_results = pred_results.loc[lambda frame: frame['Date'] >= date(2013, 1, 1)]

In [4]:
# Remove the columns that are not needed downstream and renumber the rows from 0.
ftraindf = ftraindf.drop(columns=['Date']).reset_index(drop=True)
pred_results = pred_results.drop(columns=['Date', 'pc_pred']).reset_index(drop=True)

In [5]:
# Display `pred_results` as the cell output (visual check of the frame).
pred_results

Unnamed: 0,y_Tp13_PriceChg_c0,y_Tp13_PriceChg_c1,y_Tp13_PriceChg_c2,y_Tp13_PriceChg_c3,y_Tp13_PriceChg_c4,y_Tp13_PriceChg_c5,y_Tp13_PriceChg_c6
0,0.002120,0.004375,0.266127,0.493386,0.186901,0.044987,0.0
1,0.002857,0.005408,0.309099,0.496721,0.148496,0.036057,0.0
2,0.000664,0.001585,0.214818,0.550094,0.193023,0.038622,0.0
3,0.002510,0.004873,0.381062,0.512030,0.081359,0.017735,0.0
4,0.005157,0.008961,0.370797,0.463804,0.116072,0.033804,0.0
...,...,...,...,...,...,...,...
1254,0.001974,0.003710,0.324441,0.540728,0.105847,0.022732,0.0
1255,0.001664,0.003311,0.295867,0.549136,0.124430,0.024935,0.0
1256,0.002673,0.005138,0.381671,0.511448,0.080794,0.017821,0.0
1257,0.002418,0.004678,0.377932,0.515964,0.081358,0.017238,0.0


List of selected features: the `cols` values of the first 6 rows of `mcesdf`.

In [6]:
# Feature names taken from the `cols` column of the first 6 rows of mcesdf.
selected = list(mcesdf['cols'].iloc[:6])

Column dictionary: for each selected feature, the list of keys of its entry in `cluster_details`.

In [7]:
# Map every selected feature to the list of keys found under that feature
# in cluster_details.
column_dictionary = {
    feature_name: list(cluster_details[feature_name].keys())
    for feature_name in selected
}

# Main Body

Variables

In [8]:
# Value substituted into the target name below (and into `Tp…` file names).
plus_target = 13
# Key under which each rule stores its y-side cluster.
target = f'y_Tp{plus_target}_PriceChg'
# Multiplier applied to the stored weights while rows are processed.
decaying_factor = 0.98

# Both containers start with a single placeholder element:
# an empty rule and a zero weight (0-d integer array).
fuzzy_rule_base = [dict()]
hebbian_weights = np.asarray(0)

Main loop: derive one rule per row and accumulate a decaying Hebbian weight for each distinct rule.

In [9]:


def _build_hebbian_rule_base(feature_df, consequent_df, features, columns_by_feature,
                             target_name, decay):
    """Derive one fuzzy rule per row and accumulate a weight per distinct rule.

    For each row (rows of the two frames are paired by position):

    * antecedent: for every feature in ``features``, the column with the
      largest value among ``columns_by_feature[feature]`` in ``feature_df``;
    * consequent: the column with the largest value in ``consequent_df``,
      stored in the rule under ``target_name``;
    * contribution: (smallest of the chosen antecedent values)
      * (largest consequent value).

    Weight update per row:

    * rule seen before: its weight becomes ``current weight + contribution``
      and every *other* stored weight is multiplied by ``decay``;
    * new rule: every stored weight is multiplied by ``decay`` and the new
      rule is appended with the contribution as its weight.

    NOTE(review): the matched rule itself is not decayed in the step that
    reinforces it -- confirm that this is the intended update.

    Returns
    -------
    tuple(list of dict, numpy.ndarray)
        Distinct rules in order of first appearance, and their weights.

    Raises
    ------
    ValueError
        If the two frames do not have the same number of rows, because the
        positional pairing of rows would then be meaningless.
    """
    n_rows = len(feature_df)
    if len(consequent_df) != n_rows:
        raise ValueError(
            f'row count mismatch: {n_rows} feature rows vs '
            f'{len(consequent_df)} consequent rows'
        )
    if n_rows == 0:
        return [], np.empty(0)

    # Row-wise argmax / max, computed once per feature instead of slicing a
    # sub-frame out of feature_df for every single row.
    antecedent_labels = {
        feature: feature_df[columns_by_feature[feature]].idxmax(axis=1).to_numpy()
        for feature in features
    }
    # Value at the argmax column == row-wise max; the rule strength is the
    # minimum of those maxima across the selected features.
    antecedent_strength = pd.DataFrame(
        {feature: feature_df[columns_by_feature[feature]].max(axis=1) for feature in features},
        index=feature_df.index,
    ).min(axis=1).to_numpy()

    consequent_labels = consequent_df.idxmax(axis=1).to_numpy()
    consequent_strength = consequent_df.max(axis=1).to_numpy()

    rules = []                    # distinct rules, in order of first appearance
    position = {}                 # hashable rule key -> index into rules / weights
    weights = np.zeros(n_rows)    # at most one distinct rule per row
    n_rules = 0

    for row in tqdm(range(n_rows)):
        rule = {feature: antecedent_labels[feature][row] for feature in features}
        rule[target_name] = consequent_labels[row]
        contribution = antecedent_strength[row] * consequent_strength[row]

        # Rules are always built with the same key order, so the tuple of
        # items identifies a rule exactly like dict equality does.
        key = tuple(rule.items())
        index = position.get(key)

        if index is None:
            weights[:n_rules] *= decay
            position[key] = n_rules
            rules.append(rule)
            weights[n_rules] = contribution
            n_rules += 1
        else:
            # Read the un-decayed weight first, decay everything, then
            # overwrite the matched rule with the reinforced value.
            reinforced = weights[index] + contribution
            weights[:n_rules] *= decay
            weights[index] = reinforced

    return rules, weights[:n_rules].copy()


# Always rebuild from scratch so that re-running this cell gives the same
# result instead of continuing from the state left by a previous run.
fuzzy_rule_base, hebbian_weights = _build_hebbian_rule_base(
    ftraindf, pred_results, selected, column_dictionary, target, decaying_factor
)

hebbian_data = {
    'rule': fuzzy_rule_base,
    'weight': hebbian_weights
}

# One row per distinct rule, strongest rule first.
hebbian = pd.DataFrame(hebbian_data)
hebbian = hebbian.sort_values(['weight'], ascending = False).reset_index(drop = True)
hebbian


  0%|          | 0/1259 [00:00<?, ?it/s]

Unnamed: 0,rule,weight
0,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",7.839862e+00
1,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",3.029525e+00
2,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",1.418454e+00
3,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",1.229549e+00
4,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",9.535856e-01
...,...,...
74,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",3.568073e-10
75,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",8.051991e-11
76,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",7.226727e-11
77,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm4...",6.964535e-11


In [10]:
# Write `hebbian` to a CSV named after plus_target in the working directory
# (the index is written as well, which is the pandas default).
hebbian.to_csv(path_or_buf=f'Tp{plus_target}.csv')

In [11]:
# Show the complete rule dict stored in the row labelled 0 of `hebbian`.
hebbian.loc[0, 'rule']

{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3',
 'x_Tm4_PriceChg': 'x_Tm4_PriceChg_c3',
 'x_Tm3_PriceChg': 'x_Tm3_PriceChg_c3',
 'x_Tm2_PriceChg': 'x_Tm2_PriceChg_c3',
 'x_Tm8_PriceChg': 'x_Tm8_PriceChg_c3',
 'x_Tm1_VolChg': 'x_Tm1_VolChg_c0',
 'y_Tp13_PriceChg': 'y_Tp13_PriceChg_c3'}