In [1]:
import pandas as pd
import numpy as np
import pickle

from datetime import datetime, date
from tqdm.auto import tqdm


# Setup

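Retrieve data: load the input CSV files and the pickled cluster details, then parse the `Date` columns into `date` objects.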

In [3]:
# Run configuration. `plus_target` is embedded in the input file names below;
# `ticker` is not referenced by any of the cells visible in this notebook.
ticker = 'vrtx'
plus_target = 13

# Load the three input tables from the data/ directory.
mcesdf = pd.read_csv(f'data/mces/Tp{plus_target}_mcesdf.csv')
ftraindf = pd.read_csv('data/ftraindf.csv')
pred_results = pd.read_csv(f'data/train/Tp{plus_target}_train_clustermembership.csv')

# NOTE(review): pickle.load can execute arbitrary code while deserialising.
# Only load this file if it comes from a trusted source.
with open('data/cluster_details.pkl', 'rb') as handle:
    cluster_details = pickle.load(handle)

# Parse the 'YYYY-MM-DD' strings into datetime.date objects with one vectorised
# call per frame (instead of a Python-level strptime call per row), so the
# columns can be compared against datetime.date values.
ftraindf['Date'] = pd.to_datetime(ftraindf['Date'], format='%Y-%m-%d').dt.date
pred_results['Date'] = pd.to_datetime(pred_results['Date'], format='%Y-%m-%d').dt.date

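Format data: keep only rows dated on or after 2013-01-01, then drop the columns that are no longer needed and reset the index.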

In [4]:
# Keep only the rows whose Date is on or after 1 January 2013 (same cutoff for both frames).
cutoff = date(2013, 1, 1)
ftraindf = ftraindf.loc[ftraindf['Date'] >= cutoff]
pred_results = pred_results.loc[pred_results['Date'] >= cutoff]

In [5]:
# Remove the columns that are not used further on and renumber the rows from 0,
# so that both frames can be addressed by the same positional row number.
ftraindf = ftraindf.drop(columns=['Date']).reset_index(drop=True)
pred_results = pred_results.drop(columns=['Date', 'pc_pred']).reset_index(drop=True)

In [6]:
# Display the frame that remains after 'Date' and 'pc_pred' were dropped.
pred_results

Unnamed: 0,y_Tp13_PriceChg_c0,y_Tp13_PriceChg_c1,y_Tp13_PriceChg_c2,y_Tp13_PriceChg_c3,y_Tp13_PriceChg_c4,y_Tp13_PriceChg_c5,y_Tp13_PriceChg_c6
0,0.0,0.168961,0.362670,0.400858,0.060282,0.004291,0.000438
1,0.0,0.087521,0.287369,0.537280,0.079419,0.005758,0.001655
2,0.0,0.053640,0.241857,0.570386,0.118094,0.011422,0.003761
3,0.0,0.046915,0.196731,0.561686,0.164898,0.020713,0.007907
4,0.0,0.027186,0.132486,0.626274,0.187654,0.018482,0.007424
...,...,...,...,...,...,...,...
1254,0.0,0.056704,0.233415,0.549225,0.140894,0.013743,0.004903
1255,0.0,0.050361,0.224989,0.553135,0.147405,0.016877,0.006070
1256,0.0,0.036741,0.188865,0.592321,0.157514,0.017262,0.006491
1257,0.0,0.040428,0.192504,0.583173,0.160670,0.016081,0.006247


Selected features: the first six entries of the `cols` column of `mcesdf`.

In [7]:
# Feature names to build rules from: the first six values of mcesdf's 'cols' column.
selected = list(mcesdf['cols'].iloc[:6])

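Column dictionary: map each selected feature to the keys of its entry in `cluster_details` (used later as `ftraindf` column names).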

In [8]:
# For every selected feature, record the keys of its cluster_details entry.
# These keys are used further down as column names of ftraindf.
column_dictionary = {
    name: list(cluster_details[name].keys())
    for name in selected
}

# Main Body

Variables: target column name, decay factor, and the initial rule-base and weight containers.

In [9]:
# Forecast horizon; it is embedded in the name of the target key stored in each rule.
plus_target = 13
target = f'y_Tp{plus_target}_PriceChg'

# Factor by which the stored weights are multiplied for every processed row.
decaying_factor = 0.98

# Containers seeded with a single dummy entry each: an empty rule and a
# zero-dimensional array holding the value 0.
fuzzy_rule_base = [{}]
hebbian_weights = np.array(0)

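Build the fuzzy rule base: derive one rule per training row and accumulate its Hebbian weight, decaying the stored weights at every step.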

In [10]:


# Build the rule base and its Hebbian weights from the training rows.
#
# For each row:
#   * antecedent: for every selected feature, the cluster column with the
#     largest value in that row;
#   * consequent: the column of pred_results with the largest value in that row;
#   * weight: (smallest of the winning antecedent values) * (winning consequent value).
# All stored weights are multiplied by `decaying_factor` on every row. A rule
# that is seen again gets the new weight added to its pre-decay weight.

# Start from fresh containers so that re-running this cell never builds on
# (or trims) state left behind by a previous execution.
fuzzy_rule_base = []                         # one dict per distinct rule, in order of first appearance
hebbian_weights = np.empty(0, dtype=float)   # hebbian_weights[i] belongs to fuzzy_rule_base[i]
rule_positions = {}                          # tuple of a rule's labels -> its index in the containers above

# Antecedents: winning cluster column and its value for every row, computed
# once per feature instead of re-selecting the columns on every row.
antecedent_labels = {}
antecedent_degrees = {}
for feature in selected:
    memberships = ftraindf[column_dictionary[feature]]
    antecedent_labels[feature] = memberships.idxmax(axis=1).to_numpy()
    antecedent_degrees[feature] = memberships.max(axis=1)

# Per row, the smallest of the winning antecedent values.
firing_strengths = (
    pd.DataFrame(antecedent_degrees, index=ftraindf.index).min(axis=1).to_numpy()
)

# Consequents: winning column of pred_results and its value for every row.
consequent_labels = pred_results.idxmax(axis=1).to_numpy()
consequent_degrees = pred_results.max(axis=1).to_numpy()

for row in tqdm(range(len(ftraindf))):

    rule = {feature: antecedent_labels[feature][row] for feature in selected}
    rule[target] = consequent_labels[row]

    # hebbian weight contributed by this row
    weight = firing_strengths[row] * consequent_degrees[row]

    # Every rule has the same keys in the same order, so the tuple of its
    # values identifies it and can be looked up in a dict instead of scanning
    # the rule list.
    rule_key = tuple(rule.values())
    position = rule_positions.get(rule_key)

    if position is None:
        # New rule: decay the existing weights, then store the rule with its own weight.
        hebbian_weights = np.append(hebbian_weights * decaying_factor, weight)
        rule_positions[rule_key] = len(fuzzy_rule_base)
        fuzzy_rule_base.append(rule)
    else:
        # Known rule: reinforce using the weight as it was *before* this
        # step's decay, then decay the others and write the reinforced value back.
        reinforced = hebbian_weights[position] + weight
        hebbian_weights *= decaying_factor
        hebbian_weights[position] = reinforced

hebbian_data = {
    'rule': fuzzy_rule_base,
    'weight': hebbian_weights
}

# Rank the rules from the largest to the smallest weight.
hebbian = pd.DataFrame(hebbian_data)
hebbian = hebbian.sort_values(['weight'], ascending = False).reset_index(drop = True)
hebbian


  0%|          | 0/1259 [00:00<?, ?it/s]

Unnamed: 0,rule,weight
0,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c2', 'x_Tm3...",3.940964e+00
1,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm3...",2.752146e+00
2,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c2', 'x_Tm3...",2.356377e+00
3,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c2', 'x_Tm3...",1.711592e+00
4,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c2', 'x_Tm3...",1.174402e+00
...,...,...
144,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c3', 'x_Tm3...",7.428378e-12
145,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c5', 'x_Tm3...",0.000000e+00
146,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c6', 'x_Tm3...",0.000000e+00
147,"{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c5', 'x_Tm3...",0.000000e+00


In [11]:
# Write the ranked rule table to a CSV file named after the forecast horizon.
output_path = f'Tp{plus_target}.csv'
hebbian.to_csv(output_path)

In [12]:
# Show the rule stored under index label 0, i.e. the first row after sorting by weight.
hebbian.loc[0, 'rule']

{'x_Tm1_PriceChg': 'x_Tm1_PriceChg_c2',
 'x_Tm3_PriceChg': 'x_Tm3_PriceChg_c3',
 'x_Tm2_PriceChg': 'x_Tm2_PriceChg_c2',
 'x_Tm4_PriceChg': 'x_Tm4_PriceChg_c2',
 'x_Tm1_PRoc1': 'x_Tm1_PRoc1_c3',
 'x_Tm11_PRoc1': 'x_Tm11_PRoc1_c3',
 'y_Tp13_PriceChg': 'y_Tp13_PriceChg_c3'}