In [None]:
import pandas as pd
import numpy as np
from scipy import stats
from tqdm import tqdm_notebook
import warnings
warnings.filterwarnings('ignore')

__author__ = 'HK Dambanemuya'
__version__ = 'Python 3'

# Sentinel stored when a listing has too few bids to correlate. It lies outside
# the valid correlation range [-1, 1], so it cannot be mistaken for a real value.
_NO_COEFFICIENT = -2


def _herding_coefficient(amounts, window_size):
    """Mean pairwise Pearson correlation of ``amounts`` lagged by 0..window_size bids.

    Parameters
    ----------
    amounts : array-like of numbers
        Bid amounts of one listing, already in chronological order.
    window_size : int
        Number of steps (lags) to look ahead; ``window_size + 1`` lagged
        series are correlated with each other.

    Returns
    -------
    float or int
        The average of the upper triangle of the lag correlation matrix,
        ``NaN`` when a lagged series has zero variance, or ``-2`` when fewer
        than two aligned observations are available.
    """
    amounts = np.asarray(amounts, dtype=float)
    # Each lagged series keeps the same number of aligned observations.
    n_obs = len(amounts) - window_size
    if n_obs < 2:
        return _NO_COEFFICIENT

    # Column `lag` holds amounts[lag], amounts[lag + 1], ... (n_obs values).
    lagged = np.column_stack(
        [amounts[lag:lag + n_obs] for lag in range(window_size + 1)]
    )
    # A constant column has zero variance; let it become NaN without warnings.
    with np.errstate(invalid='ignore', divide='ignore'):
        corr_matrix = np.corrcoef(lagged, rowvar=False)

    # Every unordered pair of lags appears exactly once above the diagonal.
    upper_triangle = np.triu_indices(window_size + 1, k=1)
    return float(np.mean(corr_matrix[upper_triangle]))


def compute_herding(window_size):
    """Compute the ``window_size``-step herding coefficient of every listing.

    For each key in the global ``herding_df['ListingKey']`` the bid amounts
    found in the global ``bids`` frame (columns ``ListingKey``,
    ``CreationDate``, ``Amount``) are ordered by ``CreationDate``. The
    coefficient is the mean Pearson correlation over all pairs of the amount
    series lagged by 0, 1, ..., ``window_size`` bids.

    Parameters
    ----------
    window_size : int
        Number of steps to look ahead; must be at least 1.

    Returns
    -------
    list
        One entry per listing, in the order of ``herding_df['ListingKey']``.
        An entry is ``-2`` when the listing has fewer than
        ``window_size + 2`` bids (including listings with no bids at all).

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    # Sort and group the bids once instead of rescanning the whole table for
    # every listing; mergesort is stable, so tied timestamps keep file order.
    ordered_bids = bids.sort_values("CreationDate", kind="mergesort")
    amounts_by_listing = {
        listing: group.values
        for listing, group in ordered_bids.groupby("ListingKey", sort=False)["Amount"]
    }

    no_bids = np.empty(0)
    return [
        _herding_coefficient(amounts_by_listing.get(key, no_bids), window_size)
        for key in tqdm_notebook(herding_df['ListingKey'])
    ]

# Read the pipe-delimited bid-level file and keep just the three columns
# that the herding computation reads.
bids = pd.read_csv("../../Data/bid_notick.txt", sep="|")[['ListingKey', 'CreationDate', 'Amount']]

# Restrict the bids to listings present in the co-lending file
# (funded listings: paid or defaulted).
bid_data = pd.read_csv('../../Data/colending_data.csv')
bids = bids.loc[bids['ListingKey'].isin(bid_data['ListingKey'].values)]

# One row per distinct listing that still has bids after filtering.
herding_df = pd.DataFrame({'ListingKey': bids['ListingKey'].unique()})

# Add one herding-coefficient column per step size.
for step in (1, 3, 5, 7):
    herding_df[f'CoH{step}'] = compute_herding(step)

# Persist the coefficients without the positional index.
herding_df.to_csv('../../Data/herding_data_optimized.csv', index=False)

In [None]:
# Coefficient of variation (np.std / np.mean; np.std defaults to the population
# standard deviation, ddof=0) followed by the mean of the sample [1, 1, 45].
np.std([1,1,45]) / np.mean([1,1,45]), np.mean([1,1,45])

In [None]:
# Coefficient of variation (np.std / np.mean; np.std defaults to the population
# standard deviation, ddof=0) followed by the mean of the sample [6, 12, 12].
np.std([6,12,12]) / np.mean([6,12,12]), np.mean([6,12,12])

In [None]:
# 1-step herding coefficients: for each listing, the Pearson correlation between
# the bid-amount series and the same series shifted by one bid.
# A listing gets the sentinel -2 when scipy cannot correlate its series.
steps = 1
herding1Coefficients = []
for key in tqdm_notebook(herding_df['ListingKey']):
    temp_df = bids.query(f"ListingKey == '{key}'").sort_values("CreationDate")
    amounts = temp_df['Amount'].values
    # Aligned observations per lagged series; clamped at zero so short listings
    # produce empty slices rather than negative slice bounds.
    n_obs = max(len(amounts) - steps, 0)
    lagged = [amounts[lag:lag + n_obs] for lag in range(steps + 1)]
    try:
        # One correlation per unordered pair of lagged series.
        pair_correlations = [
            stats.pearsonr(lagged[p], lagged[q])[0]
            for p in range(len(lagged))
            for q in range(p + 1, len(lagged))
        ]
    except ValueError:
        # pearsonr raises ValueError when the series hold fewer than two
        # observations. Other errors are no longer silently swallowed.
        herding1Coefficients.append(-2)
    else:
        herding1Coefficients.append(sum(pair_correlations) / len(pair_correlations))

In [None]:
# 3-step herding coefficients: for each listing, the average Pearson correlation
# over all 6 pairs of the bid-amount series lagged by 0, 1, 2 and 3 bids.
# A listing gets the sentinel -2 when scipy cannot correlate its series.
steps = 3
herding3Coefficients = []
for key in tqdm_notebook(herding_df['ListingKey']):
    temp_df = bids.query(f"ListingKey == '{key}'").sort_values("CreationDate")
    amounts = temp_df['Amount'].values
    # Aligned observations per lagged series; clamped at zero so short listings
    # produce empty slices rather than negative slice bounds.
    n_obs = max(len(amounts) - steps, 0)
    lagged = [amounts[lag:lag + n_obs] for lag in range(steps + 1)]
    try:
        # One correlation per unordered pair of lagged series, in the order
        # (0,1), (0,2), (0,3), (1,2), (1,3), (2,3).
        pair_correlations = [
            stats.pearsonr(lagged[p], lagged[q])[0]
            for p in range(len(lagged))
            for q in range(p + 1, len(lagged))
        ]
    except ValueError:
        # pearsonr raises ValueError when the series hold fewer than two
        # observations. Other errors are no longer silently swallowed.
        herding3Coefficients.append(-2)
    else:
        herding3Coefficients.append(sum(pair_correlations) / len(pair_correlations))

In [None]:
# 5-step herding coefficients: for each listing, the average Pearson correlation
# over all 15 pairs of the bid-amount series lagged by 0, 1, ..., 5 bids.
# A listing gets the sentinel -2 when scipy cannot correlate its series.
steps = 5
herding5Coefficients = []
for key in tqdm_notebook(herding_df['ListingKey']):
    temp_df = bids.query(f"ListingKey == '{key}'").sort_values("CreationDate")
    amounts = temp_df['Amount'].values
    # Aligned observations per lagged series; clamped at zero so short listings
    # produce empty slices rather than negative slice bounds.
    n_obs = max(len(amounts) - steps, 0)
    lagged = [amounts[lag:lag + n_obs] for lag in range(steps + 1)]
    try:
        # One correlation per unordered pair of the six lagged series.
        pair_correlations = [
            stats.pearsonr(lagged[p], lagged[q])[0]
            for p in range(len(lagged))
            for q in range(p + 1, len(lagged))
        ]
    except ValueError:
        # pearsonr raises ValueError when the series hold fewer than two
        # observations. Other errors are no longer silently swallowed.
        herding5Coefficients.append(-2)
    else:
        herding5Coefficients.append(sum(pair_correlations) / len(pair_correlations))

In [None]:
# 7-step herding coefficients: for each listing, the average Pearson correlation
# over all 28 pairs of the bid-amount series lagged by 0, 1, ..., 7 bids.
# A listing gets the sentinel -2 when scipy cannot correlate its series.
steps = 7
herding7Coefficients = []
for key in tqdm_notebook(herding_df['ListingKey']):
    temp_df = bids.query(f"ListingKey == '{key}'").sort_values("CreationDate")
    amounts = temp_df['Amount'].values
    # Aligned observations per lagged series; clamped at zero so short listings
    # produce empty slices rather than negative slice bounds.
    n_obs = max(len(amounts) - steps, 0)
    lagged = [amounts[lag:lag + n_obs] for lag in range(steps + 1)]
    try:
        # One correlation per unordered pair of the eight lagged series.
        pair_correlations = [
            stats.pearsonr(lagged[p], lagged[q])[0]
            for p in range(len(lagged))
            for q in range(p + 1, len(lagged))
        ]
    except ValueError:
        # pearsonr raises ValueError when the series hold fewer than two
        # observations. Other errors are no longer silently swallowed.
        herding7Coefficients.append(-2)
    else:
        herding7Coefficients.append(sum(pair_correlations) / len(pair_correlations))
        