### Setup:

In [44]:
import pandas as pd
import numpy as np
from IPython.display import HTML

### User-defined functions:

In [30]:
def _agreement_signs(sources, col_name, is_missing):
    """Return the +1 / -1 / 0 agreement matrix of every source against `col_name`."""
    current_source = sources[col_name]
    # int8 keeps the precomputed matrices small; values are only ever -1, 0 or 1
    signs = np.empty(is_missing.shape, dtype=np.int8)
    for col_idx, other_name in enumerate(sources.columns):
        if other_name == col_name:
            # a source always agrees with itself
            signs[:, col_idx] = 1
        else:
            signs[:, col_idx] = np.where(sources[other_name] == current_source, 1, -1)
    # a source that has no value for a row neither supports nor contradicts it
    signs[is_missing] = 0
    return signs


def carry_out_iterations(data, list_of_cols, t_w, id_colname,
                         max_iterations=70, max_t_w_value=0.975,
                         tolerance=0.001, warmup_iterations=5, verbose=True):
    """Iteratively estimate per-source trust scores and per-value confidence scores.

    For every source column, each row gets a confidence
    ``1 / (1 + exp(-score))`` where ``score`` is the sum of ``tau_w`` over the
    sources that report the same value minus the sum of ``tau_w`` over the
    sources that report a different value, with ``tau_w = -log(1 - t_w)``.
    Sources with a missing value in a row contribute nothing to that row, and a
    missing value itself gets a confidence of NaN.  After each pass the trust
    score of a source becomes the mean (NaNs skipped) of its confidences,
    capped at ``max_t_w_value``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the source columns and the identifier column.  It is not
        modified.
    list_of_cols : list
        Names of the source columns to compare against each other.
    t_w : array-like of float
        Initial trust score for each source, in the order of ``list_of_cols``.
        It is not modified.
    id_colname : hashable
        Column of ``data`` copied onto the returned confidence table.
    max_iterations : int, default 70
        Upper bound on the number of passes.
    max_t_w_value : float, default 0.975
        Cap applied to the updated trust scores (keeps ``tau_w`` finite).
    tolerance : float, default 0.001
        Iteration stops once the summed absolute change of the trust scores
        falls below this value.
    warmup_iterations : int, default 5
        The convergence test is only applied when the zero-based iteration
        number is greater than this value.
    verbose : bool, default True
        Print the iteration number and the trust scores it started from.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``t_w_df`` has one row per pass holding the trust scores that pass
        started from; ``train_data_confidence`` has the confidence of every
        source value plus the ``id_colname`` column.

    Raises
    ------
    ValueError
        If ``t_w`` does not provide exactly one score per source column.
    """
    sources = data[list_of_cols]
    source_names = list(sources.columns)
    is_missing = pd.isnull(sources).to_numpy()

    # private float copy, so the caller's initial scores are never touched
    t_w = np.array(t_w, dtype=float)
    if t_w.shape != (len(source_names),):
        raise ValueError(
            "t_w must hold one trust score per column in list_of_cols "
            "(expected %d, got shape %s)" % (len(source_names), t_w.shape)
        )

    # Who agrees with whom never changes between passes; only the weights do,
    # so the agreement matrices are computed once up front.
    agreement = [_agreement_signs(sources, name, is_missing) for name in source_names]

    confidence = np.zeros(is_missing.shape, dtype=float)
    history = []

    for iteration in range(max_iterations):
        tau_w = -np.log(1 - t_w)

        for col_idx, signs in enumerate(agreement):
            # nansum mirrors a NaN-skipping row sum: a source whose weight is
            # NaN (e.g. an all-missing column) is simply ignored
            score = np.nansum(signs * tau_w, axis=1)
            confidence[:, col_idx] = np.where(
                is_missing[:, col_idx], np.nan, 1 / (1 + np.exp(-score))
            )

        ## maintaining record of the trustworthiness scores of the sources
        t_w_prev = t_w
        history.append(t_w_prev)

        # column means with NaNs skipped; an all-NaN column yields NaN
        observed_counts = (~np.isnan(confidence)).sum(axis=0)
        with np.errstate(invalid='ignore', divide='ignore'):
            t_w = np.nansum(confidence, axis=0) / observed_counts
        t_w[t_w >= max_t_w_value] = max_t_w_value

        ## printing iteration number and the trustworthiness scores it started from
        if verbose:
            print(iteration, t_w_prev)

        if iteration > warmup_iterations and np.sum(np.abs(t_w - t_w_prev)) < tolerance:
            break

    t_w_df = pd.DataFrame(
        np.array(history, dtype=float).reshape(len(history), len(source_names)),
        columns=source_names,
    )
    train_data_confidence = pd.DataFrame(confidence, index=data.index, columns=source_names)
    train_data_confidence[id_colname] = data[id_colname]

    return (t_w_df, train_data_confidence)

In [32]:
def get_final_confidence(data, column_to_check_confidence, train_data_confidence, id_colname):
    """Attach to each row the confidence of the source value that matches a reference column.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the reference column and the source columns.  It is not
        modified; the result is built on a copy.
    column_to_check_confidence : hashable
        Column of ``data`` whose values are looked up among the sources.
    train_data_confidence : pandas.DataFrame
        Confidence scores, one column per source plus the ``id_colname``
        column, row-aligned with ``data`` through the index.
    id_colname : hashable
        Column of ``train_data_confidence`` that is not a source.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with an extra float column ``final_confidence``.  A
        row gets the confidence of the source equal to its reference value;
        when several sources match, the last one in column order wins; rows
        matching no source keep 0.0.
    """
    result = data.copy()
    # float from the start so the confidence scores can be written without a dtype upcast
    result['final_confidence'] = 0.0

    source_columns = [col for col in train_data_confidence.columns if col != id_colname]
    reference_values = result[column_to_check_confidence]

    for source_col in source_columns:
        matching_rows = reference_values == result[source_col]
        result.loc[matching_rows, 'final_confidence'] = train_data_confidence.loc[matching_rows, source_col]

    return result

### Reading data

In [45]:
# Load the input table; later cells read its `id`, `Krushak_Odisha` and `Source_*` columns.
data = pd.read_csv('sample_data.csv')


### Running the algorithm

In [46]:
# Row identifier and the source columns that are compared against each other.
id_colname = 'id'
list_of_cols = ['Source_A', 'Source_B', 'Source_C', 'Source_D']
no_cols = len(list_of_cols)

# Every source starts from the same initial trust score of 0.5.
t_w = np.full(no_cols, 0.5)

In [35]:
# Run the iterative estimation: returns the per-iteration trust scores and the
# per-value confidence table; the iteration number and trust scores are printed as it runs.
t_w_df,train_data_confidence = carry_out_iterations( data,list_of_cols,t_w,id_colname)

0 [0.5 0.5 0.5 0.5]
1 [0.75615686 0.72480818 0.65849673 0.73488562]
2 [0.83984494 0.76699952 0.65249055 0.80176572]
3 [0.87227385 0.76407503 0.63453102 0.82516597]
4 [0.88826799 0.75756231 0.62589127 0.83535758]
5 [0.89657536 0.75348551 0.6222036  0.8393747 ]
6 [0.90097067 0.75138065 0.62060777 0.840692  ]
7 [0.90334014 0.75033225 0.61989127 0.84100719]
8 [0.90463863 0.74980158 0.61955455 0.84101422]
9 [0.90535871 0.74952501 0.61938868 0.84095462]


In [36]:
# Reference column whose values are looked up among the sources.
column_to_check_confidence = 'Krushak_Odisha'

# For each row, take the confidence of the source value equal to the reference value
# (rows where no source matches keep a final_confidence of 0).
data_copy = get_final_confidence(data, column_to_check_confidence ,train_data_confidence ,id_colname)

In [37]:
# Show the input rows together with the computed final_confidence column.
data_copy

Unnamed: 0,id,Krushak_Odisha,Source_A,Source_B,Source_C,Source_D,final_confidence
0,1,Dhoni,MS,MS,MS,MS,0.0
1,2,Sehwag,Sehwag,Viru,Sehwag,Sehwag,0.977639
2,3,Gambhir,Gautam,Gautam,Gautam,Gautam,0.0
3,4,Sachin,Sachin,Sachin,Sachin,Sachin,0.998567
4,5,Yuvaraj,Yuvaraj,Yuvaraj,Yuvaraj,Yuvaraj,0.998567
5,6,Raina,Raina,Suresh,Raina,Raina,0.977639
6,7,Kohli,Virat,Kohli,Kohli,Virat,0.13636
7,8,Nehra,Ashish,Ashish,Ashish,Ashish,0.0
8,9,Yusuf,Yusuf,Yusuf,Yusuf,Yusuf,0.998567
9,10,Munaf,Munaf,Patel,Patel,Patel,0.138086
