In [1]:
import numpy as np
import pandas as pd
import random
from random import shuffle
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')


# Q1

#### When $\alpha = [0.65,0.35]  $ for Weight and Height respectively

In [2]:
def sim(x,y):
  dist = np.abs(0.65*(x[0]-y[0]) + 0.35*(x[1]-y[1]))         ## Weight is more likely to be used for categorization
  return np.exp(-1*dist)     ##Beta is set to 1 as suggsted in paper


In [3]:
##Calculates N(R,x) for each exeemplar x

def calcN(tl):

  N = np.zeros((len(tl),3))
  for i in range(len(tl)):
    N[i][tl[i]-1]+=1
  
  return N

#### When $\gamma = [0.34,0.34,0.32]$ for Categories Small, Average , Large respectively

In [4]:
def predict(train,y,N):
  votes = [0.0]*3
  gamma = [0.34,0.34,0.32] ##Prior probability of new stimilus being in category c

  for i in range(3):
    for j in range(len(train)):
      votes[i] += N[j][i]*sim(train[j][0:2],y) ##For each exemplar calculate similarity with new stimulus and take product with exemplar's membership count in that category
    
    votes[i]*= gamma[i] ## Multiplying with prior

  votes = votes/np.sum(votes)

  return np.argmax(votes)+1 ##Return category with highest probability

In [5]:
df = pd.read_csv('X.csv',header = None) ##Reads Training Dataset
train = np.array(df)
df = pd.read_csv('y.csv',header = None) ##Reads Test Dataset
df[2] = -1 ## Initializes category of all stimulus to -1
test = np.array(df)


for t in range(len(test)):
  N = calcN(train[:,-1]) ##Calculates N(R,x)
  test[t][2] = predict(train,test[t][0:2],N) ##Store Category
  
  train = np.concatenate((train,test[t].reshape(1,3)),axis = 0) ## Add this test stimulus to training data for succeeding test sstimulus

In [6]:
## Displays and stores predicted category labels
pd.DataFrame(test).to_csv("test_gcm_a1g1.csv",header=False,index=False)
print(pd.DataFrame(test,columns = ['Weight' ,'Height',  'Label' ]))

   Weight  Height  Label
0      74      67      2
1      69      63      2
2      92      81      3
3      64      61      2
4      66      84      3
5      76      68      3
6      61      58      2
7      64      76      2
8      68      66      2
9      34      61      1


#### When $\gamma = [0.34,0.49,0.17]$ for Categories Small, Average , Large respectively

In [7]:
def predict(train,y,N):
  votes = [0.0]*3
  gamma = [0.34,0.49,0.17]
  for i in range(3):
    for j in range(len(train)):
      votes[i] += N[j][i]*sim(train[j][0:2],y)
    
    votes[i]*= gamma[i]

  votes = votes/np.sum(votes)

  return np.argmax(votes)+1

In [8]:
df = pd.read_csv('X.csv',header = None)
train = np.array(df)
df = pd.read_csv('y.csv',header = None)
df[2] = 0
test = np.array(df)


for t in range(len(test)):
  N = calcN(train[:,-1])
  test[t][2] = predict(train,test[t][0:2],N)
 
  train = np.concatenate((train,test[t].reshape(1,3)),axis = 0)

In [9]:

pd.DataFrame(test).to_csv("test_gcm_a1g2.csv",header=False,index=False)
print(pd.DataFrame(test,columns = ['Weight' ,'Height',  'Label' ]))

   Weight  Height  Label
0      74      67      2
1      69      63      2
2      92      81      3
3      64      61      2
4      66      84      2
5      76      68      3
6      61      58      2
7      64      76      2
8      68      66      2
9      34      61      1


#### When $\alpha = [0.55,0.45]  $ for Weight and Height respectively

In [10]:
def sim2(x,y):
  dist = np.abs(0.55*(x[0]-y[0]) + 0.45*(x[1]-y[1]))         ## Weight is more likely to be used for categorization
  return np.exp(-1*dist)     ##Beta is set to 1 as suggsted in paper


#### When $\gamma = [0.34,0.34,0.32]$ for Categories Small, Average , Large respectively

In [11]:
def predict(train,y,N):
  votes = [0.0]*3
  gamma = [0.34,0.34,0.32]
  for i in range(3):
    for j in range(len(train)):
      votes[i] += N[j][i]*sim2(train[j][0:2],y)
    
    votes[i]*= gamma[i]

  votes = votes/np.sum(votes)

  return np.argmax(votes)+1

In [12]:
df = pd.read_csv('X.csv',header = None)
train = np.array(df)
df = pd.read_csv('y.csv',header = None)
df[2] = -1
test = np.array(df)


for t in range(len(test)):
  N = calcN(train[:,-1])
  test[t][2] = predict(train,test[t][0:2],N)
  
  train = np.concatenate((train,test[t].reshape(1,3)),axis = 0)

In [13]:

pd.DataFrame(test).to_csv("test_gcm_a2g1.csv",header=False,index=False)
print(pd.DataFrame(test,columns = ['Weight' ,'Height',  'Label' ]))

   Weight  Height  Label
0      74      67      2
1      69      63      2
2      92      81      3
3      64      61      2
4      66      84      3
5      76      68      3
6      61      58      1
7      64      76      2
8      68      66      2
9      34      61      1


#### When $\gamma = [0.34,0.49,0.17]$ for Categories Small, Average , Large respectively

In [14]:
def predict(train,y,N):
  votes = [0.0]*3
  gamma = [0.34,0.49,0.17]
  for i in range(3):
    for j in range(len(train)):
      votes[i] += N[j][i]*sim2(train[j][0:2],y)
    
    votes[i]*= gamma[i]

  votes = votes/np.sum(votes)

  return np.argmax(votes)+1

In [15]:
df = pd.read_csv('X.csv',header = None)
train = np.array(df)
df = pd.read_csv('y.csv',header = None)
df[2] = 0
test = np.array(df)


for t in range(len(test)):
  N = calcN(train[:,-1])
  test[t][2] = predict(train,test[t][0:2],N)
  
  train = np.concatenate((train,test[t].reshape(1,3)),axis = 0)

In [16]:

pd.DataFrame(test).to_csv("test_gcm_a2g2.csv",header=False,index=False)
print(pd.DataFrame(test,columns = ['Weight' ,'Height',  'Label' ]))

   Weight  Height  Label
0      74      67      2
1      69      63      2
2      92      81      3
3      64      61      2
4      66      84      3
5      76      68      3
6      61      58      2
7      64      76      2
8      68      66      2
9      34      61      1


# Q2

In [17]:
class dLocalMAP:
    """
    See Anderson (1990, 1991)
    'Categories' renamed 'clusters' to avoid confusion.
    Discrete version.
    
    Stimulus format is a list of integers from 0 to n-1 where n is the number
    of possible features (e.g. [1,0,1])
    
    args: c, alphas
    """
    
    def __init__(self, args): ##Parameter Initialization
        self.partition = [[]]
        self.c, self.alpha = args
        ## c is coupling probability and alpha is prior probability of value j occuring on ith dimension
        self.alpha0 = [sum(self.alpha[0]),sum(self.alpha[1]),sum(self.alpha[2])] ## Sum of alpha values for each dimension
        self.N = 0   ##Total no of stimulus experienced
        
    def probClustVal(self, k, i, val):
        """Find P(j|k)"""
        ## No of seen stimulus in cluster k with same value at ith dimension as test stimulus
        cj = len([x for x in self.partition[k] if x[i]==val+1]) ##Modified Code Here
        ## Total no of stimulus in cluster k
        nk = len(self.partition[k]) ## Modified Code Here
        
        return (cj + self.alpha[i][val])/(nk + self.alpha0[i]) ## Probability of showing value val on ith dimension if from cluster k
    
    def condclusterprob(self, stim, k): ##Calculates probability of seeing feature F assuming stimulus is from cluster k
        """Find P(F|k)"""
        pjks = []
        
        for i in range(len(stim)): ## For each dimension
            ## Calculates no of seen stimuli with near value as new stimuli in cluster k
            cj = len([x for x in self.partition[k] if x[i] in range(stim[i]-2,stim[i]+3)]) ##Modified Code here
            
            nk = len(self.partition[k]) ## No of seen stimuli in cluster k
            
            pjks.append((cj + self.alpha[i][stim[i]-1])/(nk + self.alpha0[i]) ) ## Stores Conditional Probability for each dimension
        
        return np.product( pjks ) ## Returns conditional probability
        
    
    def posterior(self, stim): ## Calculates probability of stimulus belonging to cluster k given observed features
        """Find P(k|F) for each cluster"""
        pk = np.zeros( len(self.partition) )
        pFk = np.zeros( len(self.partition) )
        
        # existing clusters:
        for k in range(len(self.partition)):
            pk[k] = self.c * len(self.partition[k])/ ((1-self.c) + self.c * self.N) ## Calculates probability of it being from previous clusters
            if len(self.partition[k])==0: # case of new cluster
                pk[k] = (1-self.c) / (( 1-self.c ) + self.c * self.N) ## Calculates probability that it belongs to new cluster
            
            pFk[k] = self.condclusterprob( stim, k) ## Calculates Conditional Probability
        
        # put it together
        pkF = (pk*pFk) # / sum( pk*pFk ) Multiply Prior and Conditional Probability

        return pkF
    
    def stimulate(self, stim):
        """Argmax of P(k|F) + P(0|F)"""
        winner = np.argmax( self.posterior(stim) ) ##Assigns training stimulus to a cluster based on posterior value

        
        if len(self.partition[winner]) == 0: ##if assigned to new cluster then add a new empty cluster and assign stimulus to winner cluster
            self.partition.append( [] )
        self.partition[winner].append(stim)
        
        self.N += 1 ##Increments counter by 1 for seen stimulus
    
    def query(self, stimulus):
        """Queried value should be -1."""
        qdim = -1
        ## FInds out which dimension value needs to be predicted and enforces that there is only one such dimension
        for i in range(len(stimulus)):
            if stimulus[i] < 0:
                if qdim != -1:
                    raise (Exception, "ERROR: Multiple dimensions queried.")
                qdim = i
        
        self.N = sum([len(x) for x in self.partition]) ##Calculates no of exemplars seen
        
        pkF = self.posterior(stimulus) ## Calculates posterior of test stimulus
        pkF = pkF[:-1] / sum(pkF[:-1]) # eliminate `new cluster' prob
        
        pjF = np.array( [sum( [ pkF[k] * self.probClustVal(k, qdim, j) \
                for k in range(len(self.partition)-1)] ) 
                for j in range(len( self.alpha[qdim] ))] )
        
        return pjF / sum(pjF) ## Calculates and return probability of a value occuring for queried dimension


#### When $c = 0.0001 $ where $c$ is Coupling Probability

#### When $ \alpha = [0.21,0.27 , 0.52] $ for Categories

In [18]:
def testlocalmapD():
    """
    Tests the Anderson's ratinal model using the Medin & Schaffer (1978) data.
    
    This script will print out the probability that each item belongs to each
    of the existing clusters or to a new cluster, and the model assign it to
    the most likely cluster. To see that the model is working correctly, you
    can follow along with Anderson (1991), which steps through in the same way.
    """
 
    for i in range(1):
        model = dLocalMAP([0.0001, np.array([np.ones(100)/100,np.ones(100)/100,[0.21,0.27,0.52]])]) ## Initializes c and alpha
        
        
        df = pd.read_csv('X.csv',header = None) ##Reads training data
        train = np.array(df) ##Converts into numpy array

        df = pd.read_csv('y.csv',header = None) ##Reads test data
        df[2] = -1 ## Initializes category label to -1 
        test = np.array(df) ##Converts into numpy array


      
        for t in train: ## Add each training stimulus to RMC model
            model.stimulate(t)

        for q in test: ## For each test stimulus predict category label

            q[2] = np.argmax(model.query(q))+1
            model.stimulate(q) ## Add current test stimulus as training stimulus for next stimulus
            
    return test
        
        
        

In [19]:
test = testlocalmapD()
df = pd.DataFrame(test,columns = ['Weight' ,'Height',  'Label' ])
print(df.head(10),end='\n\n\n')
df.to_csv("test_rmc_a1c1.csv",header=False,index=False)

   Weight  Height  Label
0      74      67      3
1      69      63      2
2      92      81      3
3      64      61      2
4      66      84      2
5      76      68      3
6      61      58      1
7      64      76      2
8      68      66      2
9      34      61      1




#### When $ \alpha = [0.33,0.33 , 0.34] $ for Categories

In [20]:
def testlocalmapD():
    """
    Tests the Anderson's ratinal model using the Medin & Schaffer (1978) data.
    
    This script will print out the probability that each item belongs to each
    of the existing clusters or to a new cluster, and the model assign it to
    the most likely cluster. To see that the model is working correctly, you
    can follow along with Anderson (1991), which steps through in the same way.
    """
 
    for i in range(1):
        model = dLocalMAP([0.0001, np.array([np.ones(100)/100,np.ones(100)/100,[0.33,0.33,0.34]])])
        
        
        df = pd.read_csv('X.csv',header = None)
        train = np.array(df)

        df = pd.read_csv('y.csv',header = None)
        df[2] = -1
        test = np.array(df)
 

      
        for t in train:
            model.stimulate(t)

        for q in test:
  
            q[2] = np.argmax(model.query(q))+1
            model.stimulate(q)
            
    return test
        
        
        

In [21]:
test = testlocalmapD()
df = pd.DataFrame(test,columns = ['Weight' ,'Height',  'Label' ])
print(df.head(10),end='\n\n\n')
df.to_csv("test_rmc_a1c2.csv",header=False,index=False)

   Weight  Height  Label
0      74      67      3
1      69      63      2
2      92      81      2
3      64      61      2
4      66      84      2
5      76      68      3
6      61      58      1
7      64      76      2
8      68      66      2
9      34      61      1




#### When $ c = 0.1 $ where $c$ is Coupling Probability

#### When $ \alpha = [0.21,0.27 , 0.52] $ for Categories

In [22]:
def testlocalmapD():
    """
    Tests the Anderson's ratinal model using the Medin & Schaffer (1978) data.
    
    This script will print out the probability that each item belongs to each
    of the existing clusters or to a new cluster, and the model assign it to
    the most likely cluster. To see that the model is working correctly, you
    can follow along with Anderson (1991), which steps through in the same way.
    """
 
    for i in range(1):
        model = dLocalMAP([0.1, np.array([np.ones(100)/100,np.ones(100)/100,[0.21,0.27,0.52]])])
        
        
        df = pd.read_csv('X.csv',header = None)
        train = np.array(df)
        
        df = pd.read_csv('y.csv',header = None)
        df[2] = -1
        test = np.array(df)
        

      
        for t in train:
            model.stimulate(t)

        for q in test:

            q[2] = np.argmax(model.query(q))+1
            model.stimulate(q)
            
    return test
        
        
        

In [23]:
test = testlocalmapD()
df = pd.DataFrame(test,columns = ['Weight' ,'Height',  'Label' ])
print(df.head(10),end='\n\n\n')
df.to_csv("test_rmc_a2c1.csv",header=False,index=False)

   Weight  Height  Label
0      74      67      2
1      69      63      2
2      92      81      1
3      64      61      2
4      66      84      2
5      76      68      2
6      61      58      2
7      64      76      2
8      68      66      2
9      34      61      1




#### When $ \alpha = [0.33,0.33 , 0.34] $ for Categories

In [24]:
def testlocalmapD():
    """
    Tests the Anderson's ratinal model using the Medin & Schaffer (1978) data.
    
    This script will print out the probability that each item belongs to each
    of the existing clusters or to a new cluster, and the model assign it to
    the most likely cluster. To see that the model is working correctly, you
    can follow along with Anderson (1991), which steps through in the same way.
    """
 
    for i in range(1):
        model = dLocalMAP([0.1, np.array([np.ones(100)/100,np.ones(100)/100,[0.33,0.33,0.34]])])
        
        
        df = pd.read_csv('X.csv',header = None)
        train = np.array(df)
        
        df = pd.read_csv('y.csv',header = None)
        df[2] = -1
        test = np.array(df)
        

      
        for t in train:
            model.stimulate(t)

        for q in test:
            q[2] = np.argmax(model.query(q))+1
            model.stimulate(q)
            
    return test
        
        
        

In [25]:
test = testlocalmapD()
df = pd.DataFrame(test,columns = ['Weight' ,'Height',  'Label' ])
print(df.head(10),end='\n\n\n')
df.to_csv("test_rmc_a2c2.csv",header=False,index=False)

   Weight  Height  Label
0      74      67      2
1      69      63      2
2      92      81      1
3      64      61      2
4      66      84      2
5      76      68      2
6      61      58      2
7      64      76      2
8      68      66      2
9      34      61      1




# Q3

#### For showing Exchangability of data for GCM we will consider $\alpha = [0.
67,0.33]$ and $\gamma = [0.34,0.39,0.27]$

In [26]:
def sim(x,y):
  dist = np.abs(0.67*(x[0]-y[0]) + 0.33*(x[1]-y[1]))   ## Weight is more likely to be used for categorization
  return np.exp(-1*dist)     ##Beta is set to 1 as suggsted in paper

def calcN(tl):
  N = np.zeros((len(tl),3))
  for i in range(len(tl)):
    N[i][tl[i]-1]+=1
  
  return N

def predict(train,y,N):
  votes = [0.0]*3
  gamma = [0.34,0.39,0.27]
  for i in range(3):
    for j in range(len(train)):
      votes[i] += N[j][i]*sim(train[j][0:2],y)
    
    votes[i]*= gamma[i]

  votes = votes/np.sum(votes)

  return np.argmax(votes)+1

In [27]:
def check(res):
  final_res = []
  for i in range(10):
    temp = list(res[0][i])+[res[1][i][2]]+[res[2][i][2]]+[res[3][i][2]]+[res[4][i][2]]+[res[5][i][2]]+[res[6][i][2]]+[res[7][i][2]]+[res[8][i][2]]+[res[9][i][2]]
    final_res.append(temp)

  print('Classification for all shuffles ',end='\n\n')
  df = pd.DataFrame(final_res,columns = ['Weight' ,'Height',  'Shuffle 1' , 'Shuffle 2',  'Shuffle 3',  'Shuffle 4','Shuffle 5','Shuffle 6','Shuffle 7','Shuffle 8','Shuffle 9','Shuffle 10'])
  print(df.head(10))

In [28]:
res =[]
np.random.seed(0)
for i in range(10):

  df = pd.read_csv('X.csv',header = None)
  train = np.array(df)
  df = pd.read_csv('y.csv',header = None)
  df[2] = -1
  test = np.array(df)

  np.random.shuffle(train)
  np.random.shuffle(test)

  for t in range(len(test)):
    N = calcN(train[:,-1])
    test[t][2] = predict(train,test[t][0:2],N)
    
    train = np.concatenate((train,test[t].reshape(1,3)),axis = 0)
  
  print('Classification for ',i+1,'th shuffle',sep='',end='\n\n')
  df = pd.DataFrame(test,columns = ['Weight' ,'Height',  'Label' ])
  print(df.head(10),end='\n\n\n')
  res.append(sorted(test, key=lambda x: x[0]))

check(res)

Classification for 1th shuffle

   Weight  Height  Label
0      64      76      2
1      61      58      2
2      68      66      2
3      34      61      1
4      92      81      3
5      69      63      2
6      66      84      2
7      76      68      3
8      64      61      2
9      74      67      2


Classification for 2th shuffle

   Weight  Height  Label
0      69      63      2
1      66      84      2
2      76      68      3
3      34      61      1
4      61      58      2
5      64      61      2
6      64      76      2
7      68      66      2
8      74      67      2
9      92      81      3


Classification for 3th shuffle

   Weight  Height  Label
0      66      84      2
1      74      67      2
2      92      81      3
3      68      66      2
4      76      68      3
5      61      58      2
6      69      63      2
7      64      76      2
8      34      61      1
9      64      61      2


Classification for 4th shuffle

   Weight  Height  Label
0      34      6

#### As we can that even after shuffling training and test data ten times, we get same category labels for test data each time. Therefore GCM has the property of data exchangibility, i.e. the order in which data enters the model does not affect the category labels of the model for any given subset of data

#### For showing Exchangability of data for RMC we will consider $\alpha = [0.
21,0.27,0.52]$ for category prior and $c = 0.0001$

In [29]:
def testlocalmapD2():
    """
    Tests the Anderson's ratinal model using the Medin & Schaffer (1978) data.
    
    This script will print out the probability that each item belongs to each
    of the existing clusters or to a new cluster, and the model assign it to
    the most likely cluster. To see that the model is working correctly, you
    can follow along with Anderson (1991), which steps through in the same way.
    """
    np.random.seed(0)
    res =[]
    for i in range(10):
        model = dLocalMAP([0.0001, np.array([np.ones(100)/100,np.ones(100)/100,[0.21,0.27,0.52]])])
        
        
        df = pd.read_csv('X.csv',header = None)
        train = np.array(df)
        np.random.shuffle(train)
        df = pd.read_csv('y.csv',header = None)
        df[2] = -1
        test = np.array(df)
        np.random.shuffle(test)

      
        for t in train:
            model.stimulate(t)

        


        for q in test:

            q[2] = np.argmax(model.query(q))+1
            model.stimulate(q)
            
        print('Classification for ',i+1,'th shuffle',sep='',end='\n\n')
        df = pd.DataFrame(test,columns = ['Weight' ,'Height',  'Label' ])
        print(df.head(10),end='\n\n\n')
        res.append(sorted(test, key=lambda x: x[0]))

    check(res)

In [30]:
testlocalmapD2()

[0.34327392 0.31229831 0.34442777]
Classification for 1th shuffle

   Weight  Height  Label
0      64      76      2
1      61      58      1
2      68      66      2
3      34      61      1
4      92      81      3
5      69      63      2
6      66      84      2
7      76      68      3
8      64      61      2
9      74      67      3


[0.33362286 0.31726823 0.34910891]
Classification for 2th shuffle

   Weight  Height  Label
0      69      63      2
1      66      84      2
2      76      68      3
3      34      61      1
4      61      58      1
5      64      61      2
6      64      76      2
7      68      66      2
8      74      67      3
9      92      81      3


[0.32069293 0.32300951 0.35629755]
Classification for 3th shuffle

   Weight  Height  Label
0      66      84      2
1      74      67      3
2      92      81      3
3      68      66      2
4      76      68      3
5      61      58      1
6      69      63      2
7      64      76      2
8      34      61   

#### As we can that even after shuffling training and test data ten times, we get same category labels for test data each time. Therefore RMC has the property of data exchangibility, i.e. the order in which data enters the model does not affect the category labels of the model for any given subset of data