In [1]:
import sys
from math import sqrt,log,exp
from random import choice, randrange, seed
from copy import deepcopy
import numpy as np
import json
import csv
import time

In [4]:
#Importing data

def import_data_adaboost(filename):
    file = open(filename)
    data = []
    for line in file:
        eachline = line.rstrip().split(' ')[1:]
        data.append(list(map(int,eachline)))
    file.close()
    return data

#New adaboost functions

def adhypothesis(sdata,dstump):
        hyp1={}
        hyp2={}
        for img in sdata:
            if img[dstump[0]] > img[dstump[1]]:
                hyp1[str(img[0])]=hyp1.get(str(img[0]),0)+1
            else:
                hyp2[str(img[0])]=hyp2.get(str(img[0]),0)+1
        
        #print(hyp1)
        #print(hyp2)
        
        if len(hyp1)==0:
            return list(hyp2.keys())
        elif len(hyp2)==0:
            return list(hyp1.keys())
        else:
            return [max(hyp1,key=hyp1.get),max(hyp2,key=hyp2.get)]
        
def adaboost_train(sdata,x):
        dstumps=[]
        n = len(sdata)
        wts = [1/n]*n
        m = np.linspace(1,192,192)
        for i in range(0,x):
            dstumps.append([int(choice(m)),int(choice(m))])

        hyplist = []
        alphalist = []
        
        for f in range(0,x):
            labs = []
            div = dstumps[f]
            hyp = adhypothesis(sdata,div)
            hyplist.append(hyp)
            for i in range(n):
                if sdata[i][div[0]] > sdata[i][div[1]]:
                    labs.append(hyp[0])
                else:
                    labs.append(hyp[1])
            
            #Calculating error and updating weights
            err=0
            for j in range(n):
                if int(labs[j])!= int(sdata[j][0]):
                    err = err + wts[j]
            for j in range(n):
                if int(labs[j]) == int(sdata[j][0]):
                    wts[j] = wts[j]*(err/(1-err))
            
            #Normalizing weights and calculating alpha
            wts = [(w/sum(wts)) for w in wts]
            alphalist.append(log((1-err)/(err)))
        return hyplist,alphalist,dstumps
    
def adaboost_model_generation(data, class_divisions, x):
        sdata={}
        for img in data:
            for i in class_divisions:
                if img[0] in i:
                    sdata[str(i)]=sdata.get(str(i),tuple())+(img,)
        
        #print (sdata)
        hypotheses = {}
        alphas = {}
        decisionstumps = {}
        
        for d in class_divisions:
            hyp, alpha, dstump= adaboost_train(sdata[str(d)],x)
            hypotheses[str(d)] = hyp
            alphas[str(d)] = alpha
            decisionstumps[str(d)] = dstump
        return hypotheses,alphas,decisionstumps
    
def adaboost_test(data, class_divisions, filename, x):
        with open(filename, 'r') as model_file:
            hyp , alpha, dstumps = json.load(model_file)

        labels=[]
        for img in data:
            output = []
            for d in class_divisions:
                labs={}
                for i in range(0,x):
                    if img[dstumps[str(d)][i][0]] >= img[dstumps[str(d)][i][1]]:
                        labs[hyp[str(d)][i][0]] = labs.get(hyp[str(d)][i][0],0) + alpha[str(d)][i]
                    else:
                        labs[hyp[str(d)][i][1]] = labs.get(hyp[str(d)][i][1],0) + alpha[str(d)][i]

                output.append(max(labs,key=labs.get))

            max_count = 0
            for o in output:
                count = output.count(o)
                if count > max_count:
                    max_count = count
                    label = o
            labels.append(label)
        return labels
    
#Calculating Accuracy
def accuracy_adaboost(predictions,actual):
    score=0
    for i,j in zip(predictions,actual):
        if int(i) == int(j[0]):
            score = score + 1
    accuracy=(score/len(actual))*100
    return accuracy

In [5]:
#Main Function

train = import_data_adaboost('train-data.txt')
model_file = 'adaboost_model.txt'
class_divisions = [(0,90),(0,180),(0,270),(90,180),(90,270),(180,270)]
x = 100
print ("Start training:")
start = time.time()
hypotheses,alphas,decisionstumps = adaboost_model_generation(train, class_divisions, x)
print ("Time taken:", time.time() - start)
with open(model_file,'w') as f:
    json.dump([hypotheses,alphas,decisionstumps],f)
f.close()

#Run only test portion with already saved properties file "adaboost_model.txt"
print ("Start testing:")
test = import_data_adaboost('test-data.txt')
start = time.time()
results = adaboost_test(test, class_divisions, model_file, x)
print ("Time taken:", time.time() - start)
accuracy_score=accuracy_adaboost(results,test)
print("Adaboost Accuracy:",accuracy_score)

Start training:
Time taken: 1149.3539834022522
Start testing:
Time taken: 2.058497190475464
Adaboost Accuracy: 67.12619300106044
