In [8]:
import numpy
import matplotlib.pyplot
%matplotlib inline

In [2]:
class Pocket_pla:
    """Pocket perceptron learning algorithm (PLA) experiment on noisy data.

    Constructing an instance runs the complete experiment:

    1. a random target line is drawn (``randomize_target_line``);
    2. ``data_set_n`` training points are drawn uniformly from ``[-1, 1]^d``
       and labelled +1 / -1 by the target line (``randomize_training_data``);
    3. ``int(N / 10)`` labels are flipped to simulate noise
       (``spoil_training_data_answers``);
    4. the perceptron is trained for ``iterations`` weight updates while the
       best weights seen so far are kept "in the pocket" (``train``).

    The target line only looks at the first two input coordinates, so
    ``dimensions`` must be at least 2.

    Attributes written by the experiment:
        weights: current weight column vector, shape ``(d + 1, 1)``.
        best_weights: copy of the weights with the lowest in-sample error.
        history_performance / history_best_performance: in-sample error of
            the current weights / lowest in-sample error so far; one entry
            for the initial weights plus one per weight update.
        history_e_out / history_best_e_out: Monte-Carlo out-of-sample error
            estimate of the current weights / lowest such estimate so far,
            recorded at the same moments as the in-sample histories.
    """

    def __init__(self, dimensions, data_set_n, iterations):
        """Build the data set and immediately train the perceptron.

        Args:
            dimensions: number of input coordinates ``d`` (bias excluded).
            data_set_n: number of training points ``N``.
            iterations: number of weight updates ``T`` to perform.
        """
        self.d = dimensions
        self.N = data_set_n
        self.T = iterations

        self.randomize_target_line()
        self.randomize_training_data()
        self.spoil_training_data_answers()

        self.initialize_weights_vector()
        self.train()

        # self.plot_everything()

    @staticmethod
    def _classify(weights, point):
        """Return the +1 / -1 prediction of ``weights`` for ``point``.

        A zero activation is classified as -1, matching the convention used
        everywhere in this class.
        """
        return 1 if numpy.dot(weights.T, point).item() > 0 else -1

    def _pad_histories(self, count):
        """Repeat the last entry of every history list ``count`` times."""
        histories = (
            self.history_performance,
            self.history_best_performance,
            self.history_e_out,
            self.history_best_e_out,
        )
        for history in histories:
            history.extend([history[-1]] * count)

    def train(self):
        """Perform ``self.T`` perceptron weight updates.

        A random training point is drawn on every step.  A misclassified
        point triggers a weight update (and a new history entry); a correctly
        classified point is swapped to the front of the lists and excluded
        from the following draws until the exclusion window wraps around.

        If the current weights reach zero in-sample error, no further update
        is possible.  Training then stops and the histories are padded with
        their last value so that they still hold ``T + 1`` entries (the
        original loop would spin forever in that situation).
        """
        updates_done = 0
        first_candidate = 0
        n_points = len(self.training_data)
        while updates_done < self.T:
            if self.history_performance and self.history_performance[-1] == 0:
                # Every training point is classified correctly: converged.
                self._pad_histories(self.T - updates_done)
                break

            idx = numpy.random.randint(first_candidate, n_points)
            label = self.training_data_answers[idx]
            if self._classify(self.weights, self.training_data[idx]) != label:
                # In-place update; best_weights therefore has to be a copy.
                self.weights += label * self.training_data[idx]
                updates_done += 1
                self.check_weights_performance()
            else:
                # Move the correctly classified point out of the draw window.
                data, answers = self.training_data, self.training_data_answers
                data[idx], data[first_candidate] = data[first_candidate], data[idx]
                answers[idx], answers[first_candidate] = answers[first_candidate], answers[idx]
                first_candidate = (first_candidate + 1) % n_points

    def check_weights_performance(self):
        """Record the in-sample error of the current weights.

        Appends one entry to ``history_performance`` and
        ``history_best_performance``, updates the pocket (``best_weights`` /
        ``best_performance``) when the current weights are strictly better,
        and finally records the out-of-sample estimate via ``check_e_out``.
        """
        correct = 0.
        for point, label in zip(self.training_data, self.training_data_answers):
            if self._classify(self.weights, point) == label:
                correct += 1
        current_performance = (self.N - correct) / self.N

        if current_performance < self.best_performance:
            self.best_performance = current_performance
            # Snapshot: self.weights keeps being mutated in place by train().
            self.best_weights = self.weights.copy()

        self.history_performance.append(current_performance)
        self.history_best_performance.append(self.best_performance)
        self.check_e_out()

    def check_e_out(self):
        """Estimate the out-of-sample error of the current weights.

        Draws 1000 fresh uniform points, labels them with the (noise-free)
        target line and appends the misclassification rate to
        ``history_e_out``; ``history_best_e_out`` receives the lowest rate
        observed so far.
        """
        # NOTE(review): best_e_out is the minimum over the estimates for the
        # *current* weights, not the estimate for best_weights - confirm that
        # this is the quantity the plots are meant to show.
        tests = 1000
        wrong = 0
        for _ in range(tests):
            test_data = numpy.array([1] + numpy.random.uniform(-1, 1, self.d).tolist(), ndmin=2).T
            correct_answer = self.training_data_point_position(test_data[1], test_data[2])
            if self.query(test_data) != correct_answer:
                wrong += 1
        performance = wrong / float(tests)

        if performance < self.best_e_out:
            self.best_e_out = performance
        self.history_e_out.append(performance)
        self.history_best_e_out.append(self.best_e_out)

    def query(self, data):
        """Classify ``data`` (bias-prefixed column vector) with the current weights."""
        return self._classify(self.weights, data)

    def query_best_weights(self, data):
        """Classify ``data`` (bias-prefixed column vector) with the pocket weights."""
        return self._classify(self.best_weights, data)

    def initialize_weights_vector(self):
        """Reset weights and histories, then record the initial performance."""
        self.weights = numpy.zeros((self.d + 1, 1))
        # Start the pocket with a usable vector so query_best_weights works
        # even if no later weights ever beat the initial error of 1.
        self.best_weights = self.weights.copy()
        self.best_performance = 1.

        self.history_best_performance = []
        self.history_performance = []

        self.best_e_out = 1.
        self.history_e_out = []
        self.history_best_e_out = []

        self.check_weights_performance()

    def randomize_target_line(self):
        """Draw a random straight line and store it as ``target_line_polynomial``.

        ``self.d`` pairs of uniform numbers are drawn; the first pair is used
        as the x-values and the second pair as the y-values of the degree-1
        polynomial fit, i.e. the line through those two points.
        """
        points_positions = [numpy.random.uniform(-1, 1, 2) for _ in range(self.d)]
        polynomial = numpy.polyfit(points_positions[0], points_positions[1], 1)
        self.target_line_polynomial = numpy.poly1d(polynomial)

    def randomize_training_data(self):
        """Draw ``self.N`` uniform training points and label them with the target line.

        Every point is stored as a ``(d + 1, 1)`` column vector whose first
        entry is the constant bias input 1.
        """
        self.training_data = []
        self.training_data_answers = []
        for _ in range(self.N):
            point = numpy.array([1] + numpy.random.uniform(-1, 1, self.d).tolist(), ndmin=2).T
            self.training_data.append(point)
            self.training_data_answers.append(self.training_data_point_position(point[1], point[2]))

    def training_data_point_position(self, x, y):
        """Return 1 if the point ``(x, y)`` lies above the target line, else -1."""
        real_y = self.target_line_polynomial(x)
        return 1 if real_y < y else -1

    def spoil_training_data_answers(self):
        """Flip the labels of ``int(N / 10)`` distinct training points.

        Indices are drawn without replacement so that no label is flipped
        twice (which would silently undo the noise).
        """
        total = len(self.training_data_answers)
        flips = min(int(self.N / 10), total)
        if flips <= 0:
            return
        for index in numpy.random.choice(total, size=flips, replace=False):
            self.training_data_answers[index] = -self.training_data_answers[index]

    def plot_everything(self):
        """Show the target line together with the labelled training points."""
        self.plot_target_line()
        self.plot_training_data_points()
        self.plot_set_settings()
        matplotlib.pyplot.show()

    def plot_target_line(self):
        """Plot the target line over x in [-1, 1]."""
        x_points = numpy.linspace(-1, 1, 100)
        y_points = self.target_line_polynomial(x_points)
        matplotlib.pyplot.plot(x_points, y_points, label='Target')

    def plot_training_data_points(self):
        """Plot training points: green for label +1, red otherwise."""
        for point, label in zip(self.training_data, self.training_data_answers):
            marker = 'g.' if label == 1 else 'r.'
            matplotlib.pyplot.plot(point[1][0], point[2][0], marker)

    def plot_set_settings(self):
        """Fix the axes to [-1, 1] x [-1, 1] and add axis labels and legend."""
        matplotlib.pyplot.axis([-1, 1, -1, 1])
        matplotlib.pyplot.ylabel("x2")
        matplotlib.pyplot.xlabel("x1")
        matplotlib.pyplot.legend()


In [3]:
# Experiment configuration.
N = 100        # training points per run
d = 2          # input dimensions
T = 1000       # weight updates per run
cycles = 20    # independent runs to average over

# Running sums of the per-run histories (one slot per recorded entry: 0 .. T).
hist_performance = [0] * (T + 1)
hist_best_performance = [0] * (T + 1)
hist_e_out = [0] * (T + 1)
hist_best_e_out = [0] * (T + 1)

for i in range(cycles):
    print(i)
    a = Pocket_pla(d, N, T)
    hist_performance = [
        total + a.history_performance[k] for k, total in enumerate(hist_performance)
    ]
    hist_best_performance = [
        total + a.history_best_performance[k] for k, total in enumerate(hist_best_performance)
    ]
    hist_e_out = [
        total + a.history_e_out[k] for k, total in enumerate(hist_e_out)
    ]
    hist_best_e_out = [
        total + a.history_best_e_out[k] for k, total in enumerate(hist_best_e_out)
    ]

# Turn the sums into averages over all runs.
hist_performance = [total / cycles for total in hist_performance]
hist_best_performance = [total / cycles for total in hist_best_performance]
hist_e_out = [total / cycles for total in hist_e_out]
hist_best_e_out = [total / cycles for total in hist_best_e_out]

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [11]:
%matplotlib
matplotlib.pyplot.axis([0,T+1,0,1])
matplotlib.pyplot.plot(numpy.linspace(0,T+1,T+1),hist_performance,'r')
matplotlib.pyplot.plot(numpy.linspace(0,T+1,T+1),hist_best_performance,'g')
matplotlib.pyplot.show()
# %matplotlib inline

Using matplotlib backend: Qt5Agg


In [12]:
%matplotlib
matplotlib.pyplot.axis([0,T+1,0,1])
matplotlib.pyplot.plot(numpy.linspace(0,T+1,T+1),hist_e_out,'r')
matplotlib.pyplot.plot(numpy.linspace(0,T+1,T+1),hist_best_e_out,'g')
matplotlib.pyplot.show()
import matplotlib.pyplot

Using matplotlib backend: Qt5Agg


In [None]:
In this program, Ein and Eout are appended to the history arrays only when a training point is misclassified, i.e. when the weights are updated.
It is also possible to append these values on every iteration. In that case the graph would be less jagged and would contain flat segments:
a flat segment appears wherever no weight update occurs.