In [1]:
import numpy as np
from data import dataset
import pandas as pd

In [2]:
def RBF_kernek(X1, X2, gamma):
    """Evaluate the Gaussian (RBF) kernel ``exp(-gamma * ||X1 - X2||^2)``.

    The squared distance is summed over the last axis only, so any leading
    axes produced by NumPy broadcasting of ``X1 - X2`` survive in the result
    (e.g. one vector against a 2-D stack of vectors yields one value per row).

    Args:
        X1: First operand; must broadcast against ``X2``.
        X2: Second operand.
        gamma: Coefficient multiplying the squared distance.

    Returns:
        The kernel value(s), with the last axis reduced away.
    """
    difference = X1 - X2
    squared_distance = np.sum(np.square(difference), axis=-1)
    return np.exp(-gamma * squared_distance)
def RBF(X, bank, gamma=0.5):
    """Map one sample to its RBF-kernel similarities against every bank row.

    Entry ``j`` of the result is ``RBF_kernek(X, bank[j], gamma)``. The kernel
    is evaluated for all centres in a single broadcast call instead of one
    Python-level call per centre.

    Args:
        X: A single sample (1-D array-like of features).
        bank: Sequence of centres, one per row, each shaped like ``X``.
        gamma: Kernel coefficient. Defaults to 0.5, the value that was
            previously hard-coded here.

    Returns:
        A 1-D float array of length ``len(bank)``.
    """
    n_centers = len(bank)
    if n_centers == 0:
        # Nothing to compare against; mirror the empty result of the old loop.
        return np.zeros(0)

    centers = np.asarray(bank, dtype=float)
    if centers.ndim == 1:
        # A flat bank means each centre is a single scalar feature; give it an
        # explicit feature axis so the kernel still reduces per centre.
        centers = centers[:, np.newaxis]
    sample = np.asarray(X, dtype=float)

    # Broadcasting the sample against the (n_centers, n_features) bank makes
    # the kernel's last-axis reduction return one value per centre.
    similarities = RBF_kernek(sample, centers, gamma)
    return np.asarray(similarities, dtype=float).reshape(n_centers)
def pca(X, num_components):
    """Project ``X`` onto its leading principal components.

    Args:
        X: 2-D array with one observation per row and one variable per column.
        num_components: Number of leading components to keep.

    Returns:
        Array of shape ``(n_samples, num_components)`` holding the centred
        data expressed in the basis of the selected eigenvectors.
    """
    # Remove the per-column mean so the covariance describes spread only.
    centered = X - np.mean(X, axis=0)

    # Columns are variables, hence rowvar=False.
    covariance = np.cov(centered, rowvar=False)

    # eigh is used because the covariance matrix is symmetric.
    eigvals, eigvecs = np.linalg.eigh(covariance)

    # Reorder eigenvector columns from largest to smallest eigenvalue, then
    # keep the first `num_components` of them.
    descending = np.argsort(eigvals)[::-1]
    basis = eigvecs[:, descending][:, :num_components]

    return np.dot(centered, basis)

In [4]:
class Perceptron:
    """Single-layer perceptron that classifies samples in RBF-kernel space.

    Every sample is first mapped by ``RBF(sample, bank)`` to a vector of
    similarities against the stored training rows, so the model holds one
    weight per training row: ``input_size`` must equal the number of rows
    passed to :meth:`train`.
    """

    def __init__(self, input_size=42, learning_rate=0.1):
        """Create an untrained model.

        Args:
            input_size: Number of weights; must match ``len(data)`` given to
                :meth:`train`.
            learning_rate: Step size of the weight/bias updates.
        """
        self.weights = np.random.random(input_size)
        self.bias = 0
        self.learning_rate = learning_rate
        # Training rows used as RBF centres; None until train() has been called.
        self.bank = None

    def _features(self, inputs):
        """Return the RBF feature vector of one sample against the bank."""
        if self.bank is None:
            raise RuntimeError(
                "Perceptron has no RBF centres yet; call train() before predict()/test()"
            )
        return RBF(inputs, self.bank)

    def _activate(self, features):
        """Threshold the linear activation of an already-transformed sample."""
        activation = np.dot(self.weights, features) + self.bias
        return 1 if activation >= 0 else 0

    def predict(self, inputs):
        """Classify one raw sample.

        Returns:
            1 if the activation is non-negative, otherwise 0.

        Raises:
            RuntimeError: If the model has not been trained yet.
        """
        return self._activate(self._features(inputs))

    def train(self, data, label):
        """Run one pass of perceptron updates over ``data``.

        ``data`` is stored as the RBF centre bank before the pass starts.

        Args:
            data: Training samples, one per row.
            label: Target value (0 or 1) for each row of ``data``.

        Returns:
            The sum of squared prediction errors accumulated over the pass.

        Raises:
            ValueError: If the number of rows differs from the number of
                weights.
        """
        if len(data) != len(self.weights):
            raise ValueError(
                f"input_size ({len(self.weights)}) must equal the number of "
                f"training samples ({len(data)})"
            )
        self.bank = data
        total_error = 0.0
        for row, target in zip(data, label):
            # Transform once and reuse the result for both the prediction and
            # the weight update (the transform is the expensive step).
            features = self._features(row)
            error = target - self._activate(features)

            step = self.learning_rate * error * 2
            self.weights += step * features
            self.bias += step
            total_error += error ** 2
        return total_error

    def test(self, inputs, label):
        """Print the fraction of ``inputs`` whose prediction equals ``label``."""
        correct = sum(
            1 for row, target in zip(inputs, label) if self.predict(row) == target
        )
        print(f"Accuracy: {correct/(len(label))}")

In [18]:
from collections import Counter
def pre_rbf(X, gamma=0.5):
    """Compute the full RBF kernel (Gram) matrix of a data set with itself.

    Args:
        X: 2-D array-like of shape ``(n_samples, n_features)``.
        gamma: Kernel coefficient. Pass ``None`` to use ``1 / n_features``.

    Returns:
        ``(n_samples, n_samples)`` array whose ``[i, j]`` entry is
        ``exp(-gamma * ||X[i] - X[j]||^2)``.
    """
    X = np.asarray(X, dtype=float)
    if gamma is None:
        gamma = 1.0 / X.shape[1]

    # Broadcasting (n, d) against (n, 1, d) gives every pairwise difference as
    # an (n, n, d) array; the last axis is then reduced to squared distances.
    pairwise_diff = X - X[:, np.newaxis]
    return np.exp(-gamma * np.sum(pairwise_diff ** 2, axis=-1))
class KNNeu:
    """k-nearest-neighbour binary classifier that compares samples in RBF space.

    The first ``max_train`` training rows act both as RBF centres and as the
    neighbour candidates. ``self.bank`` holds their kernel matrix, so row ``i``
    is the RBF feature vector of training sample ``i``.
    """

    def __init__(self, data, label, k=21, max_train=100):
        """Store the training subset and pre-compute its kernel matrix.

        Args:
            data: Training samples, one per row.
            label: Labels (0 or 1) aligned with ``data``.
            k: Number of neighbours that vote.
            max_train: Number of leading rows to keep. ``None`` keeps all
                rows; the default of 100 is the previously hard-coded cap.
        """
        self.k = k
        self.train_data = data[:max_train]
        self.train_label = label[:max_train]
        self.bank = pre_rbf(self.train_data)

    def euclidean_distance(self, x1, x2):
        """L2 distance between two vectors."""
        return np.sqrt(np.sum((x1 - x2) ** 2))

    def absolute_distance(self, x1, x2):
        """L1 (Manhattan) distance between two vectors."""
        return np.sum(np.abs(x1 - x2))

    def chebyshev_distance(self, x1, x2):
        """L-infinity (largest coordinate difference) distance."""
        return np.max(np.abs(x1 - x2))

    def test(self, val_data, val_label):
        """Print the fraction of ``val_data`` rows classified as ``val_label``."""
        pred = self.predict(val_data)
        acc = sum(1 for i, guess in enumerate(pred) if guess == val_label[i])
        print(f"Accuracy: {acc/len(pred)}")

    def predict(self, X):
        """Return a list with the predicted label (0 or 1) of every row of ``X``."""
        return [self._predict(x) for x in X]

    def _predict(self, x):
        """Classify one raw sample by majority vote of its k nearest neighbours."""
        # The query's RBF features do not depend on the neighbour being
        # compared, so compute them once rather than once per training row.
        query = RBF(x, self.train_data)
        distances = [self.euclidean_distance(query, train) for train in self.bank]

        # Indices of the k smallest distances.
        k_indices = np.argsort(distances)[:self.k]

        # Labels of the k nearest training samples.
        k_nearest_labels = [self.train_label[i] for i in k_indices]

        # Majority vote; a tie is resolved in favour of class 1.
        votes = Counter(k_nearest_labels)
        return 1 if votes[1] >= votes[0] else 0

In [6]:
# NOTE: change this path to wherever train.csv lives on your machine.
TRAIN_CSV_PATH = 'C:\\Users\\Robert\\Desktop\\hw2\\train.csv'
data = dataset(TRAIN_CSV_PATH)

In [21]:
# Seed NumPy's global RNG so the perceptron's random initial weights
# (np.random.random in Perceptron.__init__) are the same on every
# Restart & Run All.
np.random.seed(0)

# The perceptron needs one weight per RBF centre, and train() uses every
# training row as a centre, so size the model from the data instead of
# hard-coding the row count.
linear_model = Perceptron(input_size=len(data.train_data))
epochs = 5
for epoch in range(epochs):
    linear_model.train(data.train_data, data.train_label)
    print(epoch)

0
1
2
3
4


In [18]:
# Report perceptron accuracy on the train, validation and test splits, in that order.
for split in ("train", "val", "test"):
    linear_model.test(getattr(data, f"{split}_data"), getattr(data, f"{split}_label"))

Accuracy: 0.948
Accuracy: 0.9380440348182284
Accuracy: 0.9368600682593856


In [19]:
# Fit the RBF-space k-NN on the training split, then report its accuracy on
# the validation and test splits.
knn = KNNeu(data.train_data, data.train_label)
for split in ("val", "test"):
    knn.test(getattr(data, f"{split}_data"), getattr(data, f"{split}_label"))

Accuracy: 0.9380440348182284
Accuracy: 0.9368600682593856


In [27]:
def final_combine(model1, model2, data, label):
    """Print the accuracy of the two models' combined prediction.

    For each sample the two 0/1 predictions are averaged and truncated with
    ``int``, so the combined prediction is 1 only when both models predict 1.

    Args:
        model1: Model whose ``predict`` takes a single sample and returns a
            scalar prediction.
        model2: Model whose ``predict`` takes a list of samples and returns a
            list of predictions (the k-NN).
        data: Samples, indexable by position.
        label: Ground-truth labels aligned with ``data``.
    """
    total = len(label)
    hits = 0
    for idx in range(total):
        sample = data[idx]
        single_vote = model1.predict(sample)
        batch_vote = model2.predict([sample])[0]
        combined = int((single_vote + batch_vote) / 2)
        hits += 1 if combined == label[idx] else 0
    print(f"Accuracy: {hits/len(label)}")

In [29]:
# Report the accuracy of the combined perceptron + k-NN prediction on the
# validation and test splits.
# NOTE(review): the recorded outputs show the same validation/test accuracy
# for the perceptron, the k-NN and this combination -- worth confirming the
# models are not all predicting a single class.
for split in ("val", "test"):
    final_combine(linear_model, knn, getattr(data, f"{split}_data"), getattr(data, f"{split}_label"))

Accuracy: 0.9380440348182284
Accuracy: 0.9368600682593856
