# In [1]:
# Last modified: 2018/5/19 3:50
import plotly
import pandas as pd
from scipy.io import arff
import numpy as np
from sklearn import datasets
from numpy import linalg as la
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import explained_variance_score
from MyPerceptron import *
from MLPerceptron import *
from matplotlib import *


def noise_generator(X, bool_func, noise_rate=0.1):
    """Append randomly drawn, deliberately mislabeled rows to a boolean dataset.

    ``int(noise_rate * X.shape[0])`` rows are drawn with replacement from a
    fixed pool of three 9-element rows (8 inputs followed by a label that
    contradicts the named boolean function) and appended below ``X``. Each
    drawn row is printed as it is selected.

    Args:
        X: 2-D NumPy array whose rows have 9 columns.
        bool_func: ``'and'`` or ``'or'``; selects the pool of noise rows.
        noise_rate: Fraction of ``X.shape[0]`` that determines how many noise
            rows are appended.

    Returns:
        A new array holding the rows of ``X`` followed by the noise rows, or
        ``X`` itself when the computed number of noise rows is zero or less.

    Raises:
        ValueError: If ``bool_func`` is neither ``'and'`` nor ``'or'``.
    """
    # Local import: the file never imports ``random`` explicitly, so the name
    # would otherwise only resolve through one of the star imports.
    import random

    noise_pools = {
        'and': [[0, 0, 0, 0, 0, 0, 0, 0, 1], [1, 1, 1, 1, 1, 1, 1, 1, 0], [1, 0, 0, 1, 0, 0, 1, 0, 1]],
        'or': [[0, 0, 0, 0, 0, 0, 0, 0, 1], [1, 1, 1, 1, 1, 1, 1, 1, 0], [1, 0, 0, 1, 0, 0, 1, 0, 0]],
    }
    if bool_func not in noise_pools:
        raise ValueError(
            "bool_func must be 'and' or 'or', got {!r}".format(bool_func))
    noises = noise_pools[bool_func]

    noise_num = int(noise_rate * X.shape[0])
    drawn = []
    for _ in range(noise_num):
        # randint is inclusive on both ends, hence the ``- 1``.
        noise = np.array([noises[random.randint(0, len(noises) - 1)]])
        print(noise)
        drawn.append(noise)

    if not drawn:
        return X
    # Concatenate once instead of recopying the growing array per noise row.
    return np.concatenate([X] + drawn, axis=0)


def label_race(row, col_name, value):
    """Return 1 if ``row[col_name]``, converted to float, equals ``value``, else 0.

    Used below as a row-wise indicator when expanding a categorical column
    into one 0/1 column per level.
    """
    return 1 if float(row[col_name]) == value else 0


def bool_func_calculator(Y, y):
    """Return the fraction of positions at which ``Y`` and ``y`` disagree.

    Only the first ``Y.shape[0]`` entries are compared, so ``Y`` must expose
    ``.shape`` and ``y`` must be indexable over that range.
    """
    total = Y.shape[0]
    # Each mismatch contributes 1.0, so any non-empty tally is a float.
    mismatches = sum(1.0 for pos in range(total) if Y[pos] != y[pos])
    return mismatches / total


def MSE(target, predictions):
    """Return the mean of the squared element-wise differences ``target - predictions``."""
    residuals = target - predictions
    return np.power(residuals, 2).mean()


# ---- Auto MPG regression with a multilayer perceptron ----
# Load the ARFF file into a DataFrame restricted to the listed columns.
filename = "autoMpg.arff"
data = arff.loadarff(filename)
column_names = ['cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'model', 'origin', 'class']
df = pd.DataFrame(data[0], columns=column_names)
# Replace missing entries in every column with that column's mean.
for i in df.columns:
    df[i] = df[i].fillna(df[i].mean())

# Levels of the three categorical columns; each level becomes its own 0/1
# indicator column named '<prefix><level>'.
Cylinders = [8, 4, 6, 3, 5]
Model = [70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82]
Origin = [1, 2, 3]

# Column creation order (cylinders, then model, then origin) matters because
# the feature slice below ends at 'origin_3'.
for source_col, prefix, levels in (
        ('cylinders', 'cylinder_', Cylinders),
        ('model', 'model_', Model),
        ('origin', 'origin_', Origin),
):
    for value in levels:
        df[prefix + str(value)] = df.apply(lambda row: label_race(row, source_col, value), axis=1)

# The raw categorical columns are superseded by their indicator columns.
df = df.drop(columns=['cylinders', 'model', 'origin'])
# print(df)
y = np.asarray(df['class'], dtype=np.float64)

df = df.drop(columns=['class'])
# 4 numeric columns + 5 + 13 + 3 indicator columns = 25 features, matching the
# first entry of the layer tuple passed to MLPerceptron below.
x = df.loc[:, 'displacement':'origin_3']
test_size = 0.2
random_state = 3
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size, random_state=random_state)

# Standardize using statistics fitted on the training split only, then apply
# the same transform to the test split.
sc = StandardScaler(with_mean=True)
sc.fit(x_train)
X_train_std = sc.transform(x_train)
X_train_std = np.array(X_train_std, dtype=np.float64)

X_test_std = sc.transform(x_test)
# perceptron
n_iter = 20000
eta = 0.01
random_state = 0  # reassigned here; not referenced again in this section
# Predict MPG using Multilayer Perceptron
pp = MLPerceptron((25, 1), n_iter=n_iter, rate=eta, activator='linear')
pp.train(X_train_std, y_train)
y_predicted = pp.predict(X_test_std)
# np.float was removed in NumPy 1.24; np.float64 is the equivalent dtype.
y_test = np.asarray(y_test, dtype=np.float64)
print(y_predicted)
score = explained_variance_score(y_true=y_test, y_pred=y_predicted)
print('explained_variance_score', score)
# NOTE(review): assumes pp.predict returns one prediction per test row, in
# order - confirm against MLPerceptron.predict.
for actual, predicted in zip(y_test, y_predicted):
    print(actual, predicted)
score = MSE(y_test, y_predicted)
print('MSE score:', score)

noise_rate = 0.1
# Predict 8-input linearly separable boolean functions (AND, OR) using the
# pocket algorithm.
# NOTE(review): generate_bool_dataset comes from a star import; the slicing
# below assumes each row is 8 inputs followed by the label - confirm.
ands = generate_bool_dataset(0, 255, 'and')
ors = generate_bool_dataset(0, 255, 'or')
and_dataset = np.array(ands)
or_dataset = np.array(ors)
# Generate random noises (AND first, then OR, so the random draws keep their
# original order).
and_dataset_noised = noise_generator(and_dataset, 'and', noise_rate=noise_rate)
or_dataset_noised = noise_generator(or_dataset, 'or', noise_rate=noise_rate)

# Split every dataset into inputs (all but the last column) and labels (last
# column).
X_and = and_dataset[:, :-1]
y_and = and_dataset[:, -1]
X_and_noised = and_dataset_noised[:, :-1]
y_and_noised = and_dataset_noised[:, -1]
X_or = or_dataset[:, :-1]
y_or = or_dataset[:, -1]
X_or_noised = or_dataset_noised[:, :-1]
y_or_noised = or_dataset_noised[:, -1]

pp = MLPerceptron(n_iter=n_iter, rate=eta, activator='boolean', use_mlp=False)

# AND: train on clean data, evaluate on clean data.
pp.train(X_and, y_and)
pred = pp.predict(X_and)
error_rate = bool_func_calculator(y_and, pred)
print("Error rate: (AND function, no noise, use pocket algorithm)", error_rate)

# AND: train on noisy data, evaluate on the clean inputs. The error rate must
# be computed from pred_noised; reusing `pred` would merely repeat the
# no-noise result printed above.
pp.train(X_and_noised, y_and_noised)
pred_noised = pp.predict(X_and)
error_rate = bool_func_calculator(y_and, pred_noised)
print("Error rate: (AND function, with noise, use pocket algorithm)", error_rate)

# OR: same two training runs; only the raw predictions of the noisy run are
# printed, no error rate is computed here.
pp.train(X_or, y_or)
pred = pp.predict(X_or)
pp.train(X_or_noised, y_or_noised)
pred_noised = pp.predict(X_or)
print(pred_noised)

# Same AND / OR experiment with the pocket algorithm switched off: train on
# each noisy dataset, then print the raw predictions for the clean inputs.
pp = MLPerceptron(n_iter=n_iter, rate=eta, activator='boolean', use_mlp=False)
for X_noisy, y_noisy, X_clean in (
        (X_and_noised, y_and_noised, X_and),
        (X_or_noised, y_or_noised, X_or),
):
    pp.train(X_noisy, y_noisy, use_pocket=False)
    pred = pp.predict(X_clean)
    print(pred)

# Learn the 8-input XOR function (not linearly separable) with a multilayer
# perceptron.
xors = generate_bool_dataset(0, 255, 'xor')

# The training copy gets 50 extra all-ones and 50 extra all-zeros rows, each
# labeled 0, appended in alternating order.
xor_train = list(xors)
for i in range(50):
    xor_train.extend((
        [1, 1, 1, 1, 1, 1, 1, 1, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
    ))

xor_dataset = np.array(xors)
xor_trainset = np.array(xor_train)
# Inputs are all columns but the last; the label is the last column.
X_xor, y_xor = xor_dataset[:, :-1], xor_dataset[:, -1]
# "trian" is misspelled; the global name is kept exactly as it was.
X_xor_trian, y_xor_train = xor_trainset[:, :-1], xor_trainset[:, -1]

n = MLPerceptron((8, 20, 1), activator='sigmoid', n_iter=20000, rate=0.01)
n.train(X_xor_trian, y_xor_train)
# Evaluate on the un-augmented inputs.
xor_test = np.array(X_xor)
print(Output_predict(xor_test, n))


# Notebook output captured when this cell was run:
# ImportError: C extension: No module named 'pandas._libs.tslib' not built. If you want to import pandas from the source directory, you may need to run 'python setup.py build_ext --inplace --force' to build the C extensions first.