In [59]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd


# Read the headerless CSV; pandas assigns integer column labels.
df = pd.read_csv('ionodata/iono.data', header=None)

# Columns 0..33 form the feature matrix X.
X = df.iloc[:, :34].values

# Column 34 carries the class letter; encode 'g' as 10 and anything else as 0.
y = np.where(df.iloc[:, 34].values == 'g', 10, 0)
print(X.shape, y.shape)

from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for testing; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# Seed NumPy's global RNG so later np.random draws are repeatable.
np.random.seed(10)

class Model:
    """Linear scorer whose weights are stochastically binarized to +/-1.

    ``weights`` starts as a zero vector of length ``input_dim``. After the
    first call to :meth:`update_weights` every entry is either 1.0 or -1.0.
    """

    def __init__(self, input_dim):
        """Create a model with ``input_dim`` weights, all initialised to 0."""
        self.weights = np.zeros(input_dim)

    def predict(self, input_data):
        """Return the dot product of ``input_data`` with the current weights."""
        return np.dot(input_data, self.weights)

    def update_weights(self, gradient, num_votes=10):
        """Add ``gradient`` to the weights, then re-binarize them by voting.

        For every weight, ``num_votes`` numbers are drawn uniformly from
        [-1, 1) with NumPy's global RNG (so ``np.random.seed`` controls the
        outcome). Each draw that is smaller than the shifted weight counts as
        one vote. A weight becomes 1.0 when at least half of its draws voted
        for it and -1.0 otherwise, so shifted weights at or above 1 always map
        to 1.0 and those at or below -1 always map to -1.0.

        Args:
            gradient: Array broadcastable to ``self.weights``. It is added
                as-is; callers apply any scaling themselves.
            num_votes: Number of random draws per weight. The default of 10
                (threshold 5) matches the previously hard-coded behaviour.

        Raises:
            ValueError: If ``num_votes`` is less than 1.
        """
        if num_votes < 1:
            raise ValueError("num_votes must be at least 1")

        # Build a new array instead of mutating the current one in place.
        shifted = self.weights + gradient

        # One row of draws per vote, one column per weight.
        draws = np.random.uniform(
            low=-1.0, high=1.0, size=(num_votes, shifted.shape[0])
        )
        votes = np.sum(draws < shifted, axis=0)

        # Smallest vote count that is at least half of num_votes (5 for 10).
        threshold = (num_votes + 1) // 2
        self.weights = np.where(votes >= threshold, 1.0, -1.0)

        # self.weights = np.where(self.weights > 0.5, 1, self.weights)
        # self.weights = np.where(self.weights < -0.5, -1, self.weights)
        # self.weights = np.where((self.weights >= -0.5) & (self.weights <= 0.5), 0, self.weights)

# class Environment:
#     def __init__(self, state_dim):
#         self.state_dim = state_dim

#     def reset(self):
#         self.state = np.random.rand(self.state_dim)
#         return self.state

#     def step(self, action):
#         reward = np.dot(self.state, action)
#         self.state = np.random.rand(self.state_dim)
#         return self.state, reward

def train(model, X_train, y_train, num_epochs, learning_rate):
    """Run ``num_epochs`` passes of per-sample weight updates.

    For each (sample, label) pair, the model's current prediction is
    subtracted from a label-scaled copy of the sample, the difference is
    multiplied element-wise by the sample, scaled by ``learning_rate`` and
    handed to ``model.update_weights``.

    Args:
        model: Object exposing ``predict(sample)`` and
            ``update_weights(step)``.
        X_train: Iterable of training samples.
        y_train: Iterable of labels, paired with ``X_train`` via ``zip``.
        num_epochs: Number of full passes over the training pairs.
        learning_rate: Factor applied to every update before it is passed to
            the model.

    Returns:
        None. The model is changed only through ``update_weights``.
    """
    for _ in range(num_epochs):
        for sample, label in zip(X_train, y_train):
            # NOTE(review): with a scalar label, np.dot() simply scales the
            # sample, so "reward" is a vector rather than a scalar. Confirm
            # this is the intended target before changing the update rule.
            reward = np.dot(sample, label)

            # Query the model once per sample; the result feeds the update.
            prediction = model.predict(sample)
            gradient = (reward - prediction) * sample
            model.update_weights(learning_rate * gradient)

# Build a 34-input model and fit it on the training split.
model = Model(34)
train(model, X_train, y_train, num_epochs=100, learning_rate=0.01)

# Show the learned weight vector.
print(model.weights)

# Score the held-out split: a positive raw prediction is mapped to class 10
# and anything else to class 0, then compared with the stored label.
correct = 0
for test_input, label in zip(X_test, y_test):
    predicted_action = 10 if model.predict(test_input) > 0 else 0
    correct += int(predicted_action == label)
print('Accuracy: ', correct / len(y_test))


(351, 34) (351,)
[ 1.  1.  1. -1.  1.  1.  1. -1. -1.  1. -1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1. -1.  1.  1.  1.  1.  1.  1. -1.  1. -1.  1. -1.  1.]
Accuracy:  0.5833333333333334
