In [23]:
from Dataset import SPAM_import

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [24]:
# df = pd.read_csv('data/spamdata.csv', sep=' ') 
# df = df.sample(frac = 1, random_state = 42).reset_index(drop = True)
# X = df.drop('isSPAM', axis=1).values 
# y = df['isSPAM'].values
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

data = SPAM_import()
X_train = data.train_x
y_train = data.train_y

X_test = data.val_x
y_test = data.val_y


Shape of y :  (4601,)
Shape of X :  (4601, 57)
y to_numeric :  [0 0 0 ... 0 0 1]


In [None]:
class PerceptronLearning:
    def __init__(self, learning_rate = 1.0, max_iter = 1000):
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.w = None
        self.b = 0
    
    def _step_function(self, x):
        return np.where(x >= 0, 1, 0)

    def train(self, X, y):
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        self.b = 0

        # regularize y to {0, 1}
        y = np.where(y <= 0, 0, 1)

        for _ in range(self.max_iter):
            errors = 0
            for i in range(n_samples):
                condition = y[i] * (np.dot(self.w, X[i]) + self.b)
                if condition <= 0:
                    # update
                    self.w += self.learning_rate * y[i] * X[i]
                    self.b += self.learning_rate * y[i]
                    errors += 1
            # if no error in this iterate
            if errors == 0:
                break

    def predict(self, X):
        # linear_output = np.dot(X, self.w) + self.b
        # return np.where(linear_output >= 0, 1, 0)
        linear_output = np.dot(X, self.w) + self.b
        return self._step_function(linear_output)

    def evaluation(self, X, y_true):
        y_true = np.where(np.array(y_true) <= 0, 0, 1) # important
        
        y_pred = self.predict(X)
        acc = accuracy_score(y_true, y_pred)
        prec = precision_score(y_true, y_pred, pos_label=1, average = 'macro')
        rec = recall_score(y_true, y_pred, pos_label=1, average = 'macro')
        f1 = f1_score(y_true, y_pred, pos_label=1, average = 'macro')


        return {
            "Accuracy": acc,
            "Precision": prec,
            "Recall": rec,
            "F1-Score": f1
        }

In [26]:
model = PerceptronLearning(learning_rate = 0.001)

model.train(X_train, y_train)
eval_metrics = model.evaluation(X_test, y_test)

print("Evaluation Metrics:")
for metric, score in eval_metrics.items():
    print(f"{metric}: {score:.4f}")

y_pred = model.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=['Ham (0)', 'Spam (1)']))

Evaluation Metrics:
Accuracy: 0.5683
Precision: 0.7263
Recall: 0.6545
F1-Score: 0.5549

Classification Report:
              precision    recall  f1-score   support

     Ham (0)       0.99      0.31      0.48       289
    Spam (1)       0.46      0.99      0.63       172

    accuracy                           0.57       461
   macro avg       0.73      0.65      0.55       461
weighted avg       0.79      0.57      0.54       461



In [27]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

class PerceptronLearningOptimize:
    def __init__(self, learning_rate=0.01, max_iter=1000):
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.w = None
        self.b = None

    def _step_function(self, x):
        return np.where(x >= 0, 1, 0)

    def train(self, X, y):
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        self.b = 0
        
        # Ensure labels are 0 and 1
        y = np.array(y)
        y = np.where(y <= 0, 0, 1)

        for _ in range(self.max_iter):
            errors = 0
            for idx in range(n_samples):
                linear_output = np.dot(self.w, X[idx]) + self.b
                y_pred = self._step_function(linear_output)

                update = self.learning_rate * (y[idx] - y_pred)
                if update != 0:
                    self.w += update * X[idx]
                    self.b += update
                    errors += 1
            if errors == 0:
                break

    def predict(self, X):
        linear_output = np.dot(X, self.w) + self.b
        return self._step_function(linear_output)

    def evaluation(self, X, y_true):
        y_true = np.where(np.array(y_true) <= 0, 0, 1)
        y_pred = self.predict(X)
        acc = accuracy_score(y_true, y_pred)
        prec = precision_score(y_true, y_pred, average='macro', zero_division=0)
        rec = recall_score(y_true, y_pred, average='macro', zero_division=0)
        f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)

        return {
            "Accuracy": acc,
            "Precision": prec,
            "Recall": rec,
            "F1-Score": f1
        }


In [28]:
from sklearn.linear_model import Perceptron

# create fake data
# X, y = make_classification(n_samples=100, n_features=2, n_classes=2, n_informative=2, n_redundant=0, random_state=42)
# y = 2 * y - 1
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# percetron model in skleran
clf = Perceptron(max_iter=1000, tol=1e-3)
clf.fit(X_train, y_train)

print("Accuracy:", clf.score(X_test, y_test))

# percetron model v1
clf = PerceptronLearning()
clf.train(X_train, y_train)

print("Accuracy:", clf.evaluation(X_test, y_test))

# percetron model optimize
clf = PerceptronLearningOptimize()
clf.train(X_train, y_train)

print("Accuracy:", clf.evaluation(X_test, y_test))

Accuracy: 0.9067245119305857
Accuracy: {'Accuracy': 0.5683297180043384, 'Precision': 0.726272534464475, 'Recall': 0.6545324696225959, 'F1-Score': 0.5549264752257169}
Accuracy: {'Accuracy': 0.8763557483731019, 'Precision': 0.8654221476248097, 'Recall': 0.8766697513478716, 'F1-Score': 0.8699911437434753}
