In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
import random
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

In [2]:
from sklearn.datasets import load_iris

iris = load_iris()

# Create a DataFrame with features, targets, and target names
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df['target_names'] = iris.target_names[iris.target]

# Rename the 'target_names' column to 'variety'
iris_df.rename(columns={'target_names': 'variety'}, inplace=True)

# Display the updated DataFrame
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),variety
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:
# Filter the dataset for 'setosa' and 'virginica' species
filtered_df = iris_df[iris_df['variety'].isin(['setosa', 'virginica'])]

# Select the features and target
X = filtered_df[['sepal length (cm)', 'petal width (cm)']]
y = pd.factorize(filtered_df['variety'])[0] # Since the perceptron only intakes binary values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

In [4]:
print('#Training data points: {}'.format(X_train.shape[0]))
print('#Testing data points: {}'.format(X_test.shape[0]))
print('Class labels: {} (mapped from {}'.format(np.unique(y), np.unique(filtered_df['variety'])))

#Training data points: 70
#Testing data points: 30
Class labels: [0 1] (mapped from ['setosa' 'virginica']


In [5]:
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

### Implementing Adaline

Adaline uses an optimization technique, such as gradient descent, to minimize the cost function. Gradient descent iteratively adjusts the weights to find the minimum of the cost function. Adaline's weights are updated in a way that minimizes the mean squared error (MSE) or a similar cost function.

Adaline uses a linear activation function that produces continuous-valued output. Instead of making binary decisions, Adaline computes a real-valued output for each sample. 

The adaline batch gradient descent algorithm that is used below was obtained from: https://nthu-datalab.github.io/ml/labs/04-1_Perceptron_Adaline/04-1_Perceptron_Adaline.html

In [6]:
class AdalineGD(object):
    """ADAptive LInear NEuron classifier.

    Parameters
    ------------
    eta : float
        Learning rate (between 0.0 and 1.0)
    n_iter : int
        Passes over the training dataset.
    random_state : int
        The seed of the pseudo random number generator.

    Attributes
    -----------
    w_ : 1d-array
        Weights after fitting.
    errors_ : list
        Number of misclassifications in every epoch.

    """
    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        """ Fit training data.

        Parameters
        ----------
        X : array-like; shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like; shape = [n_samples]
            Target values or labels.

        Returns
        -------
        self : object

        """
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1+X.shape[1])
        self.cost_ = []

        for i in range(self.n_iter):
            output = self.activation(X)
            
            # Cost function
            error = (y - output)
            cost = (error**2).sum() / 2.0
            self.cost_.append(cost)
            
            # Update rule
            self.w_[1:] += self.eta * X.T.dot(error)
            self.w_[0] += self.eta * error.sum()
            
        return self

    def net_input(self, X):
        """Calculate net input"""
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def activation(self, X):
        """Compute linear activation"""
        return self.net_input(X)

    def predict(self, X):
        """Return class label after unit step"""
        return np.where(self.activation(X) >= 0.0, 1, -1)

In [8]:
ada = AdalineGD(n_iter=20, eta=0.01)
ada.fit(X_train_std, y_train)

# testing accuracy
y_pred = ada.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

Misclassified samples: 17
Accuracy: 0.43


ADA With 3 Features

In [21]:
X2 = filtered_df[['sepal length (cm)', 'sepal width (cm)', 'petal width (cm)']]
y = pd.factorize(filtered_df['variety'])[0]

X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.30, random_state=42)

print('#Training data points: {}'.format(X_train.shape[0]))
print('#Testing data points: {}'.format(X_test.shape[0]))
print('Class labels: {} (mapped from {}'.format(np.unique(y), np.unique(filtered_df['variety'])))

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

ada2 = AdalineGD(n_iter=20, eta=0.01)
ada2.fit(X_train_std, y_train)

# testing accuracy
y_pred = ada2.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

#Training data points: 70
#Testing data points: 30
Class labels: [0 1] (mapped from ['setosa' 'virginica']
Misclassified samples: 17
Accuracy: 0.43


In [22]:
X3 = filtered_df[['sepal length (cm)', 'sepal width (cm)', 'petal width (cm)', 
                'petal length (cm)']]
y = pd.factorize(filtered_df['variety'])[0]

X_train, X_test, y_train, y_test = train_test_split(X3, y, test_size=0.30, random_state=42)

print('#Training data points: {}'.format(X_train.shape[0]))
print('#Testing data points: {}'.format(X_test.shape[0]))
print('Class labels: {} (mapped from {}'.format(np.unique(y), np.unique(filtered_df['variety'])))

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

ada2 = AdalineGD(n_iter=20, eta=0.01)
ada2.fit(X_train_std, y_train)

# testing accuracy
y_pred = ada2.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

#Training data points: 70
#Testing data points: 30
Class labels: [0 1] (mapped from ['setosa' 'virginica']
Misclassified samples: 30
Accuracy: 0.00


In all cases, it appears that the AdalineGD model didn't perform well on this dataset. The accuracy is quite low, indicating that the model might not be suitable for this classification task. Additionally, for the 4-feature case, the accuracy dropped to 0%, suggesting that the model might not be able to find a suitable decision boundary for these features.