Assignment 1
Pattern Recognition - André E. Lazzaretti


Utilizando o wine dataset:
- Carregue os dados do dataset;
- Organize um código para selecionar duas features;
- Treine e avalie visualmente, como foi feito em aula, o resultado do modelo ADALINE;
- Repita o processo com outras features e verifique qual par de features fornece o melhor resultado visual.


In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

from sklearn.metrics import confusion_matrix

RuntimeError: module was compiled against NumPy C-API version 0x10 (NumPy 1.23) but the running NumPy has C-API version 0xf. Check the section C-API incompatibility at the Troubleshooting ImportError section at https://numpy.org/devdocs/user/troubleshooting-importerror.html#c-api-incompatibility for indications on how to solve this problem.

In [2]:
# Location of the wine dataset file, relative to the notebook's working directory.
DATA = "../assignments/data/wine/wine.data"

# Parse the file into a DataFrame with pandas' default CSV settings.
df = pd.read_csv(filepath_or_buffer=DATA)

In [3]:
# Display the column labels of the loaded DataFrame.
df.columns

Index(['Class', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash',
       'Magnesium', 'Total phenols', 'Flavanoids', 'Nonflavanoid phenols',
       'Proanthocyanins', 'Color intensity', 'Hue',
       'OD280/OD315 of diluted wines', 'Proline'],
      dtype='object')

In [11]:
# Select the target column and six candidate two-feature subsets of the
# wine dataset; each subset is used to train a separate ADALINE model.
#
# Feature columns available in the dataset (besides 'Class'):
#    1) Alcohol
#    2) Malic acid
#    3) Ash
#    4) Alcalinity of ash
#    5) Magnesium
#    6) Total phenols
#    7) Flavanoids
#    8) Nonflavanoid phenols
#    9) Proanthocyanins
#   10) Color intensity
#   11) Hue
#   12) OD280/OD315 of diluted wines
#   13) Proline (listed for reference; not part of any pair below)

# Target vector: the 'Class' column.
y = df["Class"]

# Each Xk is a two-column DataFrame holding one feature pair.
X1, X2, X3, X4, X5, X6 = (
    df[[first, second]]
    for first, second in (
        ('Alcohol', 'Malic acid'),
        ('Ash', 'Alcalinity of ash'),
        ('Magnesium', 'Total phenols'),
        ('Color intensity', 'Hue'),
        ('Flavanoids', 'Nonflavanoid phenols'),
        ('Proanthocyanins', 'OD280/OD315 of diluted wines'),
    )
)

In [5]:
class AdalineGD:
    """ADAptive LInear NEuron classifier trained by full-batch gradient descent.

    Parameters
    ------------
    eta : float
      Learning rate (between 0.0 and 1.0)
    n_iter : int
      Passes over the training dataset.
    random_state : int
      Random number generator seed for random weight
      initialization.


    Attributes
    -----------
    w_ : 1d-array
      Weights after fitting.
    b_ : Scalar
      Bias unit after fitting.
    losses_ : list
      Mean squared error loss function values in each epoch.

    """
    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        """ Fit training data.

        Parameters
        ----------
        X : {array-like}, shape = [n_examples, n_features]
          Training vectors, where n_examples is the number of examples and
          n_features is the number of features. NumPy arrays, nested
          sequences and pandas DataFrames are accepted.
        y : array-like, shape = [n_examples]
          Target values.

        Returns
        -------
        self : object

        """
        # Work on plain float ndarrays so the update rule below is ordinary
        # positional NumPy arithmetic regardless of the input container
        # (e.g. pandas objects would otherwise align on index labels).
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=X.shape[1])
        # np.float64 instead of the alias np.float_, which NumPy 2.0 removed.
        self.b_ = np.float64(0.)
        self.losses_ = []

        for _ in range(self.n_iter):
            # The activation is the identity here; it is kept as a separate
            # step so it can be swapped out (e.g. for a sigmoid) without
            # touching the training loop.
            output = self.activation(self.net_input(X))
            errors = y - output

            # Gradient-descent step on the mean squared error.
            self.w_ += self.eta * 2.0 * X.T.dot(errors) / X.shape[0]
            self.b_ += self.eta * 2.0 * errors.mean()

            # The recorded loss is the one measured before this epoch's update.
            self.losses_.append((errors**2).mean())
        return self

    def net_input(self, X):
        """Calculate net input"""
        return np.dot(np.asarray(X, dtype=float), self.w_) + self.b_

    def activation(self, X):
        """Compute linear activation"""
        return X

    def predict(self, X):
        """Return class label (0 or 1) after thresholding the output at 0.5"""
        return np.where(self.activation(self.net_input(X)) >= 0.5, 1, 0)

In [6]:
# Train ADALINE on the X1 feature pair; fit() returns the model itself.
adaline1 = AdalineGD(eta=0.001, n_iter=10, random_state=1).fit(X1, y)

# Plot the per-epoch mean squared error (one value per epoch, 1..n_iter).
fig = (
    px.line(
        x=range(1, adaline1.n_iter + 1),
        y=adaline1.losses_,
        labels={'x': 'Epochs', 'y': 'Mean Squared Error'},
        title='Adaline - Learning Rate: {}'.format(adaline1.eta),
    )
    .update_traces(line={'color': 'blue', 'width': 2})
    .update_layout(title_x=0.5, xaxis_title='Epochs', yaxis_title='Mean Squared Error')
)
fig.show()


In [15]:
# Train ADALINE on the X2 feature pair; fit() returns the model itself.
adaline2 = AdalineGD(eta=0.001, n_iter=10, random_state=1).fit(X2, y)

# Plot the per-epoch mean squared error (one value per epoch, 1..n_iter).
fig = (
    px.line(
        x=range(1, adaline2.n_iter + 1),
        y=adaline2.losses_,
        labels={'x': 'Epochs', 'y': 'Mean Squared Error'},
        title='Adaline - Learning Rate: {}'.format(adaline2.eta),
    )
    .update_traces(line={'color': 'blue', 'width': 2})
    .update_layout(title_x=0.5, xaxis_title='Epochs', yaxis_title='Mean Squared Error')
)
fig.show()


In [18]:
# Train ADALINE on the X3 feature pair; fit() returns the model itself.
adaline3 = AdalineGD(eta=0.001, n_iter=10, random_state=1).fit(X3, y)

# Plot the per-epoch mean squared error (one value per epoch, 1..n_iter).
fig = (
    px.line(
        x=range(1, adaline3.n_iter + 1),
        y=adaline3.losses_,
        labels={'x': 'Epochs', 'y': 'Mean Squared Error'},
        title='Adaline - Learning Rate: {}'.format(adaline3.eta),
    )
    .update_traces(line={'color': 'blue', 'width': 2})
    .update_layout(title_x=0.5, xaxis_title='Epochs', yaxis_title='Mean Squared Error')
)
fig.show()


In [20]:
# Train ADALINE on the X4 feature pair; fit() returns the model itself.
adaline4 = AdalineGD(eta=0.01, n_iter=10, random_state=1).fit(X4, y)

# Plot the per-epoch mean squared error (one value per epoch, 1..n_iter).
fig = (
    px.line(
        x=range(1, adaline4.n_iter + 1),
        y=adaline4.losses_,
        labels={'x': 'Epochs', 'y': 'Mean Squared Error'},
        title='Adaline - Learning Rate: {}'.format(adaline4.eta),
    )
    .update_traces(line={'color': 'blue', 'width': 2})
    .update_layout(title_x=0.5, xaxis_title='Epochs', yaxis_title='Mean Squared Error')
)
fig.show()

In [24]:
# Train ADALINE on the X5 feature pair; fit() returns the model itself.
adaline5 = AdalineGD(eta=0.1, n_iter=10, random_state=1).fit(X5, y)

# Plot the per-epoch mean squared error (one value per epoch, 1..n_iter).
fig = (
    px.line(
        x=range(1, adaline5.n_iter + 1),
        y=adaline5.losses_,
        labels={'x': 'Epochs', 'y': 'Mean Squared Error'},
        title='Adaline - Learning Rate: {}'.format(adaline5.eta),
    )
    .update_traces(line={'color': 'blue', 'width': 2})
    .update_layout(title_x=0.5, xaxis_title='Epochs', yaxis_title='Mean Squared Error')
)
fig.show()

In [32]:
# Train ADALINE on the X6 feature pair; fit() returns the model itself.
adaline6 = AdalineGD(eta=0.06, n_iter=10, random_state=1).fit(X6, y)

# Plot the per-epoch mean squared error (one value per epoch, 1..n_iter).
fig = (
    px.line(
        x=range(1, adaline6.n_iter + 1),
        y=adaline6.losses_,
        labels={'x': 'Epochs', 'y': 'Mean Squared Error'},
        title='Adaline - Learning Rate: {}'.format(adaline6.eta),
    )
    .update_traces(line={'color': 'blue', 'width': 2})
    .update_layout(title_x=0.5, xaxis_title='Epochs', yaxis_title='Mean Squared Error')
)
fig.show()