In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
class LinearModel:
    """Common base for linear models: stores the solver configuration."""

    def __init__(self, step_size=0.2, max_iter=100, eps=1e-5,
                 theta_0=None, verbose=True):
        """Record the hyperparameters on the instance.

        Args:
            step_size: Step size for iterative solvers; kept as
                ``self.step_size``.
            max_iter: Maximum number of solver iterations; kept as
                ``self.max_iter``.
            eps: Convergence threshold; kept as ``self.eps``.
            theta_0: Initial guess for theta, kept as ``self.theta``
                (``None`` means no initial guess was supplied).
            verbose: Verbosity flag; kept as ``self.verbose``.
        """
        # Parameter vector starts out as whatever the caller supplied.
        self.theta = theta_0
        # Solver settings.
        self.step_size, self.max_iter, self.eps = step_size, max_iter, eps
        self.verbose = verbose
        
def add_intercept(x):
    """Return a copy of x with a leading column of ones.

    Args:
        x: a 2-D array of shape (rows, cols).

    Returns:
        New array of shape (rows, cols + 1) with x's dtype: column 0 is
        all ones and columns 1.. hold the values of x.
    """
    n_rows, n_cols = x.shape[0], x.shape[1]

    # Start from all ones so column 0 is already the intercept term,
    # then overwrite the remaining columns with the original features.
    with_bias = np.ones((n_rows, n_cols + 1), dtype=x.dtype)
    with_bias[:, 1:] = x
    return with_bias

def load_dataset(csv_path, label_col='y', add_intercept=False):
    """Load dataset from a CSV file.

    The first line of the file is read as a comma-separated header. Every
    column whose header starts with 'x' is loaded as an input feature and
    the column whose header equals label_col is loaded as the label.

    Args:
         csv_path: Path to CSV file containing dataset.
         label_col: Name of column to use as labels (must be 'y' or 't').
         add_intercept: Add an intercept entry to x-values.

    Returns:
        xs: Numpy array of x-values (inputs), shape (rows, features),
            dtype np.longdouble.
        ys: Numpy array of y-values (labels), shape (rows,),
            dtype np.longdouble.

    Raises:
        ValueError: If label_col is not one of the allowed names.
    """

    def add_intercept_fn(x):
        # The boolean parameter `add_intercept` shadows the module-level
        # function of the same name inside load_dataset, so the function
        # is looked up explicitly as a global here.
        global add_intercept
        return add_intercept(x)

    # Validate label_col argument
    allowed_label_cols = ('y', 't')
    if label_col not in allowed_label_cols:
        raise ValueError('Invalid label_col: {} (expected {})'
                         .format(label_col, allowed_label_cols))

    # Load headers
    with open(csv_path, 'r') as csv_fh:
        headers = csv_fh.readline().strip().split(',')

    # Load features and labels
    x_cols = [i for i, name in enumerate(headers) if name.startswith('x')]
    l_cols = [i for i, name in enumerate(headers) if name == label_col]

    # np.longdouble is used instead of the np.longfloat alias, which was
    # removed in NumPy 2.0. ndmin keeps the (rows, features) / (rows,)
    # layout even when the file has a single data row or a single feature.
    inputs = np.loadtxt(csv_path, delimiter=',', skiprows=1, usecols=x_cols,
                        dtype=np.longdouble, ndmin=2)
    labels = np.loadtxt(csv_path, delimiter=',', skiprows=1, usecols=l_cols,
                        dtype=np.longdouble, ndmin=1)

    if add_intercept:
        inputs = add_intercept_fn(inputs)
    return inputs, labels

In [None]:
# Locations of the ds4 training and validation CSV files.
# NOTE(review): these are absolute paths tied to one machine's desktop;
# adjust them before running anywhere else.
ds4_train_set_path = "C:/Users/acer/Desktop/ds4_train.csv"
ds4_valid_set_path = "C:/Users/acer/Desktop/ds4_valid.csv"

In [None]:
# Load both splits with the default label column ('y'); add_intercept=True
# asks load_dataset to prepend a column of ones to the inputs.
x_train, y_train = load_dataset(ds4_train_set_path, add_intercept=True)
x_valid, y_valid = load_dataset(ds4_valid_set_path, add_intercept=True)

In [None]:
class PoissonRegression(LinearModel):
    """Poisson Regression.

    Note: a class with this same name is defined again further down in this
    file; once that later definition runs, it replaces this one.

    Example usage:
        > pr = PoissonRegression(step_size=lr)
        > pr.fit(x_train, y_train)
        > pr.predict(x_eval)
    """

    def h(self, theta, x):
        """Vectorized implementation of h.

        :param theta: Shape (n,).
        :param x:     Training example inputs. Shape (m, n).
        :return:      The hypothesis of Poisson Regression, given by h(x) = exp(theta^T x). Shape (m,).
        """
        return np.exp(np.dot(x, theta))

    def fit(self, x, y):
        """Run gradient ascent to maximize likelihood for Poisson regression.

        Starting from the initial guess (or zeros when none was given),
        repeatedly adds step_size / m * x^T (y - h(x)) to theta until the
        L1 norm of that step falls below self.eps. The fitted vector is
        stored in self.theta.

        Note: self.max_iter is not consulted here; iteration ends only
        through the eps test.

        :param x: Training example inputs. Shape (m, n).
        :param y: Training example labels. Shape (m,).
        """
        m, n = x.shape

        def next_step(theta):
            # Average log-likelihood gradient scaled by the step size.
            return self.step_size / m * np.dot(x.T, (y - self.h(theta, x)))

        # Initialize theta
        if self.theta is None:
            theta = np.zeros(n)
        else:
            # Work on a floating-point copy: the in-place updates below must
            # not modify the caller's theta_0, and integer or list initial
            # guesses could not absorb float steps in place.
            start = np.asarray(self.theta)
            theta = start.astype(np.result_type(start.dtype, np.float64))

        # Update theta
        step = next_step(theta)
        while np.linalg.norm(step, 1) >= self.eps:
            theta += step
            step = next_step(theta)

        self.theta = theta

    def predict(self, x):
        """Make a prediction given inputs x.

        :param x: Inputs of shape (m, n).
        :return   Floating-point prediction for each input, shape (m,).
        """

        return self.h(self.theta, x)

In [None]:
class PoissonRegression(LinearModel):
    """Poisson Regression.

    Example usage:
        > clf = PoissonRegression(step_size=lr)
        > clf.fit(x_train, y_train)
        > clf.predict(x_eval)
    """

    def h(self, theta, x):
        """Vectorized implementation of h.

        :param theta: Shape (n,).
        :param x:     Training example inputs. Shape (m, n).
        :return:      The hypothesis of Poisson Regression, given by h(x) = exp(theta^T x). Shape (m,).
        """
        return np.exp(x @ theta)

    def fit(self, x, y):
        """Run gradient ascent to maximize likelihood for Poisson regression.

        Each iteration adds step_size / m * x^T (y - h(x)) to theta; the
        loop stops once the L1 norm of that step is below self.eps, and the
        result is stored in self.theta.

        Note: self.max_iter is not consulted here; the eps test is the only
        stopping rule.

        :param x: Training example inputs. Shape (m, n).
        :param y: Training example labels. Shape (m,).
        """
        m, n = x.shape

        def next_step(theta):
            # Average log-likelihood gradient scaled by the step size.
            return self.step_size / m * np.dot(x.T, (y - self.h(theta, x)))

        # Initialize theta
        if self.theta is None:
            theta = np.zeros(n)
        else:
            # Copy (and promote to floating point) so that the in-place
            # updates below never write into the caller's theta_0 and also
            # work when the initial guess is an integer array or a list.
            initial = np.asarray(self.theta)
            theta = initial.astype(np.result_type(initial.dtype, np.float64))

        # Update theta
        step = next_step(theta)
        while np.linalg.norm(step, 1) >= self.eps:
            theta += step
            step = next_step(theta)

        self.theta = theta

    def predict(self, x):
        """Make a prediction given inputs x.

        :param x: Inputs of shape (m, n).
        :return   Floating-point prediction for each input, shape (m,).
        """

        return self.h(self.theta, x)

In [None]:
# Build a Poisson regression model with the default constructor arguments
# and fit it on the training split.
pr = PoissonRegression()
pr.fit(x_train, y_train)

def plot(y_label, y_pred, title):
    """Plot labels and predictions against sample index in a new figure.

    Args:
        y_label: Sequence of ground-truth values, drawn as green circles.
        y_pred: Sequence of predicted values, drawn as red crosses.
        title: Text used as the figure title.
    """
    # Open a fresh figure so that consecutive calls do not draw onto the
    # same axes and overwrite each other's title and legend.
    plt.figure()
    plt.plot(y_label, 'go', label='label')
    plt.plot(y_pred, 'rx', label='prediction')
    plt.suptitle(title, fontsize=12)
    plt.legend(loc='upper left')
    
# Predict on the training split and plot the predictions next to the labels.
y_train_pred = pr.predict(x_train)
plot(y_train, y_train_pred, 'Training Set')

# Same for the validation split.
y_valid_pred = pr.predict(x_valid)
plot(y_valid, y_valid_pred, 'Validation Set')