# Assignment 1: Linear Regression

MIPT Master Program: ML Systems for Smart Production

Course: ML (Semester 1)

Assignment 1: **Linear Regression**

Code by Dmitry Kochetkov

In [335]:
import pandas as pd
import numpy as np
from sklearn import linear_model, base
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.model_selection import GridSearchCV
import enum
import matplotlib.pyplot as plt

plt.style.use('dark_background')

## Loading Data

In [336]:
class Dataset:
    def __init__(self, path: str):
        if not isinstance(path, str):
            raise ValueError('Path must be str')

        path = './data/3.txt'
        with open(path) as data_file:
            dim = int(data_file.readline()) # number of features
            x_train = list()
            y_train = list()
            x_test = list()
            y_test = list()

            n = int(data_file.readline()) # train set size
            for i in range(n):
                data_row = list(map(float, data_file.readline().split()))
                x_train.append(data_row[:-1])
                y_train.append(data_row[-1])
            
            k = int(data_file.readline()) # test set size
            for i in range(k):
                data_row = list(map(float, data_file.readline().split()))
                x_test.append(data_row[:-1])
                y_test.append(data_row[-1])

            self.x_train = np.matrix(x_train)
            self.y_train = np.matrix(y_train)
            self.x_test = np.matrix(x_test)
            self.y_test = np.matrix(y_test)

            self.y_train = self.y_train.transpose()
            self.y_test = self.y_test.transpose()

## Define Metrics

In [337]:
class Metrics:
    def mape(actual, predicted):
        return mean_absolute_percentage_error(actual, predicted)

    def smape(actual, predicted):
        A = np.array(actual.transpose())
        P = np.array(predicted.transpose())
        return 1.0 / len(A) * np.sum(2 * np.abs(P - A) / (np.abs(A) + np.abs(P)))

    def nrmse(actual, predicted):
        return mean_squared_error(actual, predicted, squared=False) / (actual.max() - actual.min())

## Main function

In [338]:
regressors = dict()
regressors['ols'] = linear_model.LinearRegression()
regressors['ridge'] = linear_model.Ridge()
regressors['lasso'] = linear_model.Lasso()
regressors['elastic-net'] = linear_model.ElasticNet()
regressors['sgd'] = linear_model.SGDRegressor()

def assignment(dataset: Dataset, method='ols'):
    regressor = regressors[method]
    regressor.fit(dataset.x_train, dataset.y_train)
    predicted = regressor.predict(dataset.x_test)

    print('Result MAPE: {0}'.format(Metrics.mape(dataset.y_test, predicted)))
    print('Result SMAPE: {0}'.format(Metrics.smape(dataset.y_test, predicted)))
    print('Result NRMSE: {0}'.format(Metrics.nrmse(dataset.y_test, predicted)))
    print('Coefficient of determination: {0}'.format(r2_score(dataset.y_test, predicted)))

## OLS

In [339]:
dataset = Dataset('./data/1.txt')
assignment(dataset, 'ols')

Result MAPE: 0.0022111485096134786
Result SMAPE: 0.5561509053151132
Result NRMSE: 0.0008965152289746172
Coefficient of determination: 0.9999775126820426


## OLS: Ridge

In [340]:
assignment(dataset, 'ridge')

Result MAPE: 0.00028471155330226896
Result SMAPE: 0.07306178223749862
Result NRMSE: 0.00011841372029401789
Coefficient of determination: 0.9999996076927178


## OLS: Lasso

In [341]:
assignment(dataset, 'lasso')

Result MAPE: 0.00027776587329172086
Result SMAPE: 0.07163178274323502
Result NRMSE: 5.950619961484769e-05
Coefficient of determination: 0.9999999009289542


## OLS: Elastic-Net

In [342]:
assignment(dataset, 'elastic-net')

Result MAPE: 0.0002777658748746221
Result SMAPE: 0.07163178315444149
Result NRMSE: 5.950620001907231e-05
Coefficient of determination: 0.9999999009289529
