In [1]:
import csv
import numpy
import plotly
from scipy.optimize import linprog
from plotly.graph_objs import Scatter, Layout


def read_data(file_path, feature_count=4):
    """Load a regression data set from a comma-separated file.

    The first line is treated as a header and skipped. In every remaining
    non-blank row the first ``feature_count`` fields are parsed as float
    features and the field right after them as the float target.

    Args:
        file_path: Path of the CSV file to read.
        feature_count: Number of leading feature columns in each row.
            Defaults to 4.

    Returns:
        A two-element list ``[matrix, vector]`` where ``matrix`` is a list
        of feature rows (lists of floats) and ``vector`` is the list of
        targets. Both are empty when the file contains no data rows.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
        IndexError: If a non-blank row has no field at index
            ``feature_count`` (the target column).
    """
    matrix = []
    vector = []
    with open(file_path, newline='') as file:
        reader = csv.reader(file, delimiter=',', quotechar='|')
        # A default of None keeps a completely empty file from raising
        # StopIteration while skipping the header.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields an empty list for blank lines.
                continue
            matrix.append([float(field) for field in row[:feature_count]])
            vector.append(float(row[feature_count]))
    return [matrix, vector]


def find_mean_square_error(result_vector, output_vector):
    """Return the mean of the squared differences between two vectors.

    Both arguments may be plain lists, 1-D arrays, or row/column
    ``numpy.matrix`` objects. They are flattened before being compared, so
    the divisor is always the number of elements regardless of whether the
    data is laid out as a row or as a column.

    Args:
        result_vector: Predicted values.
        output_vector: Reference values; must contain as many elements as
            ``result_vector``.

    Returns:
        The mean squared error as a numpy floating-point scalar.

    Raises:
        ValueError: If the inputs are empty or hold a different number of
            elements.
    """
    predicted = numpy.asarray(result_vector, dtype=float).ravel()
    expected = numpy.asarray(output_vector, dtype=float).ravel()
    if predicted.size == 0:
        raise ValueError("cannot compute the mean square error of empty input")
    if predicted.size != expected.size:
        raise ValueError(
            "result_vector and output_vector must have the same number of "
            "elements ({} != {})".format(predicted.size, expected.size)
        )
    residual = predicted - expected
    # Divide by the element count, not by shape[0]: a 1-D input wrapped in
    # numpy.matrix has shape (1, n), which would turn the mean into a sum.
    return numpy.sum(numpy.square(residual)) / residual.size


def compose_matrix(matrix):
    """Build the block constraint matrix used by ``solve_linear``.

    For an input ``W`` with ``n`` rows and ``d`` columns the result has
    ``2n`` rows and ``d + n`` columns and is laid out as

        [  W  -I ]
        [ -W  -I ]

    where ``I`` is the ``n x n`` identity matrix.

    Args:
        matrix: 2-D sequence (list of equally long rows) of numbers.

    Returns:
        The stacked 2-D numpy matrix/array; callers in this file convert it
        with ``.tolist()``.
    """
    weight_matrix = numpy.matrix(matrix)
    # One slack column per input row, taken from the converted matrix so no
    # separate peek at matrix[0] is needed.
    identity_minus = -numpy.identity(weight_matrix.shape[0])
    upper_half = numpy.concatenate((weight_matrix, identity_minus), axis=1)
    lower_half = numpy.concatenate((-weight_matrix, identity_minus), axis=1)
    return numpy.concatenate((upper_half, lower_half), axis=0)


def solve_linear(matrix, vector, tolerance):
    """Fit linear weights by minimising the sum of absolute residuals.

    The fit is posed as a linear program over ``d`` unbounded weights ``w``
    followed by ``n`` non-negative slack variables ``a``:

        minimise    sum(a)
        subject to   W w - a <=  y
                    -W w - a <= -y

    so every slack bounds the absolute residual of its row.

    Args:
        matrix: ``n`` feature rows of ``d`` numbers each (list of lists).
        vector: ``n`` target values.
        tolerance: Value forwarded to ``linprog`` as the ``"tol"`` option.

    Returns:
        A list with the ``n`` fitted values ``sum_j w[j] * matrix[i][j]``.

    Raises:
        RuntimeError: If ``linprog`` returns no solution vector.
    """
    dimension = len(matrix[0])
    size = len(matrix)
    weight_bounds = (None, None)
    a_bounds = (0.0, None)
    list_bounds = dimension * [weight_bounds] + size * [a_bounds]
    targets = numpy.array(vector)
    vector_b = numpy.concatenate((targets, -targets))
    matrix_A = compose_matrix(matrix).tolist()
    # Only the slack variables contribute to the objective.
    vector_c = dimension * [0] + size * [1]
    # NOTE(review): "bland" and "tol" look like options of SciPy's simplex
    # solver - confirm they are honoured by the installed SciPy version.
    res = linprog(vector_c, A_ub=matrix_A, b_ub=vector_b, bounds=list_bounds,
                  options={"disp": True, "bland": True, "tol": tolerance})
    if res.x is None:
        raise RuntimeError("linprog returned no solution: {}".format(res.message))
    # The first `dimension` entries are the weights, the rest are slacks.
    # Slicing by `dimension` (not a fixed 4) keeps this valid for any
    # number of feature columns.
    weights = res.x[:dimension]
    y_results = []
    for row in matrix:
        fitted = sum((weights[j] * row[j] for j in range(dimension)), 0.0)
        y_results.append(fitted)
    return y_results


def solve_SVD(X, y, dim, coefficient):
    """Apply an SVD-based shrinkage operator to ``y``.

    Computes ``V * D * V^T * y`` where ``V^T`` is the matrix of right
    singular vectors from the reduced SVD of ``X`` and ``D`` is a
    ``dim x dim`` diagonal matrix with entries
    ``s_k**2 / (s_k**2 + coefficient)`` built from the first ``dim``
    singular values ``s_k``.

    Args:
        X: 2-D matrix to decompose.
        y: Vector (or matrix) multiplied by the shrinkage operator.
        dim: Number of singular values used for the diagonal of ``D``.
        coefficient: Term added to each squared singular value in the
            denominator.

    Returns:
        The product ``V * D * V^T * y``.
    """
    _, singular_values, right_vectors_t = numpy.linalg.svd(X, full_matrices=False)
    right_vectors = numpy.matrix(right_vectors_t).transpose()

    D = numpy.zeros((dim, dim))
    for k in range(dim):
        squared = singular_values[k] ** 2
        D[k, k] = squared / (squared + coefficient)

    # Same multiplication order as before: (V * D) * V^T, then applied to y.
    smoother = numpy.dot(numpy.dot(right_vectors, D), right_vectors_t)
    return numpy.dot(smoother, y)


# --- Load the data ----------------------------------------------------------
# NOTE: absolute, machine-specific path; adjust it when running elsewhere.
[matrix, vector] = read_data('D:\\basketball.csv')
x_coeficients = list(range(0, len(vector)))

# --- Linear-programming fit -------------------------------------------------
# solve_linear passes "disp": True to linprog, so this prints solver status.
tolerance = 0.001
linear_results = solve_linear(matrix, vector, tolerance)

# --- SVD-based fit for a single coefficient ---------------------------------
# X is transposed so that its rows are features and its columns are samples;
# y is a column vector with one entry per sample.
X = numpy.matrix(matrix).transpose()
dim = X.shape[0]
size = X.shape[1]
y = numpy.matrix(vector).transpose()
tau_coefficient = 0.4
SVD_results = solve_SVD(X, y, dim, tau_coefficient)
square_error = find_mean_square_error(SVD_results, y)

# --- Error as a function of the coefficient ---------------------------------
errors = list()
coefficients = numpy.arange(0.0, 0.6, 0.001)
for coefficient in coefficients:
    y_real = solve_SVD(X, y, dim, coefficient)
    # A separate name keeps `square_error` (the error for tau_coefficient)
    # from being overwritten by the sweep.
    sweep_error = find_mean_square_error(y_real, y)
    errors.append(sweep_error)

# --- Plot -------------------------------------------------------------------
trace = Scatter(
        x=coefficients,
        y=errors
    )
layout = Layout(
    title="Mean square error vs. coefficient",
    xaxis=dict(title="coefficient"),
    yaxis=dict(title="mean square error"),
)
# Writes an HTML file and returns its URL, which becomes the cell output.
plotly.offline.plot({"data": [trace], "layout": layout})

Optimization terminated successfully.
         Current function value: 208.953912  
         Iterations: 138


'file://C:\\Users\\Анита\\temp-plot.html'