In [None]:
%autosave 15
%matplotlib inline

import numpy as np
import scipy as sp
import pandas as pd
import math
import random

from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from IPython.display import display, HTML

In [None]:
def getData(path='prices.txt'):
    """Load the housing data set from a comma-separated text file.

    The first line of the file is treated as a header and skipped. Every
    other non-blank line must hold three integers: ``area,rooms,price``.

    Args:
        path: Location of the data file. Defaults to ``'prices.txt'``.

    Returns:
        A ``(data, actualPrice)`` tuple. ``data`` is a list of
        ``(area, rooms)`` tuples in file order; ``actualPrice`` maps each
        ``(area, rooms)`` tuple to its price. If the same ``(area, rooms)``
        pair occurs on several lines, ``data`` keeps every occurrence while
        ``actualPrice`` keeps only the last price.

    Raises:
        ValueError: If a non-blank data line does not consist of exactly
            three comma-separated integers.
    """
    # The context manager closes the file even if reading raises.
    with open(path, 'r') as fin:
        lines = fin.readlines()

    actualPrice = dict()
    data = list()

    for line in lines[1:]:
        # Tolerate blank lines (e.g. an empty line at the end of the file),
        # which would otherwise fail in int('').
        if not line.strip():
            continue
        area, rooms, price = map(int, line.split(','))
        data.append((area, rooms))
        actualPrice[(area, rooms)] = price

    return (data, actualPrice)

# Load the data set once. `actualPrice` is read as a module-level global by
# standardDeviation, getArrays and gradient.
data, actualPrice = getData()

In [None]:
def k_fold_cv(k, length):
    """Split the indices ``0 .. length - 1`` into ``k`` random cross-validation folds.

    The indices are shuffled once and cut into ``k`` consecutive slices, so
    the test sets are disjoint and together cover every index. When
    ``length`` is not divisible by ``k`` the first ``length % k`` folds get
    one extra element.

    Args:
        k: Number of folds; must be positive.
        length: Number of samples to split.

    Returns:
        A list of ``k`` tuples ``(train_suit, test_suit)``. ``test_suit``
        holds the fold's indices in random order; ``train_suit`` holds all
        remaining indices in ascending order. Both are lists of ``int``.

    Raises:
        ValueError: If ``k`` is not positive.
    """
    if k <= 0:
        raise ValueError('k must be a positive number of folds, got %r' % (k,))

    # One shuffle replaces the repeated random pick + list.remove().
    shuffled = np.random.permutation(length).tolist()
    one_fold_length, others = divmod(length, k)

    result = list()
    start = 0
    for fold in range(k):
        size = one_fold_length + (1 if fold < others else 0)
        test_suit = shuffled[start:start + size]
        start += size

        # Set membership keeps building the training list linear in `length`.
        held_out = set(test_suit)
        train_suit = [j for j in range(length) if j not in held_out]

        result.append((train_suit, test_suit))
    return result

In [None]:
# w - coefficient vector [area, rooms, free]

def predictPrice(area, rooms, w):
    """Evaluate the linear model ``area * w[0] + rooms * w[1] + w[2]``.

    Args:
        area: Value (or array of values) multiplied by ``w[0]``.
        rooms: Value (or array of values) multiplied by ``w[1]``.
        w: Indexable coefficient vector; ``w[2]`` is the free (constant) term.

    Returns:
        The predicted price for the given inputs.
    """
    area_weight, rooms_weight, free_term = w[0], w[1], w[2]
    weighted_features = area * area_weight + rooms * rooms_weight
    return weighted_features + free_term


def standardDeviation(w, data):
    """Root of the mean squared prediction error of weights ``w`` over ``data``.

    Args:
        w: Coefficient vector passed to ``predictPrice``.
        data: Sequence of ``(area, rooms)`` pairs; each pair must be a key of
            the module-level ``actualPrice`` mapping.

    Returns:
        ``sqrt(mean((predicted - actual) ** 2))`` over all pairs in ``data``.
    """
    squared_errors = [
        (predictPrice(area, rooms, w) - actualPrice[(area, rooms)]) ** 2
        for area, rooms in data
    ]
    mean_squared_error = sum(squared_errors) / len(data)
    return np.sqrt(mean_squared_error)

In [None]:
def getArrays(w, data):
    """Unpack ``data`` into parallel lists suitable for plotting.

    Args:
        w: Coefficient vector passed to ``predictPrice``.
        data: Sequence of ``(area, rooms)`` entries; each entry must be a key
            of the module-level ``actualPrice`` mapping.

    Returns:
        A tuple ``(x, y, pp, ap)`` of equally long lists: areas, room counts,
        predicted prices and actual prices.
    """
    areas, room_counts, predicted, actual = [], [], [], []
    for entry in data:
        area, rooms = entry[0], entry[1]
        areas.append(area)
        room_counts.append(rooms)
        predicted.append(predictPrice(area, rooms, w))
        actual.append(actualPrice[(area, rooms)])
    return areas, room_counts, predicted, actual

# Predicted values vs. actual values.
def printResults1(w, data):
    """Draw a 3D scatter plot of predicted (red) and actual (green) prices.

    Args:
        w: Coefficient vector used for the predictions.
        data: Sequence of ``(area, rooms)`` pairs to plot.
    """
    x, y, pp, ap = getArrays(w, data)
    fig = plt.figure()
    # Create the 3D axes through the figure: in recent Matplotlib releases
    # Axes3D(fig) no longer adds itself to the figure, leaving the plot empty.
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(xs=x, ys=y, zs=pp, zdir='z', color='r')
    ax.scatter(xs=x, ys=y, zs=ap, zdir='z', label='ss', color='g')
    ax.set_xlabel('area')
    ax.set_ylabel('rooms')
    ax.set_zlabel('price')
    plt.show()
    
# The plane obtained as the result of the regression.
def printResults2(w, data):
    """Draw the regression plane for ``w`` together with the actual prices.

    The plane is evaluated on an integer grid of areas 0..4999 and room
    counts 0..4; the actual prices of ``data`` are drawn as green points.

    Args:
        w: Coefficient vector defining the plane.
        data: Sequence of ``(area, rooms)`` pairs whose actual prices are plotted.
    """
    # Only the coordinates and the actual prices are plotted here.
    x, y, _, ap = getArrays(w, data)
    fig = plt.figure()
    # Create the 3D axes through the figure: in recent Matplotlib releases
    # Axes3D(fig) no longer adds itself to the figure, leaving the plot empty.
    ax = fig.add_subplot(111, projection='3d')
    x_surf = np.arange(0, 5000, 1)
    y_surf = np.arange(0, 5, 1)
    X, Y = np.meshgrid(x_surf, y_surf)
    # predictPrice is plain arithmetic, so it evaluates element-wise on the grid.
    Z = predictPrice(X, Y, w)
    ax.scatter(xs=x, ys=y, zs=ap, zdir='z', label='ys=0, zdir=z', color='g')
    ax.plot_surface(X, Y, Z, rstride=10, cstride=10, color='r')
    ax.set_xlabel('area')
    ax.set_ylabel('rooms')
    ax.set_zlabel('price')
    plt.show()

In [None]:
# w - coefficient vector [area, rooms, free]
def gradient(w, trainingData, step):
    """Perform one gradient-descent update of ``w`` on the mean squared error.

    Args:
        w: Current coefficient vector [area, rooms, free].
        trainingData: Sequence of ``(area, rooms)`` pairs; each pair must be a
            key of the module-level ``actualPrice`` mapping.
        step: Step size multiplied with the gradient.

    Returns:
        A new list holding ``w`` moved by ``step`` against the gradient.
    """
    grad_area = grad_rooms = grad_free = 0
    for area, rooms in trainingData:
        residual = predictPrice(area, rooms, w) - actualPrice[(area, rooms)]
        grad_area += residual * area
        grad_rooms += residual * rooms
        grad_free += residual

    sample_count = len(trainingData)
    # Derivative of the mean of squared residuals: 2 / N * sum(residual * feature).
    scaled = [g * 2 / sample_count for g in (grad_area, grad_rooms, grad_free)]
    return [w[idx] - step * scaled[idx] for idx in range(len(w))]

def gradientDescent(w0, trainingData, step, epsQ, epsV, n):
    """Fit the linear model by gradient descent with a decaying step size.

    A small step converges slowly; a large step risks getting stuck before
    reaching the minimum. Iteration ``i`` (1-based) uses ``step / i``.

    Args:
        w0: Vector of initial coefficients [area, rooms, free].
        trainingData: Sequence of ``(area, rooms)`` training pairs.
        step: Base step size.
        epsQ: Stopping criterion on quality: stop once the absolute change of
            ``standardDeviation`` between two consecutive iterations is
            below this value.
        epsV: Stopping criterion on the weights: stop once the Euclidean
            distance between two consecutive weight vectors is below this
            value (``0`` never triggers).
        n: Maximum number of steps.

    Returns:
        The coefficient vector after the last performed step (``w0`` itself
        when ``n`` is smaller than 1).
    """
    w = w0
    if n < 1:
        return w

    # Deviation of the current weights. It is carried over to the next
    # iteration so standardDeviation runs once per step instead of twice.
    currentSD = standardDeviation(w, trainingData)
    for iteration in range(1, n + 1):
        prevw, prevSD = w, currentSD
        w = gradient(w, trainingData, step / iteration)
        currentSD = standardDeviation(w, trainingData)
        if abs(prevSD - currentSD) < epsQ:
            return w
        shift = sum((prevw[k] - w[k]) ** 2 for k in range(len(w))) ** 0.5
        if shift < epsV:
            return w
    return w

In [None]:
def gradientLinearRegression(w0, data, step, cv_params=(1, 10),
                             epsQ=0.1, epsV=0, n=10000000):
    """Estimate the test error of gradient-descent regression by repeated k-fold CV.

    For each of ``tfold`` repetitions the data is split into ``kfold`` random
    folds; the model is trained on each training part and scored with
    ``standardDeviation`` on the matching test part. The fitted weights of
    every fold are printed.

    Args:
        w0: Initial coefficient vector handed to ``gradientDescent``.
        data: Sequence of ``(area, rooms)`` samples.
        step: Base step size handed to ``gradientDescent``.
        cv_params: Tuple ``(tfold, kfold)``: number of repetitions and number
            of folds per repetition.
        epsQ: Quality stopping threshold handed to ``gradientDescent``.
        epsV: Weight-change stopping threshold handed to ``gradientDescent``.
        n: Maximum number of steps handed to ``gradientDescent``.

    Returns:
        The deviation on the test folds averaged over all ``tfold * kfold`` fits.
    """
    tfold, kfold = cv_params
    average_err = 0
    for _ in range(tfold):
        for train_suit, test_suit in k_fold_cv(kfold, len(data)):
            training_suit = [data[idx] for idx in train_suit]
            testing_suit = [data[idx] for idx in test_suit]
            # gradientDescent has no defaults, so every stopping parameter
            # must be passed explicitly.
            w = gradientDescent(w0, training_suit, step, epsQ, epsV, n)
            print(w)
            average_err += standardDeviation(w, testing_suit)

    average_err /= tfold * kfold

    return average_err

In [None]:
# Fit on the whole data set and report the weights with their deviation.
w = gradientDescent([0, 0, 0], data, 0.0000001, 0.1, 0, 10000000)
print('weights:', w, 'deviation', standardDeviation(w, data))

# Build the comparison table in one go: DataFrame.append was removed in
# pandas 2.0, and appending row by row copies the frame on every iteration.
resValues = pd.DataFrame(
    [
        (area, rooms, actualPrice[(area, rooms)], predictPrice(area, rooms, w))
        for area, rooms in data
    ],
    columns=['area', 'rooms', 'price', 'predict'],
)

display(resValues)

In [None]:
# Plot the regression plane for the gradient-descent weights `w` together
# with the actual prices.
printResults2(w, data)

In [None]:
def differential_evolution(trainingData, crossover_probability=0.8, F=0.7,
                           population_size=20, generations=10000,
                           bound=1000000):
    """Fit the three model coefficients with differential evolution.

    Each generation builds, for every population member, a mutant
    ``a + F * (b - c)`` from three distinct other members, mixes it with the
    member component-wise, and keeps the candidate if it lowers
    ``standardDeviation`` on ``trainingData``. Replacements take effect
    immediately within the same generation.

    Args:
        trainingData: Sequence of ``(area, rooms)`` samples to fit.
        crossover_probability: Probability that a component of the candidate
            is taken from the mutant rather than from the current member.
        F: Differential weight applied to the donor difference.
        population_size: Number of candidate vectors; at least 4 so that
            three distinct donors other than the member itself exist.
        generations: Number of passes over the population.
        bound: Initial coefficients are drawn uniformly from
            ``[-bound, bound)``.

    Returns:
        A tuple ``(weights, deviation)`` with the best vector of the final
        population and its ``standardDeviation`` on ``trainingData``.

    Raises:
        ValueError: If ``population_size`` is smaller than 4.
    """
    if population_size < 4:
        raise ValueError('population_size must be at least 4')

    def generate_vector():
        return [np.random.uniform(-bound, bound) for _ in range(3)]

    population = [generate_vector() for _ in range(population_size)]
    # Cache each member's deviation so the incumbent is not re-evaluated
    # every time it is compared with a candidate.
    fitness = [standardDeviation(entity, trainingData) for entity in population]

    for _ in range(generations):
        for j in range(population_size):
            old_entity = population[j]

            # Three mutually distinct donors, none equal to j. A repeated
            # donor would collapse the difference term to zero.
            donors = list()
            while len(donors) < 3:
                candidate = int(np.random.randint(population_size))
                if candidate != j and candidate not in donors:
                    donors.append(candidate)
            a, b, c = (population[d] for d in donors)
            new_entity = [a[k] + F * (b[k] - c[k]) for k in range(3)]

            for k in range(3):
                if np.random.uniform(0, 1) > crossover_probability:
                    new_entity[k] = old_entity[k]

            new_fitness = standardDeviation(new_entity, trainingData)
            if new_fitness < fitness[j]:
                population[j] = new_entity
                fitness[j] = new_fitness

    # min() returns the first index holding the smallest deviation.
    best = min(range(population_size), key=fitness.__getitem__)
    return population[best], fitness[best]

# Run differential evolution on the full data set. Note that `accuracy` holds
# the deviation returned by differential_evolution, so lower is better.
w_evolution, accuracy = differential_evolution(data)
print('weights:', w_evolution, 'deviation', accuracy)

In [None]:
# Plot the regression plane for the weights found by differential evolution.
printResults2(w_evolution, data)