In [6]:
# Imports
import numpy as np
import math
import pandas
from bokeh.plotting import figure, show, output_file, output_notebook
from bokeh.layouts import column
from sklearn import datasets, metrics
from sklearn.linear_model import LinearRegression
import numpy.linalg as linalg

In [7]:
# Data importing and organizing
# Columns of forestfires.csv used as regression inputs.
FEATURE_COLUMNS = ["FFMC","DMC","DC","ISI","temp","RH","wind"]

data = pandas.read_csv("forestfires.csv", sep=",")

# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0.
# Selecting the columns and taking .values gives the same 2-D ndarray and
# works on both old and current pandas releases.
features = data[FEATURE_COLUMNS].values

# Design matrix: the selected columns followed by one trailing column of ones
# (one row per CSV record).
X = np.hstack([features, np.ones((len(features), 1))])

# Target as a single-column 2-D array taken from the "area" column.
Y = data[["area"]].values

In [8]:
# Algorithm definition
def msum(a,b):
    """Element-wise sum of two 2-D sequences, returned as a numpy array.

    Walks every row of `a` and every position inside that row, adding the
    entry of `b` found at the same position. Only positions that exist in
    `a` are visited, so `b` has to provide at least those positions.
    """
    totals = [
        [a[r][c] + b[r][c] for c in range(len(a[r]))]
        for r in range(len(a))
    ]
    return np.asarray(totals)

def msub(a,b):
    """Element-wise difference a - b of two 2-D sequences, as a numpy array.

    Walks every row of `a` and every position inside that row, subtracting
    the entry of `b` found at the same position. Only positions that exist
    in `a` are visited, so `b` has to provide at least those positions.
    """
    diffs = [
        [a[r][c] - b[r][c] for c in range(len(a[r]))]
        for r in range(len(a))
    ]
    return np.asarray(diffs)

def mmult(a,b):
    """Return np.dot(a, b) converted to a numpy ndarray."""
    product = np.dot(a, b)
    return np.asarray(product)

def minv(a):
    """Return the inverse of `a`, computed by numpy.linalg.inv, as an ndarray."""
    inverse = linalg.inv(a)
    return np.asarray(inverse)

def mI(val,size):
    """Build a size-by-size array holding `val` on the diagonal and 0 elsewhere."""
    grid = [
        [val if r == c else 0 for c in range(size)]
        for r in range(size)
    ]
    return np.asarray(grid)

def mIs(val,size):
    """Build a size-by-size array whose diagonal is taken from `val`.

    Diagonal position k receives val[k] while k is a valid index of `val`;
    any remaining diagonal positions, and every off-diagonal position, are 0.
    """
    grid = []
    for r in range(size):
        # Diagonal entry for this row; 0 once `val` has run out of items.
        diag_entry = val[r] if r < len(val) else 0
        grid.append([diag_entry if r == c else 0 for c in range(size)])
    return np.asarray(grid)

def linear(X,y):
    """Solve the normal equations (X^T X) w = X^T y for w.

    Both sides are formed with mmult and the system is handed to
    numpy.linalg.solve, whose result is returned unchanged.
    """
    Xt = X.T
    gram = mmult(Xt, X)
    moment = mmult(Xt, y)
    return linalg.solve(gram, moment)

def takeFromSvd(X,amnt):
    """Build a reduced matrix from the leading `amnt` components of X's SVD.

    numpy.linalg.svd (thin form, full_matrices=False) factors X into a left
    factor, a vector of singular values and a right factor. The singular
    values are expanded to a diagonal matrix with mIs, each factor is cut
    down using `amnt`, and the product left * diag * right is returned.

    NOTE(review): the right factor is cut to its top-left amnt-by-amnt block
    rather than to its first `amnt` rows, so the result has at most `amnt`
    columns instead of X's column count — confirm this is the intended
    reduction.
    """
    left, singular, right = linalg.svd(X, compute_uv=True, full_matrices=False)
    singular_diag = mIs(singular, len(singular))

    left_k = left[:, :amnt]
    diag_k = singular_diag[:amnt, :amnt]
    right_k = right[:amnt, :amnt]

    return mmult(left_k, mmult(diag_k, right_k))

In [13]:
# Calculations
def format_equation(coefs):
    """Render a column of coefficients as a "y = ..." equation string.

    `coefs` is indexed as coefs[k][0]. Every coefficient except the last is
    followed by "*x_<k+1>"; the last one is printed bare. Non-negative
    coefficients after the first are prefixed with " +"; negative ones rely
    on their own minus sign and get no prefix.
    """
    last = len(coefs) - 1
    pieces = []
    for idx in range(len(coefs)):
        value = coefs[idx][0]
        # The sign prefix must come from the same vector that is being printed.
        prefix = " +" if (idx != 0 and value >= 0) else ""
        suffix = ("*x_" + str(idx + 1)) if idx != last else ""
        pieces.append(prefix + str(value) + suffix)
    return "y = " + "".join(pieces)

# Multi Dimensional
coefsMD = linear(X,Y)
line = format_equation(coefsMD)
print(line)

# One Dimensional (regression on the matrix returned by takeFromSvd(X,2))
nX = takeFromSvd(X,2)
coefs2D = linear(nX,Y)
# Previously the sign of each term here was looked up in coefsMD instead of
# coefs2D, so a negative coefs2D entry could be printed with a stray " +".
line = format_equation(coefs2D)
print(line)

y = -0.0322640169755*x_1 +0.0767763112237*x_2-0.005536141137*x_3-0.695845237536*x_4 +0.818847893583*x_5-0.206876267535*x_6 +1.49062061581*x_7 +4.27674160984
y = 0.0763092318168*x_1 +0.0557658873356


In [10]:
# Visualizations

TOOLS="hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,reset,tap,save,"
p = figure(tools=TOOLS)

# Scatter of the first column of nX against the values in Y; every marker
# gets the same red fill colour.
first_component = nX[:,0]
observed = [row[0] for row in Y]
marker_colors = ["#%02x%02x%02x" % (255,0,0)] * len(Y)
p.scatter(first_component, observed, fill_color=marker_colors)

# Straight line drawn over the same x values.
# NOTE(review): the last entry of coefs2D is used as the intercept and the
# first as the slope; coefs2D is fitted on both columns of nX, so confirm the
# last column of nX really acts as a constant term.
line_offset = coefs2D[-1][0]
line_slope = coefs2D[0][0]
p.line(list(first_component), [line_offset + v*line_slope for v in first_component])


# Both outputs are enabled: a standalone HTML file and inline notebook display.
output_file("LinearRegression.html", title="Linear Regression")
output_notebook()

show(p)