In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing

In [2]:
# Make a prediction with coefficients
def predict(row, coefficients):
  yhat = coefficients[0]
  for i in range(len(row)-1):
    yhat += coefficients[i + 1] * row[i]
  return yhat

In [3]:
# Estimate linear regression coefficients using stochastic gradient descent
def coefficients_sgd(train, l_rate, n_epoch):
    coef = [0.0 for i in range(len(train[0]))]
    print ('Coeficiente Inicial=' + str(coef))
    for epoch in range(n_epoch):
        sum_error = 0
        for row in train:
            yhat = predict(row, coef)
            error = yhat - row[-1]
            sum_error += error**2
            coef[0] = coef[0] - l_rate * error
            for i in range(len(row)-1):
                coef[i + 1] = coef[i + 1] - l_rate * error * row[i] 
        print(('epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_error)))
    return coef

In [4]:
def normalize(dataset):
    dataset_aux = dataset[dataset.columns[0:11]]
    print(dataset_aux)
    dataset_values = dataset_aux.values
    min_max_scaler = preprocessing.MinMaxScaler(feature_range=(1, 10))
    dataset_scaled = min_max_scaler.fit_transform(dataset_values)
    df = pd.DataFrame(dataset_scaled)
    return df

In [5]:
dataset = pd.read_csv("winequality-white.csv", delimiter=";")
dataset

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.270,0.36,20.70,0.045,45.0,170.0,1.00100,3.00,0.45,8.800000,6
1,6.3,0.300,0.34,1.60,0.049,14.0,132.0,0.99400,3.30,0.49,9.500000,6
2,8.1,0.280,0.40,6.90,0.050,30.0,97.0,0.99510,3.26,0.44,10.100000,6
3,7.2,0.230,0.32,8.50,0.058,47.0,186.0,0.99560,3.19,0.40,9.900000,6
4,7.2,0.230,0.32,8.50,0.058,47.0,186.0,0.99560,3.19,0.40,9.900000,6
5,8.1,0.280,0.40,6.90,0.050,30.0,97.0,0.99510,3.26,0.44,10.100000,6
6,6.2,0.320,0.16,7.00,0.045,30.0,136.0,0.99490,3.18,0.47,9.600000,6
7,7.0,0.270,0.36,20.70,0.045,45.0,170.0,1.00100,3.00,0.45,8.800000,6
8,6.3,0.300,0.34,1.60,0.049,14.0,132.0,0.99400,3.30,0.49,9.500000,6
9,8.1,0.220,0.43,1.50,0.044,28.0,129.0,0.99380,3.22,0.45,11.000000,6


In [6]:
train_dataset = dataset.head(int(len(dataset)*0.8))
test_dataset = dataset.tail(int(len(dataset)*0.2))
dataset_norm = normalize(train_dataset)

      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0               7.0             0.270         0.36           20.70      0.045   
1               6.3             0.300         0.34            1.60      0.049   
2               8.1             0.280         0.40            6.90      0.050   
3               7.2             0.230         0.32            8.50      0.058   
4               7.2             0.230         0.32            8.50      0.058   
5               8.1             0.280         0.40            6.90      0.050   
6               6.2             0.320         0.16            7.00      0.045   
7               7.0             0.270         0.36           20.70      0.045   
8               6.3             0.300         0.34            1.60      0.049   
9               8.1             0.220         0.43            1.50      0.044   
10              8.1             0.270         0.41            1.45      0.033   
11              8.6         

In [7]:
l_rate = 0.001
n_epoch = 100

In [8]:
coeff = coefficients_sgd(dataset_norm.values, l_rate, n_epoch)
print(coeff)

Coeficiente Inicial=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
epoch=0, lrate=0.001, error=8823.129
epoch=1, lrate=0.001, error=6753.244
epoch=2, lrate=0.001, error=5915.760
epoch=3, lrate=0.001, error=5248.493
epoch=4, lrate=0.001, error=4707.889
epoch=5, lrate=0.001, error=4270.068
epoch=6, lrate=0.001, error=3914.993
epoch=7, lrate=0.001, error=3626.265
epoch=8, lrate=0.001, error=3390.690
epoch=9, lrate=0.001, error=3197.712
epoch=10, lrate=0.001, error=3038.904
epoch=11, lrate=0.001, error=2907.545
epoch=12, lrate=0.001, error=2798.270
epoch=13, lrate=0.001, error=2706.797
epoch=14, lrate=0.001, error=2629.711
epoch=15, lrate=0.001, error=2564.279
epoch=16, lrate=0.001, error=2508.320
epoch=17, lrate=0.001, error=2460.088
epoch=18, lrate=0.001, error=2418.185
epoch=19, lrate=0.001, error=2381.489
epoch=20, lrate=0.001, error=2349.101
epoch=21, lrate=0.001, error=2320.297
epoch=22, lrate=0.001, error=2294.494
epoch=23, lrate=0.001, error=2271.221
epoch=24, lrate=0.001,

epoch=215, lrate=0.001, error=1931.716
epoch=216, lrate=0.001, error=1931.717
epoch=217, lrate=0.001, error=1931.718
epoch=218, lrate=0.001, error=1931.719
epoch=219, lrate=0.001, error=1931.721
epoch=220, lrate=0.001, error=1931.722
epoch=221, lrate=0.001, error=1931.723
epoch=222, lrate=0.001, error=1931.724
epoch=223, lrate=0.001, error=1931.725
epoch=224, lrate=0.001, error=1931.726
epoch=225, lrate=0.001, error=1931.727
epoch=226, lrate=0.001, error=1931.728
epoch=227, lrate=0.001, error=1931.729
epoch=228, lrate=0.001, error=1931.730
epoch=229, lrate=0.001, error=1931.731
epoch=230, lrate=0.001, error=1931.732
epoch=231, lrate=0.001, error=1931.733
epoch=232, lrate=0.001, error=1931.734
epoch=233, lrate=0.001, error=1931.735
epoch=234, lrate=0.001, error=1931.736
epoch=235, lrate=0.001, error=1931.737
epoch=236, lrate=0.001, error=1931.738
epoch=237, lrate=0.001, error=1931.739
epoch=238, lrate=0.001, error=1931.739
epoch=239, lrate=0.001, error=1931.740
epoch=240, lrate=0.001, e

epoch=427, lrate=0.001, error=1931.774
epoch=428, lrate=0.001, error=1931.774
epoch=429, lrate=0.001, error=1931.774
epoch=430, lrate=0.001, error=1931.774
epoch=431, lrate=0.001, error=1931.774
epoch=432, lrate=0.001, error=1931.774
epoch=433, lrate=0.001, error=1931.774
epoch=434, lrate=0.001, error=1931.774
epoch=435, lrate=0.001, error=1931.774
epoch=436, lrate=0.001, error=1931.774
epoch=437, lrate=0.001, error=1931.774
epoch=438, lrate=0.001, error=1931.774
epoch=439, lrate=0.001, error=1931.774
epoch=440, lrate=0.001, error=1931.774
epoch=441, lrate=0.001, error=1931.774
epoch=442, lrate=0.001, error=1931.774
epoch=443, lrate=0.001, error=1931.774
epoch=444, lrate=0.001, error=1931.774
epoch=445, lrate=0.001, error=1931.774
epoch=446, lrate=0.001, error=1931.774
epoch=447, lrate=0.001, error=1931.774
epoch=448, lrate=0.001, error=1931.774
epoch=449, lrate=0.001, error=1931.774
epoch=450, lrate=0.001, error=1931.774
epoch=451, lrate=0.001, error=1931.774
epoch=452, lrate=0.001, e

epoch=643, lrate=0.001, error=1931.775
epoch=644, lrate=0.001, error=1931.775
epoch=645, lrate=0.001, error=1931.775
epoch=646, lrate=0.001, error=1931.775
epoch=647, lrate=0.001, error=1931.775
epoch=648, lrate=0.001, error=1931.775
epoch=649, lrate=0.001, error=1931.775
epoch=650, lrate=0.001, error=1931.775
epoch=651, lrate=0.001, error=1931.775
epoch=652, lrate=0.001, error=1931.775
epoch=653, lrate=0.001, error=1931.775
epoch=654, lrate=0.001, error=1931.775
epoch=655, lrate=0.001, error=1931.775
epoch=656, lrate=0.001, error=1931.775
epoch=657, lrate=0.001, error=1931.775
epoch=658, lrate=0.001, error=1931.775
epoch=659, lrate=0.001, error=1931.775
epoch=660, lrate=0.001, error=1931.775
epoch=661, lrate=0.001, error=1931.775
epoch=662, lrate=0.001, error=1931.775
epoch=663, lrate=0.001, error=1931.775
epoch=664, lrate=0.001, error=1931.775
epoch=665, lrate=0.001, error=1931.775
epoch=666, lrate=0.001, error=1931.775
epoch=667, lrate=0.001, error=1931.775
epoch=668, lrate=0.001, e

epoch=855, lrate=0.001, error=1931.775
epoch=856, lrate=0.001, error=1931.775
epoch=857, lrate=0.001, error=1931.775
epoch=858, lrate=0.001, error=1931.775
epoch=859, lrate=0.001, error=1931.775
epoch=860, lrate=0.001, error=1931.775
epoch=861, lrate=0.001, error=1931.775
epoch=862, lrate=0.001, error=1931.775
epoch=863, lrate=0.001, error=1931.775
epoch=864, lrate=0.001, error=1931.775
epoch=865, lrate=0.001, error=1931.775
epoch=866, lrate=0.001, error=1931.775
epoch=867, lrate=0.001, error=1931.775
epoch=868, lrate=0.001, error=1931.775
epoch=869, lrate=0.001, error=1931.775
epoch=870, lrate=0.001, error=1931.775
epoch=871, lrate=0.001, error=1931.775
epoch=872, lrate=0.001, error=1931.775
epoch=873, lrate=0.001, error=1931.775
epoch=874, lrate=0.001, error=1931.775
epoch=875, lrate=0.001, error=1931.775
epoch=876, lrate=0.001, error=1931.775
epoch=877, lrate=0.001, error=1931.775
epoch=878, lrate=0.001, error=1931.775
epoch=879, lrate=0.001, error=1931.775
epoch=880, lrate=0.001, e