# Regresión Lineal

A continuación vamos a implementar una regresión lineal como base para 
la comprensión de redes neuronales.

In [None]:
import matplotlib
matplotlib.use('Agg')
%pylab inline
import pandas
import datetime as dt

from bigdl.nn.layer import *
from bigdl.nn.criterion import *
from bigdl.optim.optimizer import *
from bigdl.util.common import *
from bigdl.util.common import Sample
import matplotlib.pyplot as plt
from bigdl.dataset.transformer import *
from matplotlib.pyplot import imshow
from pyspark import SparkContext
# Spark configuration: BigDL's base conf, master "local[4]", 2g of driver memory
spark_conf = create_spark_conf().setMaster("local[4]").set("spark.driver.memory", "2g")
# Reuse an existing SparkContext if there is one, otherwise create it from the conf
sc = SparkContext.getOrCreate(conf=spark_conf)

# Initialize the BigDL engine before any model is built
init_engine()

## 1 - Generando dataset aleatoriamente

In [None]:
# Number of input features in each generated sample
FEATURES_DIM = 2
# Number of samples generated for the training set
data_len = 100

def gen_rand_sample():
    """Build one random training sample for the linear target.

    Draws ``FEATURES_DIM`` values uniformly from [0, 1) and labels them with
    ``sum(2 * features) + 0.4``.

    Returns:
        A ``Sample`` built from the feature vector and its scalar label.
    """
    x = np.random.uniform(0, 1, FEATURES_DIM)
    y = np.sum(2 * x) + 0.4
    return Sample.from_ndarray(x, y)

# Training RDD: one freshly generated random sample per index (the index itself is unused)
rdd_train = sc.parallelize(range(data_len)).map(lambda _: gen_rand_sample())

## 2 - Definiendo los parámetros

In [None]:
# Learning rate handed to the SGD optimization method
learning_rate = 0.2
# Number of epochs after which training stops
training_epochs = 5
# Mini-batch size handed to the optimizer
batch_size = 4
# Model input width: one input per generated feature
n_input = FEATURES_DIM
# Model output width: a single regression value
n_output = 1 

def linear_regression(n_input, n_output):
    """Build a linear-regression model made of a single linear layer.

    Args:
        n_input: Input size passed to the ``Linear`` layer.
        n_output: Output size passed to the ``Linear`` layer.

    Returns:
        A ``Sequential`` container holding one ``Linear(n_input, n_output)``
        layer.
    """
    net = Sequential()
    dense = Linear(n_input, n_output)
    net.add(dense)
    return net

# Instantiate the regression model with the configured input/output sizes
model = linear_regression(n_input, n_output)

## 3 - Definiendo el optimizador y entrenando el modelo

In [None]:
# Define the optimizer: mean-squared-error loss, SGD updates, stop after a
# fixed number of epochs
mse_loss = MSECriterion()
sgd = SGD(learningrate=learning_rate)
stop_after = MaxEpoch(training_epochs)

optimizer = Optimizer(
    model=model,
    training_rdd=rdd_train,
    criterion=mse_loss,
    optim_method=sgd,
    end_trigger=stop_after,
    batch_size=batch_size)

In [None]:
# Start training; the result of optimize() is kept as the trained model
trained_model = optimizer.optimize()

## 4 - Predicción en datos de entrenamiento

In [None]:
# Run the trained model over the training RDD and fetch the first 5 predictions
predict_result = trained_model.predict(rdd_train)
p = predict_result.take(5)

# Print a header, then each fetched prediction followed by a blank line
print("predict predict: \n")
for i in p:
    print(str(i) + "\n")

## 5 - Evaluación del modelo 

In [None]:
def test_predict(trained_model):
    """Evaluate the trained model on freshly generated data and print the MSE.

    Generates 10 samples with a fixed seed, labels each one with the same
    rule used for the training data (``sum(2 * features) + 0.4``), runs the
    model on them and prints the mean squared error between the first 6
    predictions and their true labels.

    Args:
        trained_model: Model exposing ``predict(rdd)`` that returns an RDD of
            per-sample predictions.
    """
    np.random.seed(100)  # fixed seed so the evaluation set is reproducible
    total_length = 10
    features = np.random.uniform(0, 1, (total_length, FEATURES_DIM))
    # One label per row, computed with the training target function; the
    # previous code used a single scalar summed over the whole matrix.
    labels = (2 * features).sum(axis=1) + 0.4
    predict_data = sc.parallelize(range(0, total_length)).map(
        lambda i: Sample.from_ndarray(features[i], labels[i]))

    predict_result = trained_model.predict(predict_data)
    p = predict_result.take(6)
    # Compare against the true labels of the same rows instead of hard-coded
    # values unrelated to this target.
    # NOTE(review): assumes predict()/take() keep the input order - confirm.
    predictions = np.asarray(p, dtype="float32").reshape(len(p), -1)
    ground_label = labels[:len(p)].astype("float32").reshape(len(p), -1)
    mse = ((predictions - ground_label) ** 2).mean()
    # print() call form works on both Python 2 and Python 3
    print(mse)
    
# Run the evaluation on the trained model
test_predict(trained_model)