<a href="https://colab.research.google.com/github/LuisPeMoraRod/AI-Laboratories/blob/main/Lab7_ErrorRates.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Ejercicio 1.
Genere un set de datos con comportamiento lineal (puede utilizar una librería para generar este set) de 100 mil datos, calcule la regresión lineal. Puede reutilizar los datos del lab de Regresión Lineal (5 pts)

In [49]:
# import libraries
from sklearn.datasets import make_regression
# from sklearn.metrics import mean_absolute_error
# from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt #for plotting
import torch
import numpy as np
import plotly.graph_objects as go
import statsmodels.api as sm
import pandas as pd
import datetime

In [58]:
N_SAMPLES = 100_000  # total number of observations in the generated dataset

def get_linear_dataset(samples_num: int):
  """Build a noisy single-feature linear dataset rescaled to height/weight ranges.

  The raw points come from ``make_regression`` with a fixed ``random_state``,
  so the same ``samples_num`` always produces the same data.

  Args:
    samples_num: number of observations to generate.

  Returns:
    Tuple ``(x_data, y_data)`` of ``torch.float64`` tensors: the feature
    linearly rescaled to [1.20, 1.40] (students' height) and the target
    linearly rescaled to [25, 40] (students' weight).
  """
  feature_count = 1  # total number of features
  informative_count = 1  # number of features that actually drive the target
  noise_std = 10  # standard deviation of the Gaussian noise added to the target

  # coef=True makes make_regression also return the coefficients of the
  # underlying linear model; they are not used afterwards.
  raw_x, raw_y, _coef = make_regression(
      n_samples=samples_num,
      n_features=feature_count,
      n_informative=informative_count,
      noise=noise_std,
      coef=True,
      random_state=0,  # fixed seed: same data points on every run
  )

  # Map the feature from its observed [min, max] onto [1.20, 1.40], then keep
  # the single feature column as a flat array.
  height = np.interp(raw_x, (raw_x.min(), raw_x.max()), (1.20, 1.40))[:, 0]

  # Map the target from its observed [min, max] onto [25, 40].
  weight = np.interp(raw_y, (raw_y.min(), raw_y.max()), (25, 40))

  return (
      torch.tensor(height, dtype=torch.float64),
      torch.tensor(weight, dtype=torch.float64),
  )

# Generate the full dataset once; x_data / y_data are module-level tensors.
x_data, y_data = get_linear_dataset(samples_num=N_SAMPLES)

# Print both tensors as a quick sanity check.
print(x_data)
print(y_data)

tensor([1.3287, 1.2918, 1.3016,  ..., 1.3029, 1.3380, 1.3173],
       dtype=torch.float64)
tensor([34.1849, 31.4623, 32.6545,  ..., 32.5608, 34.9858, 33.6121],
       dtype=torch.float64)


### Ejercicio 2
Calcule diferentes índices de error sobre la regresión. Los algoritmos deben ser programados por usted. No se permite que utilice métodos ya programados de librerías. 

In [59]:
def linearRegression(array_X, array_Y):
  """Fit the least-squares line ``y = a + b*x`` through the given points.

  Args:
    array_X: tensor with the independent-variable values.
    array_Y: tensor with the dependent-variable values.

  Returns:
    Tuple ``(a, b)`` of tensors: the intercept ``a`` and the slope ``b``.
  """
  mean_x = torch.mean(array_X)
  mean_y = torch.mean(array_Y)

  # Deviation of every observation from its mean.
  dev_x = array_X - mean_x
  dev_y = array_Y - mean_y

  # Slope: sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x)^2)
  b = (dev_x * dev_y).sum() / (dev_x ** 2).sum()

  # Intercept: the fitted line passes through (mean_x, mean_y).
  a = mean_y - b * mean_x

  return (a, b)

# Fit the line y = a + b*x on the whole dataset (a: intercept, b: slope).
a, b = linearRegression(array_X=x_data, array_Y=y_data)
print(f'a = {a}')
print(f'b = {b}')

a = -65.28086377653793
b = 75.11420294360995


#### a) Calcule el Error Medio Absoluto (MAE). (20 pts)  
Para calcular esto se utiliza $MAE = \frac{1}{m} \sum_{i=1}^{m}|\hat t_i - t_i|$, donde $m$ es la cantidad de observaciones, $\hat t_i$ es el valor predicho de la observación y $t_i$ es el valor real de la observación.

In [60]:
# MAE method
def meanAbsoluteError(a, b, x, y_true=None):
  """Return the Mean Absolute Error (MAE) of the line ``y = a + b*x``.

  MAE = (1/m) * sum(|y_estimated_i - y_real_i|), where m is the number of
  observations.

  Args:
    a: intercept of the fitted line (tensor or plain Python number).
    b: slope of the fitted line (tensor or plain Python number).
    x: tensor with the independent-variable values.
    y_true: tensor with the observed targets. When omitted, the module-level
      ``y_data`` is used, so existing ``meanAbsoluteError(a, b, x)`` calls
      keep working.

  Returns:
    0-dim tensor holding the MAE.

  Raises:
    ValueError: if predictions and targets differ in shape after squeezing.
  """
  if y_true is None:
    y_true = y_data  # fall back to the notebook's global targets

  # Detach the coefficients from any autograd graph; plain numbers pass through.
  if torch.is_tensor(a):
    a = a.detach()
  if torch.is_tensor(b):
    b = b.detach()

  # Drop singleton dimensions so a 1xN matrix and an N-element array line up.
  y_est = (a + b * x).squeeze()  # Y_estimated
  y_real = y_true.squeeze()  # Y_real

  # Refuse mismatched shapes instead of letting broadcasting skew the metric.
  if y_est.shape != y_real.shape:
    raise ValueError(
        f'shape mismatch: predictions {tuple(y_est.shape)} '
        f'vs targets {tuple(y_real.shape)}')

  abs_err = torch.abs(y_est - y_real)  # |Y_estimated - Y_real|
  # Average over the observations actually given, not the global N_SAMPLES.
  return abs_err.sum() / abs_err.numel()

# MAE of the fitted line over the whole dataset.
result = meanAbsoluteError(a=a, b=b, x=x_data)
print(f'MAE = {result}')

MAE = 0.1669822528260621


#### b) Calcule el Error Medio Cuadrático (MSE). (20 pts) 
Para calcular el MSE se utiliza $MSE = \frac{1}{m} \sum_{i=1}^{m}(\hat t_i - t_i)^2$, donde $m$ es la cantidad de observaciones, $\hat t_i$ es el valor predicho de la observación y $t_i$ es el valor real de la observación.

In [61]:
# MSE method
def meanSquaredError(a, b, x, y_true=None):
  """Return the Mean Squared Error (MSE) of the line ``y = a + b*x``.

  MSE = (1/m) * sum((y_real_i - y_estimated_i)^2), where m is the number of
  observations.

  Args:
    a: intercept of the fitted line (tensor or plain Python number).
    b: slope of the fitted line (tensor or plain Python number).
    x: tensor with the independent-variable values.
    y_true: tensor with the observed targets. When omitted, the module-level
      ``y_data`` is used, so existing ``meanSquaredError(a, b, x)`` calls
      keep working.

  Returns:
    0-dim tensor holding the MSE.

  Raises:
    ValueError: if predictions and targets differ in shape after squeezing.
  """
  if y_true is None:
    y_true = y_data  # fall back to the notebook's global targets

  # Detach the coefficients from any autograd graph; plain numbers pass through.
  if torch.is_tensor(a):
    a = a.detach()
  if torch.is_tensor(b):
    b = b.detach()

  # Drop singleton dimensions so a 1xN matrix and an N-element array line up.
  y_est = (a + b * x).squeeze()  # Y_estimated
  y_real = y_true.squeeze()  # Y_real

  # Refuse mismatched shapes instead of letting broadcasting skew the metric.
  if y_est.shape != y_real.shape:
    raise ValueError(
        f'shape mismatch: predictions {tuple(y_est.shape)} '
        f'vs targets {tuple(y_real.shape)}')

  sq_err = torch.pow(y_real - y_est, 2)  # (Y_real - Y_estimated)^2
  # Average over the observations actually given, not the global N_SAMPLES.
  return sq_err.sum() / sq_err.numel()

# MSE of the fitted line over the whole dataset.
result = meanSquaredError(a=a, b=b, x=x_data)
print(f'MSE = {result}')

MSE = 0.04364129076169469


#### c) Calcule el Root Mean Squared Error (RMSE). (20 pts) 
Para calcular el RMSE se utiliza $RMSE = \sqrt{\frac{1}{m} \sum_{i=1}^{m}(\hat t_i - t_i)^2}$, donde $m$ es la cantidad de observaciones, $\hat t_i$ es el valor predicho de la observación y $t_i$ es el valor real de la observación.


In [62]:
# RMSE method
def rootMediumSquaredError(a, b, x, y_true=None):
  """Return the Root Mean Squared Error (RMSE) of the line ``y = a + b*x``.

  RMSE = sqrt((1/m) * sum((y_real_i - y_estimated_i)^2)), where m is the
  number of observations. The function name says "Medium", but the value
  computed is the usual Root *Mean* Squared Error.

  Args:
    a: intercept of the fitted line (tensor or plain Python number).
    b: slope of the fitted line (tensor or plain Python number).
    x: tensor with the independent-variable values.
    y_true: tensor with the observed targets. When omitted, the module-level
      ``y_data`` is used, so existing ``rootMediumSquaredError(a, b, x)``
      calls keep working.

  Returns:
    0-dim tensor holding the RMSE.

  Raises:
    ValueError: if predictions and targets differ in shape after squeezing.
  """
  if y_true is None:
    y_true = y_data  # fall back to the notebook's global targets

  # Detach the coefficients from any autograd graph; plain numbers pass through.
  if torch.is_tensor(a):
    a = a.detach()
  if torch.is_tensor(b):
    b = b.detach()

  # Drop singleton dimensions so a 1xN matrix and an N-element array line up.
  y_est = (a + b * x).squeeze()  # Y_estimated
  y_real = y_true.squeeze()  # Y_real

  # Refuse mismatched shapes instead of letting broadcasting skew the metric.
  if y_est.shape != y_real.shape:
    raise ValueError(
        f'shape mismatch: predictions {tuple(y_est.shape)} '
        f'vs targets {tuple(y_real.shape)}')

  sq_err = torch.pow(y_real - y_est, 2)  # (Y_real - Y_estimated)^2
  # Average over the observations actually given, not the global N_SAMPLES.
  mse = sq_err.sum() / sq_err.numel()
  return torch.sqrt(mse)

# RMSE of the fitted line over the whole dataset.
result = rootMediumSquaredError(a=a, b=b, x=x_data)
print(f'RMSE = {result}')

RMSE = 0.20890498022233622


### Ejercicio 3
Grafique en 2D las observaciones y sus errores

In [63]:
samples = 100 #number of samples to graph (100000 is excessive for the plotter)

# Keep exactly the first `samples` observations ([:samples], so the count
# matches the figure title) and hand pandas plain NumPy arrays.
df = pd.DataFrame({'X': x_data[:samples].numpy(), 'Y': y_data[:samples].numpy()})

# regression: ordinary least squares fitted on the plotted subset only
df['bestfit'] = sm.OLS(df['Y'], sm.add_constant(df['X'])).fit().fittedvalues

# plotly figure setup
fig = go.Figure()
fig.add_trace(go.Scatter(name=' Height vs Weight', x=df['X'], y=df['Y'], mode='markers'))
# The fitted values belong to the rows of df, so they are plotted against
# df['X'] (same length), not against the full x_data tensor.
fig.add_trace(go.Scatter(name='line of best fit', x=df['X'], y=df['bestfit'], mode='lines'))

# plotly figure layout
fig.update_layout(title=f'First {samples} observations of the dataset', xaxis_title = 'Height (m)', yaxis_title = 'Weight (kg)')

# x-values shared by the observations and the fitted line
xVals = df['X'].to_numpy()

# observed and fitted y-values, keyed by the mode of the trace that shows them
errors = {
    'markers': df['Y'].to_numpy(),
    'lines': df['bestfit'].to_numpy(),
}

# one vertical segment per observation: from the marker to the fitted line,
# i.e. the prediction error (residual) of that observation
shapes = [
    go.layout.Shape(type="line",
                    x0=x_val,
                    y0=y_observed,
                    x1=x_val,
                    y1=y_fitted,
                    line=dict(color='black', width=1),
                    opacity=0.5,
                    layer="above")
    for x_val, y_observed, y_fitted in zip(xVals, errors['markers'], errors['lines'])
]

# include shapes in layout
fig.update_layout(shapes=shapes)
fig.show()

### Ejercicio 4
Saque conclusiones a partir de los índices de error calculados en este laboratorio.
Sea analítico y extensivo en sus conclusiones