# Machine Learning -1 "Regresión"
Ejemplo de: https://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html

In [None]:
# Code source: Jaques Grobler
# License: BSD 3 clause

## Importamos las librerías necesarias

In [1]:

import matplotlib.pyplot as plt
import numpy as np

from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

## Cargamos los datos y los partimos en entrenamiento y testeo

Los datos tienen esta forma:
X:
0. age age in years
1. sex
2. bmi body mass index
3. bp average blood pressure
4. s1 tc, total serum cholesterol
5. s2 ldl, low-density lipoproteins
6. s3 hdl, high-density lipoproteins
7. s4 tch, total cholesterol / HDL
8. s5 ltg, possibly log of serum triglycerides level
9. s6 glu, blood sugar level
y:
- a quantitative measure of disease progression one year after baseline

In [None]:
# Fetch the diabetes data as a (features, target) pair and report both shapes.
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)
print(f"diabetes_X.shape: {diabetes_X.shape}")
print(f"diabetes_y.shape: {diabetes_y.shape}")

# Display names for the feature columns, listed in column order.
labels = [
    "Edad",
    "sex",
    "bmi",
    "Presión",
    "Colesterol Total",
    "Colesterol Malo",
    "Colesterol Bueno",
    "tch",
    "ltg",
    "glu",
]
# Column index of the single feature the regression will be fitted on.
feature = 3
print(f"Vamos a predecir usando la característica {labels[feature]}")

# Keep only that column, shaped as a 2-D array with exactly one column.
diabetes_X = diabetes_X[:, feature].reshape(-1, 1)

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
(
    diabetes_X_train,
    diabetes_X_test,
    diabetes_y_train,
    diabetes_y_test,
) = train_test_split(diabetes_X, diabetes_y, test_size=0.2, random_state=42)

## Creamos el modelo de regresión lineal y lo entrenamos

In [None]:

# Instantiate an ordinary least-squares linear regression estimator.
regr = linear_model.LinearRegression()

# Fit it on the training split only. Leaving `fit` as the final expression
# keeps the fitted estimator as this cell's displayed output.
regr.fit(X=diabetes_X_train, y=diabetes_y_train)


## Predecimos los valores de testeo y graficamos los resultados

In [None]:

# Predict the target for the held-out test samples.
diabetes_y_pred = regr.predict(diabetes_X_test)

# Report the fitted coefficient(s).
print("Coefficients: \n", regr.coef_)
# Mean squared error between the true and predicted test targets.
print(f"Mean squared error: {mean_squared_error(diabetes_y_test, diabetes_y_pred):.2f}")
# Coefficient of determination (R^2): 1 is perfect prediction.
print(f"Coefficient of determination: {r2_score(diabetes_y_test, diabetes_y_pred):.2f}")

# Draw the test points and overlay the fitted regression line.
fig, ax = plt.subplots()
ax.scatter(diabetes_X_test, diabetes_y_test)
ax.plot(diabetes_X_test, diabetes_y_pred, color="orange")

# Empty tick sequences remove the tick marks and tick labels on both axes.
ax.set_xticks(())
ax.set_yticks(())

ax.set_xlabel(labels[feature])
ax.set_ylabel("Progresión de la enfermedad")

plt.show()