# Obtención y preparación de datos.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
# Load the real-estate dataset from the local CSV file into a DataFrame.
df = pd.read_csv('data/Real_estate.csv')
# Preview the first rows to confirm the file was parsed as expected.
df.head()

https://www.kaggle.com/quantbruce/real-estate-price-prediction
<br>
<br>This real estate dataset was built for regression analysis, linear regression, multiple regression, and prediction models. It includes the date of purchase, house age, location, distance to nearest MRT station, and house price of unit area. 

In [None]:
# Remove the 'No' column (presumably just a row counter -- confirm against
# the CSV) and preview the result.
df = df.drop(columns=['No'])
df.head()

In [None]:
# Display the column labels currently present in the DataFrame.
df.columns

In [None]:
# Give the transaction-date column the short name 'fecha', then preview.
df = df.rename(columns={'X1 transaction date': 'fecha'})
df.head()

In [None]:
# Display the column labels again to check the rename.
df.columns

In [None]:
# Rename the remaining feature columns and the target column in one call.
# A single mapping replaces six separate in-place renames, and labels that
# are absent from the DataFrame are ignored exactly as before.
df.rename(
    columns={
        'X2 house age': 'edad',
        'X3 distance to the nearest MRT station': 'distancia estacion',
        'X4 number of convenience stores': 'tiendas cercanas',
        'X5 latitude': 'latitud',
        'X6 longitude': 'longitud',
        'Y house price of unit area': 'precio',
    },
    inplace=True,
)
df.head()

In [None]:
# Remove the 'fecha' column from the working DataFrame and preview the rest.
df = df.drop(columns=['fecha'])
df.head()

# Obtención de información de los datos.

### Descripción de los datos. 

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()

### Correlación entre los datos. 

In [None]:
# Pairwise correlation matrix between the DataFrame's columns
# (pandas defaults to the Pearson coefficient).
corr = df.corr()
corr

In [None]:
# Draw the correlation matrix as an annotated heatmap.
plt.figure(figsize=(6, 5))
# `linewidths` is the keyword seaborn documents for the gap between cells;
# the previous `linewidth` spelling only worked because unknown keywords are
# forwarded to matplotlib.
sns.heatmap(corr, linewidths=0.5, annot=True, cmap="BuPu")
plt.show()

### Utilizando Pairplot para visualizar relaciones entre variables. 

In [None]:
# Scatter plots for every pair of columns, with each column's distribution on
# the diagonal. pairplot creates and manages its own figure, so no
# plt.figure() call is needed; calling one first only leaves an empty extra
# figure in the output.
sns.pairplot(df)
plt.show()

# Creando nuestro primer modelo de regresión lineal.

### Importando la paquetería necesaria.
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Select the predictor column; indexing with a single label yields a
# one-dimensional pandas Series.
X = df['distancia estacion']
print(X)

In [None]:
# Inspect the container type of X (expected to be a pandas Series when the
# cells are run in order).
type(X)

In [None]:
# scikit-learn expects a 2-D feature matrix. Selecting with a list of labels
# keeps the data two-dimensional, giving an (n_samples, 1) NumPy array.
X = df[["distancia estacion"]].to_numpy()
print(X)

In [None]:
# Inspect the container type of X again (expected to be a NumPy ndarray after
# the conversion above).
type(X)

In [None]:
# Select the target column (price) as a one-dimensional pandas Series.
y = df['precio']
y

In [None]:
# Target values as an (n_samples, 1) NumPy array; selecting with a list of
# labels keeps the data two-dimensional.
y = df[['precio']].to_numpy()
print(y)

In [None]:
# Inspect the container type of y (expected to be a NumPy ndarray when the
# cells are run in order).
type(y)

In [None]:
# Fit an ordinary least-squares line: precio ~ distancia estacion.
# The `normalize` argument was deprecated in scikit-learn 1.0 and removed in
# 1.2, where passing it raises TypeError. It is dropped here; for an
# unregularised linear regression it did not change the predictions.
model = LinearRegression().fit(X, y)
# Predictions for the same observations the model was trained on.
y_prediccion = model.predict(X)
print(y_prediccion)

In [None]:
# First observation: distance, real price and predicted price, one per line.
print(X[0], y[0], y_prediccion[0], sep="\n")

In [None]:
# Second observation: distance, real price and predicted price, one per line.
print(X[1], y[1], y_prediccion[1], sep="\n")

In [None]:
# Compare the first ten observations with the model's predictions.
for i in range(10):
    print("Distancia")
    print(X[i])
    print("Precio Real")
    print(y[i])
    print("Precio de la Predicción")
    print(y_prediccion[i])

    # Signed relative error as a percentage of the real price. Divide by
    # y[i] itself: wrapping it in a list made NumPy broadcast the result
    # into an extra, meaningless dimension.
    error = (y[i] - y_prediccion[i]) / y[i] * 100
    print("Error de la Predicción")
    print(error)

    print()

In [None]:
# Scatter plot of the raw data: predictor X on the horizontal axis, target y
# on the vertical axis.
plt.scatter(X, y)
plt.show()

In [None]:
# Evenly spaced distances from 0 to 7000 (np.linspace's default of 50 points),
# arranged as a column vector so the model accepts them as a feature matrix.
x_plot = np.linspace(0, 7000)[:, np.newaxis]
# Fitted line evaluated on that grid.
y_plot = model.predict(x_plot)

### Graficamos los datos y el modelo.

In [None]:
# Start a new figure that both the data points and the fitted line share.
fig = plt.figure()
# This scatter call draws the original observations.
plt.scatter(X, y)

# This plot call draws the model's predictions as a solid red line.
plt.plot(x_plot, y_plot,"r-")
plt.show()

### Obtenemos los elementos del modelo y los utilizamos para predecir.

In [None]:
# Fitted slope(s) of the linear model.
model.coef_

In [None]:
# Fitted intercept of the linear model.
model.intercept_

$ \text{Precio} = -0.00726205 \times \text{Distancia} + 45.85142706 $

In [None]:
# Evaluate the fitted line by hand for a distance of 500:
# intercept + slope * distance.
prediccion = model.intercept_ + 500 * model.coef_[0]
print(prediccion)

In [None]:
# Evaluate the fitted line by hand for a distance of 5000:
# intercept + slope * distance.
prediccion = model.intercept_ + 5000 * model.coef_[0]
print(prediccion)

### Alternativa para predecir.

In [None]:
# Same prediction through the estimator API; predict() takes a 2-D input,
# hence the nested list holding one sample with one feature.
prediccion = model.predict([[500]])
print(prediccion)

In [None]:
# Prediction for a distance of 5000 through the estimator API; predict()
# takes a 2-D input, hence the nested list.
prediccion = model.predict([[5000]])
print(prediccion)

### Utilizamos una métrica para verificar el desempeño de nuestro modelo.

In [None]:
# Coefficient of determination (R^2) of the predictions against the real
# prices.
# NOTE(review): this is computed on the same data used for fitting, so it
# measures goodness of fit rather than generalisation.
from sklearn.metrics import r2_score
r2_score(y, y_prediccion)

# Creando nuestro primer modelo de regresión polinomial.

### Importando la paquetería necesaria.
https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Rebuild the feature matrix and target as (n_samples, 1) NumPy arrays;
# selecting with a list of labels keeps each one two-dimensional.
X = df[["distancia estacion"]].to_numpy()
y = df[['precio']].to_numpy()

In [None]:
# Transformer that expands each feature into its polynomial terms up to
# degree 2.
poly = PolynomialFeatures(degree = 2)

In [None]:
# Fit the transformer on X and expand it into polynomial features
# (with the default settings and one input feature: bias, x and x^2 columns).
x_poly = poly.fit_transform(X)
print(x_poly)

In [None]:
# Linear regression on the polynomial features, i.e. a polynomial fit of
# price against distance.
model = LinearRegression()
model.fit(x_poly, y)
# Predictions for the same observations the model was trained on.
y_prediccion = model.predict(x_poly)
print(y_prediccion)

In [None]:
# Evenly spaced distances from 0 to 7000, as a column vector, used to draw
# the fitted curve.
x_plot = np.linspace(0, 7000).reshape(-1, 1)
# Use transform(), not fit_transform(): the transformer was already fitted on
# the training features, and new data should only be transformed with it.
y_plot = model.predict(poly.transform(x_plot))

In [None]:
# New figure with the original observations as points and the model's
# predictions over x_plot as a solid red line.
fig = plt.figure()
plt.scatter(X, y)
plt.plot(x_plot, y_plot,"r-")
plt.show()

### Utilizamos una métrica para verificar el desempeño de nuestro modelo.

In [None]:
# Coefficient of determination (R^2) of the current predictions against the
# real prices.
# NOTE(review): this is computed on the same data used for fitting, so it
# measures goodness of fit rather than generalisation.
from sklearn.metrics import r2_score
r2_score(y, y_prediccion)