<html>
<h1 align="center"> Desafío: Modelo de Predicción de Precio de Propiedades
    <hr>
    <img align="center" style="background-color: #253bff; color: #ff5d25;" src="https://www.iberdrola.com/documents/20125/40921/machine_learning_746x419.jpg/15ff7571-4cfc-d9f0-5ef4-9c2e9306ad88?t=1627968463400" width="30%" height="20%">

</h1>
</html>



<h1 align="center">Índice del análisis</h1>

- [**Data Wrangling**](#Data-Wrangling)
- [**Visualizaciones Post Limpieza**](#Visualizaciones-Post-Limpieza)

<html><h1 align="center" style="background: #414ce8; color:white">Data Wrangling</h1></html>

[**Volver al Inicio**](#Índice-del-análisis) 

In [None]:
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.express as px
import plotly.figure_factory as ff
import matplotlib.pyplot as plt
import matplotlib.patches
import warnings



# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so warnings raised by pandas or
# plotly in later cells are hidden as well — confirm that is intended.
warnings.simplefilter('ignore')

# Load the dataset; the CSV column named 'index' becomes the DataFrame index.
df_caba = pd.read_csv('df_caba.csv', index_col='index')

In [None]:
# Preview the first three rows of the loaded DataFrame.
df_caba.head(3)

In [None]:
# Keep only the rows whose place_name is not 'Capital Federal', then
# renumber the rows from 0 (the previous index is discarded).
df_caba = (
    df_caba
    .loc[df_caba['place_name'].ne('Capital Federal')]
    .reset_index(drop=True)
)

In [None]:
# (rows, columns) of df_caba at this point of the notebook.
df_caba.shape

In [None]:
# Preview the first five rows of df_caba.
df_caba.head(5)

In [None]:
# Fraction of missing values per column, rounded to two decimals and
# listed from the most to the least incomplete column.
df_caba.isnull().mean().round(2).sort_values(ascending=False)

In [None]:
def getPlotGroup(data, number_color):
    """Show a bar chart of the standard deviation of price_usd_per_m2 per place_name.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the columns 'place_name' and 'price_usd_per_m2'.
    number_color : int
        Position inside ``px.colors.qualitative.Alphabet`` of the colour
        used for the bars.

    Returns
    -------
    The value returned by ``fig.show()``; the chart is rendered as a side
    effect of the call.
    """
    # Compute the grouped, descending-sorted standard deviation a single time
    # and reuse it for both axes.
    std_by_place = (
        data.groupby('place_name')['price_usd_per_m2']
        .std()
        .sort_values(ascending=False)
    )

    df = pd.DataFrame({'x': std_by_place.index, 'y': std_by_place.values})
    # Reference the frame's columns by name and give the axes explicit labels
    # instead of passing the raw arrays next to an unused frame.
    fig = px.bar(df, x='x', y='y',
                 labels={'x': 'place_name', 'y': 'std(price_usd_per_m2)'},
                 title='Desvio Standard del Precio en USD por m2 ',
                 color_discrete_sequence=[px.colors.qualitative.Alphabet[number_color]],
                 width=800, height=600,
                 template="simple_white"
                 )
    return fig.show()
    

# Bar chart for df_caba; 7 is the index of the bar colour in the Alphabet palette.
getPlotGroup(df_caba,  7)

Gráfico en 3D donde relacionamos las siguientes variables por zona: price_usd_per_m2, surface_covered_in_m2 y ambientes.

In [None]:
# 3D scatter of price_usd_per_m2 vs surface_covered_in_m2 vs ambientes,
# one colour per place_name, rendered with the dark template at 1000x750.
fig = px.scatter_3d(
    df_caba,
    x='price_usd_per_m2',
    y='surface_covered_in_m2',
    z='ambientes',
    color='place_name',
    title="3D Plot",
).update_layout(
    template="plotly_dark",
    width=1000,
    height=750,
)

fig.show()

Gráfico en 3D donde relacionamos las siguientes variables por tipo de propiedad: price_usd_per_m2, surface_covered_in_m2 y ambientes.

In [None]:
# Same three axes as the per-place plot, but coloured by property_type.
fig = px.scatter_3d(
    df_caba,
    x='price_usd_per_m2',
    y='surface_covered_in_m2',
    z='ambientes',
    color='property_type',
    title="3D Plot",
)
fig = fig.update_layout(
    template="plotly_dark",
    width=1000,
    height=750,
)

fig.show()

In [None]:
# List the available column names before selecting the modelling subset.
df_caba.columns

<h3> Limpieza de Datos </h3>

In [None]:
# Columns kept for the modelling stage.
# .copy() makes df_caba_model an independent frame: later cells assign to its
# columns and drop rows/columns, which must not be done on a slice of df_caba.
df_caba_model = df_caba[[
    'price_usd_per_m2',
    'property_type',
    'place_name',
    'price',
    'price_aprox_usd',
    'surface_total_in_m2',
    'surface_covered_in_m2',
    'rooms',
    'ambientes',
    'parrilla',
]].copy()
df_caba_model.head(5)

In [None]:
# The default figure size has to be set before the figure is created:
# rcParams only applies to figures opened afterwards, so setting it after
# plt.scatter() left this plot at the previous default size.
plt.rcParams["figure.figsize"] = (10,8)

# Restrict the data to four places and map each one to a tab10 colour.
df_caba_filter = df_caba_model.loc[df_caba_model['place_name'].isin(['Boedo', 'Puerto Madero', 'Palermo', 'Saavedra'])]
levels, categories = pd.factorize(df_caba_filter['place_name'])
colors = [plt.cm.tab10(i) for i in levels]
# One legend patch per category, using the same code -> colour mapping as the points.
handles = [matplotlib.patches.Patch(color=plt.cm.tab10(i), label=c) for i, c in enumerate(categories)]

plt.scatter(df_caba_filter['surface_covered_in_m2'], df_caba_filter['price_usd_per_m2'], s=30,
            alpha=0.2, c=colors)
plt.xlabel('surface_covered_in_m2')
plt.ylabel('price_usd_per_m2')
plt.legend(handles=handles, title='place_name')
plt.show()


In [None]:
# Set the default figure size before the figure is created so that it
# applies to this plot regardless of which cells ran earlier.
plt.rcParams["figure.figsize"] = (10,8)

# Boedo listings only, one tab10 colour per property_type.
df_caba_filter = df_caba_model.loc[df_caba_model['place_name'].isin(['Boedo'])]
levels, categories = pd.factorize(df_caba_filter['property_type'])
colors = [plt.cm.tab10(i) for i in levels]
handles = [matplotlib.patches.Patch(color=plt.cm.tab10(i), label=c) for i, c in enumerate(categories)]

# Both axes are plotted on a natural-log scale.
# NOTE(review): np.log yields -inf/NaN for non-positive inputs — confirm the
# two columns contain only positive values for Boedo.
plt.scatter(np.log(df_caba_filter['surface_covered_in_m2']), np.log(df_caba_filter['price_usd_per_m2']), s=30,
            alpha=0.3, c=colors)
# The labels state the transform; the plain column names suggested raw units.
plt.xlabel('log(surface_covered_in_m2)')
plt.ylabel('log(price_usd_per_m2)')
plt.legend(handles=handles, title='property_type')
plt.show()

Completamos la columna ambientes combinándola con rooms:

In [None]:
# Keep 'ambientes' where it is greater than 0, otherwise take 'rooms'.
# Vectorised form of the previous row-wise apply: a missing 'ambientes'
# compares as False against 0, so it also falls back to 'rooms'.
df_caba_model['ambientes'] = df_caba_model['ambientes'].where(
    df_caba_model['ambientes'] > 0,
    df_caba_model['rooms'],
)

In [None]:
# Remove the columns that are no longer needed for the model.
# The result is rebound instead of dropped in place, and errors='ignore'
# lets this cell be re-run after the columns are already gone (the in-place
# version raised KeyError on a second execution).
df_caba_model = df_caba_model.drop(columns=['rooms','price_aprox_usd','price'], errors='ignore')

In [None]:
# Fraction of missing values per remaining column (two decimals), highest first.
df_caba_model.isnull().mean().round(2).sort_values(ascending=False)

In [None]:
# Percentage of rows that would survive a dropna(), i.e. rows with no
# missing value in any of the remaining columns.
complete_rows = df_caba_model.dropna().shape[0]
total_rows = df_caba_model.shape[0]
print('Luego de los drop queda un ', (complete_rows / total_rows) * 100, '% del Data Set original')

In [None]:
# Distinct price_usd_per_m2 values found in Boedo, from highest to lowest.
# The boolean mask is built from df_caba_model itself: a mask taken from
# df_caba only works while both frames share exactly the same index.
df_caba_model.loc[df_caba_model['place_name'].isin(['Boedo'])].price_usd_per_m2.value_counts().sort_index(ascending = False).index

Podemos observar valores atípicos de precio en dólares por metro cuadrado y de superficie cubierta en Boedo.

In [None]:

# Use every Boedo observation (missing values removed) rather than only the
# distinct values, so the histogram and density curve reflect how many
# listings share each price.
x1 = df_caba_model.loc[df_caba_model['place_name'].isin(['Boedo']), 'price_usd_per_m2'].dropna().to_numpy()

hist_data = [x1]
group_labels = ['price_usd_per_m2']

fig = ff.create_distplot(hist_data, group_labels, bin_size=400)
fig.show()

In [None]:
# Use every Boedo observation (missing values removed) rather than only the
# distinct values, so the histogram and density curve reflect how many
# listings share each covered surface.
x1 = df_caba_model.loc[df_caba_model['place_name'].isin(['Boedo']), 'surface_covered_in_m2'].dropna().to_numpy()

hist_data = [x1]
group_labels = ['surface_covered_in_m2']

fig = ff.create_distplot(hist_data, group_labels, bin_size=5)
fig.show()

Limpiamos esos datos del Data Set

In [None]:
# Row labels of the Boedo listings treated as outliers: a fixed list of
# surface_covered_in_m2 values and any price_usd_per_m2 above 8000.
is_boedo = df_caba_model['place_name'].isin(['Boedo'])
index_drop_surface = df_caba_model.loc[is_boedo & (df_caba_model['surface_covered_in_m2'].isin([324,350,566,600,800]))].index
index_drop_price = df_caba_model.loc[is_boedo & (df_caba_model['price_usd_per_m2'] > 8000)].index

# Drop both sets in one call. Two consecutive drops raise KeyError when a row
# belongs to both sets, because the second call looks up labels that the
# first one already removed.
df_caba_model = df_caba_model.drop(index_drop_surface.union(index_drop_price))

In [None]:
# Renumber the rows from 0 after the removals; the old labels are discarded.
df_caba_model = df_caba_model.reset_index(drop=True)

Scatter para Boedo:

In [None]:
# Set the default figure size before the figure is created so this cell does
# not depend on the value left behind by an earlier cell.
plt.rcParams["figure.figsize"] = (10,8)

# Boedo listings only, one tab10 colour per property_type.
df_caba_filter = df_caba_model.loc[df_caba_model['place_name'].isin(['Boedo'])]
levels, categories = pd.factorize(df_caba_filter['property_type'])
colors = [plt.cm.tab10(i) for i in levels]
handles = [matplotlib.patches.Patch(color=plt.cm.tab10(i), label=c) for i, c in enumerate(categories)]

plt.scatter(df_caba_filter['surface_total_in_m2'], df_caba_filter['price_usd_per_m2'], s=30,
            alpha=0.3, c=colors)
plt.xlabel('surface_total_in_m2')
plt.ylabel('price_usd_per_m2')
plt.legend(handles=handles, title='property_type')
plt.show()

Scatter previo a la limpieza de valores atípicos para superficie total:

In [None]:
# Interactive scatter of total surface against price per m2 for the whole
# modelling frame, coloured by property_type.
fig = px.scatter(
    df_caba_model,
    x="surface_total_in_m2",
    y="price_usd_per_m2",
    color="property_type",
    hover_data=['property_type'],
)
fig.show()

In [None]:
# Remove the rows whose surface_total_in_m2 is above 2000 (rows where the
# value is missing do not match the comparison and are kept), then renumber
# the remaining rows from 0.
index_drop_surface_total = df_caba_model.loc[df_caba_model['surface_total_in_m2'] > 2000].index
df_caba_model = (
    df_caba_model
    .drop(index_drop_surface_total)
    .reset_index(drop=True)
)

<html><h1 align="center" style="background: #414ce8; color:white">Visualizaciones Post Limpieza</h1></html>

[**Volver al Inicio**](#Índice-del-análisis) 

In [None]:
# Same bar chart as before, now on df_caba_model; 3 is the index of the bar colour in the Alphabet palette.
getPlotGroup(df_caba_model,  3)

In [None]:

# 3D scatter on the cleaned frame: price per m2 and total surface on x/y,
# place_name on the z axis, coloured by property_type.
fig = px.scatter_3d(
    df_caba_model,
    x='price_usd_per_m2',
    y='surface_total_in_m2',
    z='place_name',
    color='property_type',
    title="3D Plot",
).update_layout(
    template="plotly_dark",
    width=1000,
    height=750,
)

fig.show()

In [None]:

# 3D scatter on the cleaned frame: price per m2, total surface and ambientes,
# coloured by place_name.
fig = px.scatter_3d(
    df_caba_model,
    x='price_usd_per_m2',
    y='surface_total_in_m2',
    z='ambientes',
    color='place_name',
    title="3D Plot",
)
fig = fig.update_layout(
    template="plotly_dark",
    width=1000,
    height=750,
)

fig.show()