#  **Desafio House Rocket**
 
A ideia do desafio é comprar casas a preços baixos e revendê-las a preços mais altos.
 
Assim, o objetivo é encontrar bons negócios dentro do portfólio disponível, ou seja, casas com preço mais baixo, em ótimas localizações e com grande potencial de revenda por um preço mais alto.

In [1]:
# Mount Google Drive inside the Colab runtime so that files stored in Drive
# can be read through paths under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# 1 - Importando as bibliotecas

In [2]:
import pandas as pd
def _format_two_decimals(value):
    """Render a number with exactly two digits after the decimal point."""
    return '%.2f' % value


# Show every float pandas displays in fixed two-decimal notation.
pd.set_option('display.float_format', _format_two_decimals)

import numpy as np

from geopy.geocoders import Nominatim

# 2 - Importando a base de dados

In [3]:
# Location of the kc_house_data CSV inside the mounted Drive.
PATH = '/content/drive/MyDrive/Colab Notebooks/meigarom/base/kc_house_data.csv'

# Read the whole file into a DataFrame and preview its first five rows.
data = pd.read_csv(filepath_or_buffer=PATH)
data.head(5)

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900.0,3,1.0,1180,5650,1.0,0,0,3,7,1180,0,1955,0,98178,47.51,-122.26,1340,5650
1,6414100192,20141209T000000,538000.0,3,2.25,2570,7242,2.0,0,0,3,7,2170,400,1951,1991,98125,47.72,-122.32,1690,7639
2,5631500400,20150225T000000,180000.0,2,1.0,770,10000,1.0,0,0,3,6,770,0,1933,0,98028,47.74,-122.23,2720,8062
3,2487200875,20141209T000000,604000.0,4,3.0,1960,5000,1.0,0,0,5,7,1050,910,1965,0,98136,47.52,-122.39,1360,5000
4,1954400510,20150218T000000,510000.0,3,2.0,1680,8080,1.0,0,0,3,8,1680,0,1987,0,98074,47.62,-122.05,1800,7503


# 3 - Convertendo a Data

In [4]:
# Parse the raw 'date' strings (e.g. '20141013T000000') into datetime values.
# The layout is spelled out explicitly so parsing does not depend on pandas'
# format inference, and a value in any other layout raises an error instead of
# being guessed at.
data['date'] = pd.to_datetime(data['date'], format='%Y%m%dT%H%M%S')

# 4 - Estatística Descritiva dos dados

## 4.1 - Tendência Central - Média e Mediana

In [5]:
# Keep only the int64 and float64 columns; the descriptive statistics that
# follow are computed on this subset.
numeric_dtypes = ['int64', 'float64']
num_attributes = data.select_dtypes(include=numeric_dtypes)

# Show which columns were kept and their dtypes.
num_attributes.dtypes

id                 int64
price            float64
bedrooms           int64
bathrooms        float64
sqft_living        int64
sqft_lot           int64
floors           float64
waterfront         int64
view               int64
condition          int64
grade              int64
sqft_above         int64
sqft_basement      int64
yr_built           int64
yr_renovated       int64
zipcode            int64
lat              float64
long             float64
sqft_living15      int64
sqft_lot15         int64
dtype: object

In [6]:
# Central tendency per numeric column. Each result is a one-column frame whose
# index holds the attribute names.
media = num_attributes.apply(np.mean, axis='index').to_frame()
mediana = num_attributes.apply(np.median, axis='index').to_frame()

## 4.2 - Dispersão - Mínimo, Máximo e o Desvio Padrão

In [7]:
# Dispersion per numeric column, each as a one-column frame indexed by
# attribute name. np.std is applied with its default settings.
# NOTE(review): `min` and `max` shadow the Python builtins for the rest of the
# session. The names are kept because the cell that joins the metrics reads
# them; rename both places together if the builtins are ever needed.
std = num_attributes.apply(np.std, axis='index').to_frame()
min = num_attributes.apply(np.min, axis='index').to_frame()
max = num_attributes.apply(np.max, axis='index').to_frame()

## 4.3 - União das métricas

In [8]:
# Pair each summary label with the frame that holds it, in display order.
metric_frames = {
    'maximo': max,
    'minimo': min,
    'media': media,
    'mediana': mediana,
    'std': std,
}

# Place the metrics side by side and turn the attribute names (the shared
# index) into a regular first column.
df1 = pd.concat(list(metric_frames.values()), axis=1).reset_index()
df1.columns = ['attributes', *metric_frames]
df1

Unnamed: 0,attributes,maximo,minimo,media,mediana,std
0,id,9900000190.0,1000102.0,4580301520.86,3904930410.0,2876499023.43
1,price,7700000.0,75000.0,540088.14,450000.0,367118.7
2,bedrooms,33.0,0.0,3.37,3.0,0.93
3,bathrooms,8.0,0.0,2.11,2.25,0.77
4,sqft_living,13540.0,290.0,2079.9,1910.0,918.42
5,sqft_lot,1651359.0,520.0,15106.97,7618.0,41419.55
6,floors,3.5,1.0,1.49,1.5,0.54
7,waterfront,1.0,0.0,0.01,0.0,0.09
8,view,4.0,0.0,0.23,0.0,0.77
9,condition,5.0,1.0,3.41,3.0,0.65


# 5 - Respondendo dúvidas

## 5.1 - Criar uma nova coluna chamada 'dormitory_type'

In [9]:
# Rules for the new 'dormitory_type' column:
#  - 'bedrooms' equal to 1      => 'studio'
#  - 'bedrooms' equal to 2      => 'apartament'
#  - 'bedrooms' greater than 2  => 'house'
#  - any other value (the dataset contains homes with 0 bedrooms) => missing

def classify_dormitory(bedrooms):
    """Map a bedroom count to its dormitory type label.

    Returns 'studio' for 1, 'apartament' for 2 and 'house' for more than 2.
    Any other value yields NaN, so homes that fall outside the stated rules
    are left unlabelled instead of being counted as 'house'.
    """
    if bedrooms == 1:
        return 'studio'
    if bedrooms == 2:
        return 'apartament'
    if bedrooms > 2:
        return 'house'
    return np.nan


data['dormitory_type'] = data['bedrooms'].apply(classify_dormitory)

data.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,dormitory_type
0,7129300520,2014-10-13,221900.0,3,1.0,1180,5650,1.0,0,0,3,7,1180,0,1955,0,98178,47.51,-122.26,1340,5650,house
1,6414100192,2014-12-09,538000.0,3,2.25,2570,7242,2.0,0,0,3,7,2170,400,1951,1991,98125,47.72,-122.32,1690,7639,house
2,5631500400,2015-02-25,180000.0,2,1.0,770,10000,1.0,0,0,3,6,770,0,1933,0,98028,47.74,-122.23,2720,8062,apartament
3,2487200875,2014-12-09,604000.0,4,3.0,1960,5000,1.0,0,0,5,7,1050,910,1965,0,98136,47.52,-122.39,1360,5000,house
4,1954400510,2015-02-18,510000.0,3,2.0,1680,8080,1.0,0,0,3,8,1680,0,1987,0,98074,47.62,-122.05,1800,7503,house


## 5.2 - Definindo níveis de preço 

In [10]:
# Price levels (lower bound inclusive, upper bound exclusive):
#   below 321,950          => 0
#   321,950 up to 450,000  => 1
#   450,000 up to 645,000  => 2
#   645,000 and above      => 3
PRICE_LEVEL_BOUNDS = [321950, 450000, 645000]

# For each price, np.digitize counts how many bounds are <= that price, which
# is exactly the level number. No placeholder column is created first: it
# would be overwritten immediately, and the `np.NaN` alias it relied on no
# longer exists in NumPy 2.0.
data['level'] = np.digitize(data['price'], PRICE_LEVEL_BOUNDS)

## 5.3 - Utilizando API para agregar informação de endereço

In [11]:
# NOTE(review): the geocoding client setup is commented out, so no Nominatim
# client is created when the notebook runs.
#geolocator = Nominatim(user_agent = 'geopiExercices')

In [12]:
# Disabled example of a reverse lookup for the coordinate string
# '47.51,-122.26'; it needs the `geolocator` client, which is also disabled.
#response = geolocator.reverse('47.51,-122.26')

In [13]:
# Disabled: would display the 'address' entry of the raw lookup response.
#response.raw['address']