# Limpeza dos dados

In [None]:
# verificando a quantidade de dados duplicados
data['id'].duplicated().value_counts()

In [None]:
# removendo as linhas duplicadas
data.drop_duplicates(subset='id', keep='last', inplace=True)

# verificando o resultado após a remoção dos dados duplicados
data.duplicated().value_counts()

# Exercícios:

## 1. Organizar o código da aula 04 em funções.

### ANTES (como foi criado na aula 04)
* Código 1 (mapa)

In [9]:
import pandas as pd
import ipywidgets as widgets
from ipywidgets import fixed
import plotly.express as px # biblioteca para mapas

In [10]:
df = pd.read_csv('datasets/kc_house_data.csv')

In [11]:
df['level'] = df['price'].apply( lambda x:0 if x< 321950 else
                                          1 if( x > 321950) & ( x < 450000) else
                                          2 if( x > 450000) & ( x < 645000) else 3)


df['condition_type'] = df['condition'].apply( lambda x: 'bad' if x <= 2  else
                                                        'good' if x == 5 else
                                                        'regular' )


style = {'description_width': 'initial'}

# Iterative buttons 1
living_room_size = widgets.IntSlider(
    value = 13540,
    min = 290,
    max = 13540,
    step = 1,
    description='Minimum living size',
    disable=False,
    style = style
)

# Iterative buttons 2
minimum_bathrooms = widgets.IntSlider(
    value = 8,
    min = 0,
    max = 8,
    step = 1,
    description='Minimum number of bathrooms',
    disable=False,
    style = style
)

# Iterative buttons 3
maximum_price = widgets.IntSlider(
    value = 7700000,
    min = 75000,
    max = 7700000,
    step = 1,
    description='Maximum price',
    disable=False,
    style = style
)

# Iterative buttons 4
maximum_basement_area_size = widgets.IntSlider(
    value = 4820,
    min = 0,
    max = 4820,
    step = 1,
    description='Maximum basement area size',
    disable=False,
    style = style
)

# Iterative buttons 5
condition_type = widgets.Dropdown(
    options= df['condition_type'].unique().tolist(),
    value='good',
    description='Condition',
    disable=False
)

# Iterative buttons 6
yr_built_select = widgets.Dropdown(
    options= df['yr_built'].unique().tolist(),
    value=1900,
    description='Yr_built',
    disable=False
)

##############################################################################################################################

def update_map( df, living_room_limit, minimum_bathrooms_limit, maximum_price_limit, maximum_basement_area_size_limit,
              condition_type_defined, yr_built_select_defined):
    houses = df[(df['sqft_living'] <= living_room_limit) & 
                (df['bathrooms'] <= minimum_bathrooms_limit) &
                (df['price'] <= maximum_price_limit) &
                (df['sqft_basement'] <= maximum_basement_area_size_limit) &
                (df['condition_type'] == condition_type_defined) &
                (df['yr_built'] == yr_built_select_defined)]
    
##############################################################################################################################

    # plotando o gráfico
    fig = px.scatter_mapbox(houses,
                            lat='lat',
                            lon='long',
                            color='level',
                            size='price',
                            color_continuous_scale=px.colors.cyclical.IceFire,
                            size_max=15,
                            zoom=10 )
                        
    fig.update_layout( mapbox_style='open-street-map')
    fig.update_layout( height=600, margin={'r':0, 't':0, 'l':0, 'b':0})
    fig.show()

In [12]:
widgets.interactive( update_map, df=fixed( df ), living_room_limit=living_room_size, minimum_bathrooms_limit=minimum_bathrooms,
                   maximum_price_limit=maximum_price, maximum_basement_area_size_limit=maximum_basement_area_size, 
                    condition_type_defined=condition_type, yr_built_select_defined=yr_built_select)

interactive(children=(IntSlider(value=13540, description='Minimum living size', max=13540, min=290, style=Slid…

### DEPOIS  (códigos formatados seguindo a lógica de ETL)
* Código 1 (mapa)

In [6]:
# Libraries
import pandas as pd
import ipywidgets as widgets
from ipywidgets import fixed
import plotly.express as px


    # Functions
def data_collect( path ):
    # Function 1
    # load dataset
    data = pd.read_csv( path)
    
    return data


def data_transform( data ):
    # Function 2: Criar colunas com determinadas condições:
    data['level'] = data['price'].apply( lambda x:0 if x< 321950 else
                                              1 if( x > 321950) & ( x < 450000) else
                                              2 if( x > 450000) & ( x < 645000) else 3)

    data['condition_type'] = data['condition'].apply( lambda x: 'bad' if x <= 2  else
                                                                'good' if x == 5 else
                                                                'regular' )
    
    # Function 3: Criar botões iterativos para o mapa:
    style = {'description_width': 'initial'}
    
    # Iterative buttons 1
    living_room_size = widgets.IntSlider(
        value = 13540,
        min = 290,
        max = 13540,
        step = 1,
        description='Minimum living size',
        disable=False,
        style = style)

    # Iterative buttons 2
    minimum_bathrooms = widgets.IntSlider(
        value = 8,
        min = 0,
        max = 8,
        step = 1,
        description='Minimum number of bathrooms',
        disable=False,
        style = style)

    # Iterative buttons 3
    maximum_price = widgets.IntSlider(
        value = 7700000,
        min = 75000,
        max = 7700000,
        step = 1,
        description='Maximum price',
        disable=False,
        style = style)

    # Iterative buttons 4
    maximum_basement_area_size = widgets.IntSlider(
        value = 4820,
        min = 0,
        max = 4820,
        step = 1,
        description='Maximum basement area size',
        disable=False,
        style = style)

    # Iterative buttons 5
    condition_type = widgets.Dropdown(
        options= data['condition_type'].unique().tolist(),
        value='good',
        description='Condition',
        disable=False)

    # Iterative buttons 6
    yr_built_select = widgets.Dropdown(
        options= data['yr_built'].unique().tolist(),
        value=1900,
        description='Yr_built',
        disable=False)
 
    return data 


def data_load( data, living_room_limit, minimum_bathrooms_limit, maximum_price_limit, maximum_basement_area_size_limit,
             condition_type_defined, yr_built_select_defined):
   
    # Function 4: Definindo filtros e plotando o mapa:
    houses = data[(data['sqft_living'] <= living_room_limit) & 
                (data['bathrooms'] <= minimum_bathrooms_limit) &
                (data['price'] <= maximum_price_limit) &
                (data['sqft_basement'] <= maximum_basement_area_size_limit) &
                (data['condition_type'] == condition_type_defined) &
                (data['yr_built'] == yr_built_select_defined)]
    
    # plotando o gráfico
    fig = px.scatter_mapbox(houses,
                            lat='lat',
                            lon='long',
                            color='level',
                            size='price',
                            color_continuous_scale=px.colors.cyclical.IceFire,
                            size_max=15,
                            zoom=10 )
                        
    fig.update_layout( mapbox_style='open-street-map')
    fig.update_layout( height=600, margin={'r':0, 't':0, 'l':0, 'b':0})
    fig.show()

    # Function 5: Mostrando os botões com os filtros definidos: 
    widgets.interactive(data_load, data=fixed( data ), living_room_limit=living_room_size, 
                        minimum_bathrooms_limit=minimum_bathrooms, maximum_price_limit=maximum_price, 
                        maximum_basement_area_size_limit=maximum_basement_area_size, condition_type_defined=condition_type, 
                        yr_built_select_defined=yr_built_select) 
    
    return None

##############################################################################################################################

if __name__ == '__main__':
    # ETL
    
    # Collect
    #Function 1
    data_raw = data_collect( 'datasets/kc_house_data.csv')
    
    # Transform
    data_processing = data_transform( data_raw)
    
#     # Load
    data_load( data_processing)


TypeError: data_load() missing 6 required positional arguments: 'living_room_limit', 'minimum_bathrooms_limit', 'maximum_price_limit', 'maximum_basement_area_size_limit', 'condition_type_defined', and 'yr_built_select_defined'