# In [8]:
import pandas as pd 
import numpy as np
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
import plotly.express as px 


def show_dimmensions(data):
    """Print the number of rows and columns of ``data``.

    Args:
        data: Any object exposing a ``shape`` sequence whose first two
            entries are the row count and the column count.

    Returns:
        None. The two counts are written to standard output.
    """
    # Each entry pairs an output template with the shape axis it reports.
    report_lines = (
        ('Number of rows: {}', 0),
        ('Number of columns:{}', 1),
    )
    for template, axis in report_lines:
        print(template.format(data.shape[axis]))

def collect_data(path):
    """Load a CSV file into a DataFrame and report its dimensions.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The DataFrame read from ``path``.
    """
    frame = pd.read_csv(path)
    # Print the row/column counts of the freshly loaded table.
    show_dimmensions(frame)
    return frame

def collect_geodata(data, cols, geolocator=None, max_rows=20):
    """Reverse-geocode the first rows of ``data`` and attach address fields.

    For each of the first ``max_rows`` rows, the ``lat``/``long`` pair is sent
    to the geocoder as ``"<lat>,<long>"``. Every name in ``cols`` that appears
    as a key of the response's ``raw['address']`` mapping is copied into the
    column of the same name; fields that are missing stay ``'na'``.

    Args:
        data: DataFrame with ``lat`` and ``long`` columns. It is not modified.
        cols: Address keys to extract (e.g. ``['road', 'house_number']``).
            Each key becomes an output column with the same name.
        geolocator: Object with a ``reverse(query)`` method. Defaults to a
            ``Nominatim`` client, which performs network requests.
        max_rows: Number of leading rows to geocode. Defaults to 20.

    Returns:
        A new DataFrame holding the first ``max_rows`` rows of ``data`` (index
        preserved) plus one column per entry of ``cols``.
    """
    if geolocator is None:
        geolocator = Nominatim(user_agent='geopiExercises')

    # Work on a copy so the caller's frame (often a slice) is never written to.
    subset = data.head(max_rows).copy()

    # Collect results positionally so any index (non-0-based, duplicated) works.
    found = {col: ['na'] * len(subset) for col in cols}

    for pos, (lat, lon) in enumerate(zip(subset['lat'], subset['long'])):
        try:
            response = geolocator.reverse(f'{lat},{lon}')
        except GeocoderTimedOut:
            # Best effort: a timed-out lookup keeps the 'na' placeholders.
            continue

        if response is None:
            # No result for these coordinates.
            continue

        address = response.raw.get('address', {})
        # Fill every requested field independently; one being present must not
        # prevent the others from being stored.
        for col in cols:
            if col in address:
                found[col][pos] = address[col]

    for col in cols:
        subset[col] = found[col]

    return subset


#processing and transformation of the data 
def processing_data(data):
    """Derive features from the raw listings and geocode a 20-row sample.

    Steps performed:
      1. Parse the ``date`` column to datetime (written back into ``data``).
      2. Build a summary table (max, min, std, mean, median) of the int64 and
         float64 columns and print its dimensions.
      3. Add ``build_type`` (from ``bedrooms``) and ``level`` (from ``price``)
         to ``data`` in place.
      4. Reverse-geocode the first 20 rows with ``collect_geodata``.

    Args:
        data: DataFrame with at least ``date``, ``bedrooms`` and ``price``
            columns; ``collect_geodata`` additionally reads ``lat``/``long``.

    Returns:
        The frame returned by ``collect_geodata`` for the first 20 rows, i.e.
        NOT the full input. The full input still receives the ``date``,
        ``build_type`` and ``level`` columns as a side effect.
    """
    data['date'] = pd.to_datetime(data['date'])

    num_attributes = data.select_dtypes(include=['int64', 'float64'])

    # NOTE: this changes pandas' float display format for the whole session.
    pd.set_option('display.float_format', lambda x: '%.3f' % x)

    # Descriptive statistics, one row per numeric attribute.
    df1 = pd.concat(
        [
            num_attributes.max(),
            num_attributes.min(),
            num_attributes.std(ddof=0),  # population std, as np.std computes
            num_attributes.mean(),
            num_attributes.median(),
        ],
        axis=1,
    ).reset_index()
    df1.columns = ['atributes', 'maximo', 'minimo', 'std', 'media', 'mediana']

    show_dimmensions(df1)

    # Vectorised classification: the first matching condition wins, anything
    # that matches none falls through to the default.
    bedrooms = data['bedrooms']
    data['build_type'] = np.select(
        [bedrooms == 1, bedrooms == 2],
        ['studio', 'apartment'],
        default='house',
    )

    # Price bands: <=321950 -> 0, <=450000 -> 1, <=650000 -> 2, otherwise 3.
    # Stored as integers rather than the former mixed object column.
    price = data['price']
    data['level'] = np.select(
        [price <= 321950, price <= 450000, price <= 650000],
        [0, 1, 2],
        default=3,
    )

    cols = ['road', 'house_number']
    # Hand over an independent copy so the helper never writes into a slice
    # of ``data``.
    sample = data.head(20).copy()
    return collect_geodata(sample, cols)



def data_load(data):
    """Show the listings on an OpenStreetMap scatter map.

    Each point is placed at ``lat``/``long``, sized by ``price`` and coloured
    by ``level``. Previously ``color_continuous_scale`` was passed without a
    ``color`` column, so the scale had nothing to apply to and the selected
    ``level`` column went unused.

    Args:
        data: DataFrame containing ``id``, ``price``, ``lat``, ``long`` and
            ``level`` columns. It is not modified.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    map_data = data[['id', 'price', 'lat', 'long', 'level']].copy()

    # NOTE(review): Plotly Express is expected to apply a continuous colour
    # scale only to numeric columns, so coerce ``level`` in case it arrives as
    # an object column. Confirm against the Plotly docs.
    map_data['level'] = pd.to_numeric(map_data['level'], errors='coerce')

    fig = px.scatter_mapbox(
        map_data,
        lat='lat',
        lon='long',
        color='level',
        size='price',
        color_continuous_scale=px.colors.cyclical.IceFire,
    )

    fig.update_layout(mapbox_style='open-street-map')
    fig.update_layout(height=600, margin={'t': 0, 'l': 0, 'b': 0, 'r': 0})
    fig.show()

    return None

if __name__ == '__main__':
    # Extract: read the raw listings from 'house.csv'; collect_data also
    # prints the table's dimensions.
    data_raw = collect_data('house.csv')

    # Transform: derive features and geocode; note that processing_data
    # returns only the geocoded first 20 rows, not the full table.
    data_transform = processing_data(data_raw)

    # Load: display the transformed rows on an interactive map.
    data_load(data_transform)

        
    

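# Cell output:
#   (raw data, printed by collect_data)
#   Number of rows: 21613
#   Number of columns:21
#   (summary-statistics table, printed by processing_data)
#   Number of rows: 20
#   Number of columns:6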
