#### Imports

In [1]:
import os

import numpy as np
import pandas as pd
import plotly.express as px

#### Read csv

In [2]:
# Load every column as text; numeric columns are converted in a later cell.
flats = pd.read_csv(
    '../database.csv',
    sep=',',
    dtype='unicode',
)

**Convert strings to numeric**

In [3]:
def _to_numeric_or_keep(column):
    """Return `column` parsed as numbers, or unchanged if it cannot be parsed.

    Explicit replacement for `pd.to_numeric(column, errors='ignore')`, which
    is deprecated in recent pandas releases.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        # At least one value is not numeric: leave the whole column as text.
        return column


# Applied column by column, so each column is converted (or kept) as a whole.
flats = flats.apply(_to_numeric_or_keep)

### Prepare dataframe for visualising

**Create a dataframe with addresses**

In [4]:
# Strip the list punctuation ([, ], ") and every space from `address`, then
# split on commas into one column per address component (addr_0, addr_1, ...).
# Note that spaces inside a component are removed as well.
# `regex=True` is passed explicitly because the default differs between
# pandas versions.
addresses_df = (
    flats['address']
    .str.replace(r'[\[\] "]', '', regex=True)
    .str.split(',', expand=True)
    .add_prefix('addr_')
)

**Choose flats that have location, price and area**

In [5]:
# Keep only the ads where all three required fields are present.
# NOTE(review): longitude is not checked here, only latitude - confirm that is intended.
required_columns = ['latitude', 'total_price', 'total_area']
flats_info = flats.dropna(subset=required_columns)

**Choose flats from [city]**

In [6]:
# City to analyse; compared against the first address component below.
city = "Москва"

In [27]:
# Rows whose first address component equals the chosen city.
in_city = addresses_df.iloc[:, 0] == city
plot_columns = ['Number_of_rooms', 'total_price', 'total_area', 'latitude', 'longitude']

# Attach the address components; the inner join keeps only index labels
# that are present in both frames.
flats_info_city = pd.concat(
    [flats_info.loc[in_city, plot_columns], addresses_df],
    axis=1,
    join='inner',
)

#### Keep only districts that have more than 20 ads
This filters out ads whose district name is misspelled or otherwise incorrect.

In [28]:
# A district is kept only if it has strictly more ads than this threshold.
MIN_ADS_PER_DISTRICT = 20

# Group once and reuse the grouping for the counts. The grouping is built
# before the frame is filtered, so it still contains the small districts.
districts_city = flats_info_city.groupby('addr_1')
ads_per_district = districts_city['addr_0'].count()
districts_city_count = ads_per_district.loc[ads_per_district > MIN_ADS_PER_DISTRICT]

# Drop the ads that belong to districts below the threshold.
flats_info_city = flats_info_city.loc[flats_info_city.addr_1.isin(districts_city_count.index)]

#### Visualize location of ads

In [29]:
# The Mapbox access token is a credential, so it is read from the environment
# instead of being committed in the notebook.
mapbox_token = os.environ.get('MAPBOX_ACCESS_TOKEN')
if not mapbox_token:
    raise RuntimeError(
        'Set the MAPBOX_ACCESS_TOKEN environment variable before running this notebook.'
    )
px.set_mapbox_access_token(mapbox_token)

# One point per ad, coloured by district (addr_1).
fig = px.scatter_mapbox(flats_info_city, lat="latitude", lon="longitude", color='addr_1', size_max=15, zoom=10)
fig.show()

#### Visualize number of ads in districts
    Find median value for a district.
    Count number of ads in it.
    Draw circle of corresponding size

In [30]:
# Per-district median of the numeric columns only. The address columns are
# strings, and taking their median raises a TypeError on recent pandas versions.
district_medians = districts_city.median(numeric_only=True)

# Add the number of ads per district; the inner join keeps only the districts
# that are present in `districts_city_count`.
districts_city_new = pd.concat(
    [district_medians, districts_city_count.to_frame('count')],
    axis=1,
    join='inner',
)

In [31]:
# The Mapbox token is a plotly.express-wide setting that was already configured
# for the previous map, so the credential is not repeated here.

# One circle per district, placed at the median ad coordinates and sized by
# the number of ads in that district.
fig = px.scatter_mapbox(
    districts_city_new,
    lat="latitude",
    lon="longitude",
    size='count',
    size_max=100,
    color=districts_city_new.index,
    zoom=10,
)
fig.show()

#### Info about city

In [32]:
# Summary figures for the selected city after the district filter.
ads_total = flats_info_city.addr_0.count()
districts_total = len(districts_city_count)

# The double spaces reproduce the separators that print() inserts between arguments.
print(f'Number of ads in  {city}  : {ads_total}')
print(f'Number of districts in  {city}  : {districts_total}')

Number of ads in  Москва  : 10171
Number of districts in  Москва  : 25
