In [1]:
%%html
<style>table {display: inline-block}</style>

<center><h1>Austrian Housing Prices - Visualization and Advanced Regression Techniques</h1>
<h4>by Arlin Gruber</h4></center>

 
 ### Goal
The dataset contains 15 explanatory variables that describe a lot of aspects of residential homes in Austria. The data has been mined from a large Austrian property platform over the course of several weeks and contains 100.000 unique advertisements. Detailed information of how this was achieved is available [here](https://github.com/AReburg/Willhaben-Data-Mining). The overall goal is to predict the pricing of a given house/appartment based on the given dataset. The data is split into a train and test data set for me to train and test on. With this dataset the principles of stacking, blending and ensembling techniques shall be explored to predict accurate outcomes.



 **Table of Contents**
 
1. [Installing/Loading Packages](#packages)
    
2. [Reading Data](#data)

3. [Visualization](#visualization) 

4. [Data Preparation](#dataprep)

5. [Geospatial Distribution across Austria](#geospatial_distribution) 
    

### <a name="data">Reading Data</a> 
The data (as previously mentioned) is split into a train and test file with both the features and target variable available in the train.csv file to train on and only the features (without the target variable) in the test set. 
Although I have loaded the files, it is still necessary to also load them in R, which takes a few seconds.   
The data can be found in the db folder.

In [26]:
# import modules/libraries
import warnings 
warnings.simplefilter(action='ignore')
import osmnx as ox
import pandas as pd
import numpy as np
import geopandas as gpd
import time
import chardet
from scipy import stats
import itertools
import os
import pickle
import geojson
from sqlalchemy import create_engine
import re
import sqlite3
from pathlib import Path
from shapely.geometry.polygon import Polygon
from shapely.geometry.multipolygon import MultiPolygon
import chardet
from scipy import spatial
from scipy.spatial import KDTree
from shapely import wkt

cwd = Path().resolve()

# visualisation
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
import seaborn as sns
import matplotlib as mpl 
%matplotlib inline 
import matplotlib.pyplot as plt

pd.set_option('display.max_columns', None)

In [40]:
def get_area_code(location):
    """ area code of the real estate
    source: https://stackoverflow.com/questions/16348538/python-regex-for-int-with-at-least-4-digits
    """
    pattern = re.compile(r"(?<!\d)\d{4,7}(?!\d)")
    try:
        return pattern.findall(location)[0]

    except:
        return np.nan

    
def normalize_living_space(area, price):
    """ """
    try:
        return round(price/area, 2)
    except:
        return np.nan
    
    
def tukey_rule(data_frame, column_name):
    """ apply tukey rule to remove outliers """
    data = data_frame[column_name]
    Q1 = data.quantile(0.25)
    Q3 = data.quantile(0.75)
    IQR = Q3 - Q1
    max_value = Q3 + 1.5 * IQR
    min_value = Q1 - 1.5 * IQR

    return data_frame[(data_frame[column_name] < max_value) & (data_frame[column_name] > min_value)]

In [35]:
plz = pd.read_csv(os.path.join(Path(cwd).parent, 'data', 'look-up', 'PLZ_LOOKUP_MERGE.csv'))
plz['PLZ'] = plz['PLZ'].fillna(0).astype("int64")
plz.head()

Unnamed: 0,PLZ,Bestimmungsort,OKZ,Ortschaft,GKZ,GEMNAM,Ort,Bundesland
0,1010,Wien,17223,"Wien,Innere Stadt",90001,Wien,Wien,Wien
1,1020,Wien,17224,"Wien,Leopoldstadt",90001,Wien,Wien,Wien
2,1030,Wien,17225,"Wien,Landstraße",90001,Wien,Wien,Wien
3,1040,Wien,17226,"Wien,Wieden",90001,Wien,Wien,Wien
4,1050,Wien,17227,"Wien,Margareten",90001,Wien,Wien,Wien


In [None]:
con = sqlite3.connect(os.path.join(Path(cwd).parent, 'data', 'real_estate.sqlite'))
df = pd.read_sql_query("SELECT * FROM RealEstate", con)
df['price'] = df['price'].fillna(0)#.astype('int', errors='ignore')
df['price'] = pd.to_numeric(df['price'], errors='coerce')
df['PLZ'] = df.apply(lambda df: get_area_code(df.location), axis=1)
df['PLZ'] = df['PLZ'].fillna(0).astype("int64")

df['area'] = df['wohnflaeche'].str.extract('(\d+)').astype('float')
df['price_m2'] = round(df.apply(lambda x: normalize_living_space(x.area, x.price), axis=1), 1)


In [43]:
df2 = df.merge(plz[['PLZ','Ort', 'Bundesland']], on=['PLZ'], how='left')

In [44]:
df2.head(1)

Unnamed: 0,scrape_date,code,price,type,makler,zusatz,last_change_date,title,url,location,objekttyp,wohnflaeche,zimmer,stockwerke,heizung,keller,garage,barrierefrei,fahrstuhl,zustand,garten,terrasse,verfuegbar,abstellraum,boeden,einbaukueche,dachterrasse,parkplatz,balkon,wintergarten,nutzflaeche,loggia,bautyp,topnummer,carport,wohneinheiten,miete,preis,status,grundflaeche,gesamtflaeche,ausbaustufe,fertigstellung,teilmoebliert_moebliert,teilmoebliert__moebliert,befristung,url_rank,num_img,highlight,agency_address,agency_url,PLZ,area,price_m2,Ort,Bundesland
0,10.6.2022 11:38,410553509,485000.0,condominium,0 %,"EUR 485.000,00 exkl. TG-Platz",09.07.2022,Licht! Liebe! Leben! Neuen Wohnraum grosse 4 Z...,https://www.willhaben.at/iad/immobilien/d/eige...,"Walgaustraße 26+26a, 6832 Röthis, Feldkirch, V...",Wohnung,84 m²,4,1,Solar,yes,yes,yes,yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6832,84.0,5773.8,Sulz-Röthis,Vorarlberg


### <a name="Outlier">Outlier Removal</a> 

In [51]:
for column in ['price']:
    df2 = tukey_rule(df2, column)

### <a name="visualization">Visualization</a> 

In [56]:
def explore(df, category='rented flat'):
    """ scatter plot (price over living area) of a property type """
    fig = px.scatter(df[(df['type'] == category) & (df['area'] > 0) & (df['price'] > 10) &#(df['price'] <=5000)&
                        (df['area'] <= 350)], x="area", y="price")
    fig.update_layout(width=800, height=400,
            #title_text=format_title("Price per living area", f"Rented flat"),
            font=dict(family="Open Sans"))
    fig.show()

explore(df2, category='rented flat')
#explore(df2, category='condominium')
#explore(df2, category='single-family home') 


#condominium single-family home
#print(df[(df['type'] =='rented flat') & (df['price'] <=5000) & (df['living_area'] >0)& (df['living_area'] <=350)].shape[0])

In [None]:
# clean data
df = df[((df['type'] == 'rented flat') & (df['price'] <8000) & (df['price'] >100))
       | ((df['type'] == 'condominium') & (df['price'] <4000000) & (df['price'] >30000))
       | ((df['type'] == 'single-family home') & (df['price'] <4000000) & (df['price'] >30000)& (df['area'] >10))]
explore(df, category='rented flat')
explore(df, category='condominium')
explore(df, category='single-family home') 

In [None]:
df['rooms'].value_counts().head(10)
df.type.value_counts()
print(f"The data set has unique adverisements: {df.code.nunique()}")

You can see that the target variable "SalePrice" has a skewed distribution. Because I will be predicting a continuous feature, I will be using regressors which perform better if the target variable has a normal distribution with little to no skewdness. Thus, it is important to remember that in a later stage the data needs to be normalized.
### <a name="Data Preperation">Data Preperation and Feature Extraction</a> 

The following two functions help us to count the appearance in the aggregated data and to caluclate the ratio of the properties where a real estate agency is selling.

In [None]:
def calc_comm(raw):
    if len(raw.value_counts()) < 2:
        # With commission -> 100% real estate agency selling
        if raw.value_counts().index.tolist()[0] == True:
            return 1
        # No commission
        else:
            return 0
    else:
        # eq: broker comm./(broker comm. + no broker comm.)
        return round(raw.value_counts().sort_index(ascending=False)[1]/
                     (raw.value_counts().sort_index(ascending=False)[0]+raw.value_counts().sort_index(ascending=False)[1]), 2)


In [None]:
def get_geo_data(selector):
    # Load Austrian geojson 
    if selector == 'municipal':
        link = 'https://raw.githubusercontent.com/ginseng666/GeoJSON-TopoJSON-Austria/master/2021/simplified-99.5/gemeinden_995_geo.json'

    elif selector == 'district':
            link = 'https://raw.githubusercontent.com/ginseng666/GeoJSON-TopoJSON-Austria/master/2021/simplified-99.9/bezirke_999_geo.json'
            
        
    elif selector == 'state':
        link = 'https://raw.githubusercontent.com/ginseng666/GeoJSON-TopoJSON-Austria/master/2021/simplified-99.9/laender_999_geo.json'
   
    with urlopen(link) as response:
        counties = json.load(response)
        
    return counties


def format_title(title, subtitle=None, subtitle_font_size=14):
        """        """
        title = f'<b>{title}</b>'
        if not subtitle:
            return title
        subtitle = f'<span style="font-size: {subtitle_font_size}px;">{subtitle}</span>'
        return f'{title}<br>{subtitle}'

    
def geo_heatmap(df, selector='state', housing_category='rented flat'):
    """ selector specifies the geographic resolution """
    df['count'] = np.nan
    feat_key = ''
    locations = ''
    hover_data = ''
    opacity = 0.95
    title = ''
    category = ''
    if housing_category == 'rented flat':
        df.type.head()
        df = df[(df['type'].str.contains('rented flat', na = False)) & (df['rooms'] < 6)].copy()
        category = 'rented flat'
    elif housing_category == 'condominium':
        df.type.head()
        df = df[(df['type'].str.contains('condominium', na = False))].copy()
        category = 'condominium'
    elif housing_category == 'single-family home':
        df.type.head()
        df = df[(df['type'].str.contains('single-family home', na = False))].copy()
        category = 'single-family home'
        # df2.sort_values("price_sqrt", ascending=False)
        # df4.loc[df4.commission.isna()==True,'price_sqrt'] =1

    if selector == 'municipal':
        feat_key = "properties.iso"
        locations = "GKZ"
        hover_data = ["GKZ", "Bezirk", "Bundesland", "count"]
        hover_name = "GKZ"
        title = ["Austrian Housing Market", f"{category} prices on municipal level"]
        dfx = df.groupby(['GKZ'], as_index=False).agg({'price': 'mean', 'price_sqrt': 'mean', 'Bezirk': 'first', 'Bundesland': 'first', 'commission':calc_comm, 'count':'size'})

    elif selector == 'district':
        feat_key = "properties.name"
        locations = "Bezirk"
        hover_data = ["Bundesland", "Bezirk", "count"]
        hover_name = "Bezirk"
        opacity = 0.65
        dfx = df.groupby(['Bezirk'], as_index=False).agg({'price':'mean', 'price_sqrt':'mean', 'Bundesland':'first', 'commission':calc_comm, 'count':'size'})
        title = ["Austrian Housing Market", f"{category} prices on district level"]
    
    elif selector == 'state':
        feat_key = "properties.name"
        locations = "Bundesland"
        hover_data = ["Bundesland", "count"]
        hover_name = "Bundesland"
        opacity = 0.65
        title = ["Austrian Housing Market", f"{category} prices on state level"]
        dfx = df.groupby(['Bundesland'], as_index=False).agg({'price':'mean', 'price_sqrt':'median', 'commission':calc_comm, 'count':'size'})

    fig = px.choropleth_mapbox(dfx, geojson=get_geo_data(selector), locations=locations,
                               featureidkey=feat_key, color="price_sqrt",
                               color_continuous_scale="Viridis",
                               range_color=(df['price_sqrt'].quantile(0.25), df['price_sqrt'].quantile(0.75)),
                               mapbox_style="open-street-map",
                               hover_data = hover_data,
                               hover_name = hover_name,
                               #mapbox_style="carto-positron",
                               zoom=6, center = {"lat": 47.809490, "lon": 13.055010},
                               opacity=opacity,
                              )
    
    fig.update_layout(width=1000, height=600,
            title_text=format_title(title[0], f"{title[1]}"),
            font=dict(family="Open Sans"),
            coloraxis_colorbar_title='€/m²'
            )
    fig.update_layout(margin={"r":30,"t":60,"l":30,"b":0})
    fig.show()

In [None]:
aber = df.date.unique().tolist()
#print(a)
from datetime import datetime
k = [print(int(i)) for i in aber]
k




### <a name="geospatial_distribution">Geospatial distribution</a> 
In the following section I will analyze how property prices are distributed accross Austria. The data set includes several categories such as price per m², area code as well as categories such as broker commission, garden, balcony, elevator, cellar or whether a garage is present. All the rows where no price has been specified are dropped.

In the following section I will analyze how property prices are distributed accross Austria. The data set includes several categories such as price per m², the municipal, state, district level as well as categories such as broker commission, garden, balcony, elevator, cellar or garage present.

Outlier are removed from the data set. Only rented flats with up to 5 rooms are considered moving forward.

In order to map the data to a geographic area the geo json data needs to be imported:

The following three geospatial heatmaps depict the real estate prices on a state, district and municipal level. The following heat map shows the real estate prices on a state level. The median prices per m² of rental property are the highest in Tyrol followed by Vorarlberg, Vienna and Salzburg. Vorarlberg however, has only a total of 94 advertisements and might be overrepresented.

In [None]:
geo_heatmap(df, 'state', 'rented flat')

In [None]:
geo_heatmap(df, 'district', 'single-family home') #condominium single-family home

In [None]:
geo_heatmap(df, 'municipal', 'condominium')

In [None]:
geo_heatmap(df, 'municipal', 'rented flat')

Breaking down the geographical structure to the municipal level confirms that even the picture painted on district level
should be perceived with great care. For example, the district "Bludenz" accounts for a large area and has just one data entry (cc is 80405). The situation is similar for the rest of Tyrol which is very underrepresented sample wise compared to the rest of Austria. For some of the districts it is difficult to draw a conclusion since the sample size is very low. 
One example would be Bludenz with just a couple of rental properties.

## Part II: What attributes contributing to a higher condo price the most?

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

df_f = df.drop(['CC', 'PG', 'price'], axis=1)

plt.figure(figsize=(6,5))
cor = df_f.corr()
sns.heatmap(cor, annot=True, cmap=plt.cm.Reds)
plt.show()

In [None]:
cor_target = abs(cor['price_sqrt'])#Selecting correlated features
cor_target[cor_target>0.1]

In [None]:
cor_target = abs(cor['rooms'])
cor_target[cor_target>0.1]

In [None]:
cor_target = abs(cor['cellar'])
cor_target[cor_target>0.1]

At this point it is difficult to draw conclusions regarding certain factors contributing to the rental price of a flat. From the correlation matrix it is obvious that the number of rooms is highly correlated to the living area. Flats with a cellar are more likely to have an elevator and a garage or parking lot. However these are still very weak correlation and needs to be further investigated.

## How is the price changing for the advertisements?
It is interesting to know, how the propery and rental prices are changing over the live time of the advertisement.
First, the number of occourances of each advertisement is calculated.
Afterwards the data set is aggregated indicating us the *first* and the *last* **date** and **price** of the data set.

In [None]:
df7 = df.groupby(['code']).agg({'code': ['first', 'size'], 'price': ['first', 'last'], 'url_rank': ['first','mean','last'], 'date': ['first', 'last'], 'type': ['first'], 'commission': ['first']})
df7.columns = df7.columns.droplevel(0)
df7.columns = ['code', 'occ.', 'p.first', 'p.last', 'r.first', 'r.mean','r.last', 'dt.first', 'dt.last', 'type','commission']
df7['online'] = (df7['dt.last']-df7['dt.first']).dt.days
df7['pdiff'] =  df7['p.last']-df7['p.first']
df7 = df7.reset_index(drop=True)
df7.head(10)

In [None]:
df8 = df7.copy()
df8 = df8.dropna(subset=['p.last'])
df8 = df8[(df8['p.last'] != 0)]
# df8 = df8[(df8['type']=='rented flat') & (np.absolute(df8.pdiff) < 600)]
val=df8[df8['pdiff'] != 0]['pdiff'].mean()

print(f'Avg. price change since initial offering: {round(val)} €')
print(f"{df8[df8['pdiff'] != 0].shape[0]}, {df8.shape[0]}")
df8.reset_index(drop=True, inplace=True)
df8[df8['pdiff'] != 0].head(20)

In [None]:
fig = px.scatter(df8[df8['pdiff'] != 0], x="online", y="pdiff", color="type",
                  hover_data=['type', 'occ.'], width=600, height=400) #size='occ.',
fig.show()

## Part III: What are the chances of getting a condo without a real estate agency?


In [None]:
print(f"Roughly {100 *round(df7.commission.value_counts()[1]/df7.shape[0],3)}% of the flats/houses can only be rented/purchased from a real eastate agency.")

From the 85,000 properties in the data set only 16.7% are without a real estate agency. It will be interesting to track this value over time since the Austrian goverment changed a law regarding the real estate agancy fee structure.

## Part IV: Is it possible to identify paid upgrades in the ads?
In case an advertisement is older but still on the first pages it is identified as a paid upgrade. This are the conditions that need to be met in order to be classified as such:
- if the advertisement is online for an extended period of time (one week)
- AND the mean ranking is below a threshold of 10

According to the [willhaben website](www.willhaben.at) this is the current pricing model for the real estate section. There are additional models available for other topics such as jobs, goods and services, cars etc.


| product   | price | expiration |
|-----------|-------|------------|
| ad sell flat | 39 €  | 30 days    |
| ad rent flat | 29 €  | 30 days    |



Additionally the following upgrades can be purchased:

| upgrade                | price   | expiration   |
|------------------------|---------|--------------|
| re-rank ad to beginning   | 27.90 € | 4 weeks      |
| highlight ad. by color | 17.90 € | once per ad. |


To calculate the total revenue in the housing section (since 2022-06-06; altough the infrastructure was initially not in place to get the whole picture) is estimated with the following equations:
1) $R_{housing}=R_{rental}+R_{real estate}$

2) $R_{flat}=c_{flat}*p_{ad,flat} + c_{upgrades}*p_{upgrade}$

3) $R_{house}=c_{house}*p_{ad,house} + c_{upgrades}*p_{upgrade}$

In [None]:
# define pricing model
price_ad_rent = 29
price_ad_house = 39
price_ad_upgrade = 27.90

df6 = df7.copy()

df6 = df6[(df6['online']>7) & (df6['r.mean']<100)]
df6.sort_values(by='dt.last', ascending = False, inplace=True)
num_upgr_ads_flat= df6[df6['type']=='rented flat'].shape[0]
num_upgr_ads_house= df6[df6['type']=='condominium'].shape[0] + df6[df6['type']=='single-family home'].shape[0]
num_paid_ads= df6.shape[0]

print(f'Since 2022-06-10:\n{num_paid_ads} unique housing ads total\n{num_upgr_ads_flat} customers paid for the flat upgrade\n{num_upgr_ads_house} customers paid for the housing upgrade')

In [None]:
df7.sort_values(by='dt.first', ascending=True, inplace=True)
rent_flat = df7[(df7.type == 'rented flat')]['code'].shape[0]
house = df7[(df7.type == 'condominium') | (df7.type == 'single-family home')]['code'].shape[0]

sume = rent_flat*price_ad_rent+house*price_ad_house
f2_exp = df7[(df7['online']> 30) & (df7.type == 'rented flat')]['code'].shape[0]
h2_exp = df7[(df7['online']> 30) & ((df7.type == 'condominium') | (df7.type == 'single-family home'))]['code'].shape[0]
print(f"Since 2022-06-10:")
print(f"Unique flats: {rent_flat}, unique appartments/houses: {house}")
print(f"Ad revenue generated: {sume} €")
print(f"Additional revenue durch verlängern inserat: {f2_exp*price_ad_rent+ h2_exp*price_ad_house} €")
print(f"Sum: {sume+ f2_exp*price_ad_rent+ h2_exp*price_ad_house} €")

### Part V: How many new ads are appearing on the platform per week?¶

How many advertisments are in a given interval?

## OSM Features

In [2]:
# import modules/libraries
import warnings 
warnings.simplefilter(action='ignore')
import osmnx as ox
import pandas as pd
import numpy as np
import geopandas as gpd
import time
from scipy import stats
import itertools
import os
import pickle
import geojson
from sqlalchemy import create_engine
import re
import sqlite3
from pathlib import Path
from shapely.geometry.polygon import Polygon
from shapely.geometry.multipolygon import MultiPolygon
import chardet
from scipy import spatial
from scipy.spatial import KDTree
from shapely import wkt

cwd = Path().resolve()

# visualisation
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
import seaborn as sns
import matplotlib as mpl 
%matplotlib inline 
import matplotlib.pyplot as plt

pd.set_option('display.max_columns', None)

In [3]:
gdf = gpd.read_file(os.path.join(Path(cwd).parent, 'data', 'geojson', 'laender_999_geo.json'))
gdf.head()

Unnamed: 0,name,iso,geometry
0,Tirol,7,"MULTIPOLYGON (((12.70891 46.73637, 12.68781 46..."
1,Vorarlberg,8,"MULTIPOLYGON (((10.13171 47.03008, 10.15697 46..."
2,Wien,9,"MULTIPOLYGON (((16.36286 48.12937, 16.30943 48..."
3,Burgenland,1,"MULTIPOLYGON (((16.06293 46.85060, 15.99344 46..."
4,Kärnten,2,"MULTIPOLYGON (((14.43169 46.44365, 14.30534 46..."


In [4]:
from pathlib import Path
path = Path(os.path.join(Path(cwd).parent, 'data', 'osm', 'Burgenland_water.csv'))
path.is_file()

True

In [11]:
def get_osm_data(region, name, data):
    df = ox.geometries.geometries_from_polygon(region, tags=data[0])
    df.to_csv(os.path.join(Path(cwd).parent, 'data', 'osm', 
                           f'{name}_{list(data[0].values())[0]}.csv'), columns=['geometry'])
    return df


for index, row in gdf.iterrows():
    region = row['geometry']#.unary_union
    t0 = time.time()
    water = get_osm_data(region, row['name'], [{'natural':'water'}])
    #forest = ox.geometries.geometries_from_polygon(region, tags = {'landuse': 'forest'})
    #rivers = ox.geometries.geometries_from_polygon(region, tags = {'waterway': 'river'})
    print (f"Completed in {round(time.time() - t0)} s")

#boundary_geojson.drop(columns=['cartodb_id', 'created_at', 'updated_at'], inplace=True)
#region = boundary_geojson.geometry.unary_union
#region

Completed in 187 s
Completed in 20 s
Completed in 7 s
Completed in 28 s
Completed in 30 s
Completed in 69 s
Completed in 50 s
Completed in 31 s
Completed in 55 s


In [5]:
def import_csv_to_gpd(name):
    """ import the csv file a gepandas dataframe """
    df = pd.read_csv(os.path.join(Path(cwd).parent, 'data', 'osm', name), sep=",")
    df['geometry'] = df['geometry'].apply(wkt.loads)
    gdf = gpd.GeoDataFrame(df, crs='epsg:4326')
    return gdf

water = import_csv_to_gpd('Burgenland_water.csv')

In [16]:
print(water['geometry'][0])

POLYGON ((16.095781 47.2432288, 16.0957333 47.2431287, 16.0956077 47.2432243, 16.0956382 47.2433199, 16.0957174 47.2433168, 16.095781 47.2432288))


In [None]:
fig = go.Figure(go.Scattermapbox(
    mode = "markers+lines",
    lon = [lon_longitude],
    lat = [lon_latitude],
    marker = {'size': 10, 'color': 'blue'})).update_traces(uid=0, below="")

fig.add_trace(go.Scattermapbox(
    mode = "lines",
    lon = list(lats),
    lat = list(lons),
    name = f"{radius_in_meter} m radius",
    marker = {'size': 15, 'color': 'red', 'opacity': 0.2}))

fig.update_layout(
    margin ={'l':0,'t':0,'b':0,'r':0},
    mapbox = {
        'center': {'lon': lon_longitude, 'lat': lon_latitude},
        'style': "stamen-terrain",
        'zoom': 6})
fig.show()

In [None]:
fig = go.Figure(go.Choroplethmapbox(geojson=counties, locations=df.fips, z=df.unemp,
                                    colorscale="Reds", zmin=0, zmax=12,
                                    marker_opacity=0.5, marker_line_width=0), layout=dict(mapbox_style="carto-positron",
                  mapbox_zoom=3, mapbox_center = {"lat": 37, "lon": -95}))
fig.show()