# Welcome to the Lab 🥼🧪
## Where do 1000+ unit portfolios own the greatest share of all homes by zip code in one state?

In this notebook, we are going to create a map showing institutional ownership of single family homes by zip code for an entire state.

Welcome to the Lab - LFG

**Note:** This notebook will work with any of the 70k+ markets supported by the Parcl Labs API.

As a reminder, you can get your Parcl Labs API key [here](https://dashboard.parcllabs.com/signup) to follow along. 

To run this immediately, you can use Google Colab. Remember, you must set your `PARCL_LABS_API_KEY` as a secret. See this [guide](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) for more information. The map cell additionally reads a Mapbox access token from the `MBOX_ACCESS_TOKEN` environment variable.

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ParclLabs/parcllabs-examples/blob/main/python/inspiration/map_of_investor_ownership_zip_code.ipynb)


You can expect to create this:

![Chart](../assets/atlanta_investor_ownership.png)

In [None]:
# Environment setup
import os
import sys
import json
import subprocess
from datetime import datetime
from urllib.request import urlopen

# Collab setup from one click above
if "google.colab" in sys.modules:
    from google.colab import userdata
    %pip install parcllabs plotly kaleido
    api_key = userdata.get('PARCL_LABS_API_KEY')
else:
    api_key = os.getenv('PARCL_LABS_API_KEY')

In [None]:
import parcllabs
import pandas as pd
import plotly.express as px
from parcllabs import ParclLabsClient

# Print the version string exposed by the imported parcllabs package
print(f"Parcl Labs Version: {parcllabs.__version__}")

In [None]:
# Create the API client with the key loaded in the environment-setup cell;
# every request below goes through this object.
client = ParclLabsClient(api_key=api_key)

In [None]:
# Search for the ZIP5 markets in Georgia and return them as a DataFrame
# (the request is sent with limit=1000)
ga_zips = client.search_markets.retrieve(
    state_abbreviation='GA',
    location_type='ZIP5',
    params=dict(limit=1000),
    as_dataframe=True,
)

ga_zips

In [None]:
# Pull the parcl_id column out as a plain Python list and show how many ids we have
zip_ids = ga_zips.loc[:, 'parcl_id'].to_list()
len(zip_ids)

In [None]:
# How big are the investors? Request the single family housing stock ownership
# metrics (broken out by portfolio size) for every Georgia ZIP id, with limit=1.
portfolio_size_ownership = (
    client
    .portfolio_metrics_sf_housing_stock_ownership
    .retrieve_many(
        parcl_ids=zip_ids,
        params=dict(limit=1),
        as_dataframe=True,
    )
)

In [None]:
# Join the ownership metrics to the market search results on parcl_id so each
# metrics row also carries the columns returned by the search.
df = portfolio_size_ownership.merge(ga_zips, on='parcl_id')
df.head()

In [None]:
# Top 20 rows ranked by pct_sf_housing_stock_portfolio_1000_plus, highest first
(
    df
    .sort_values('pct_sf_housing_stock_portfolio_1000_plus', ascending=False)
    .loc[:, [
        'name',
        'state_abbreviation',
        'count_portfolio_1000_plus',
        'pct_sf_housing_stock_portfolio_1000_plus',
    ]]
    .head(20)
)
                                                                           

In [None]:
# Keep only the three columns the map needs: the two 1000+ portfolio metrics and `name`
df = df.loc[:, [
    'count_portfolio_1000_plus',
    'pct_sf_housing_stock_portfolio_1000_plus',
    'name',
]]
df.head()

In [None]:
# Let's also get the number of single family homes owned by institutions

In [None]:
# Let's create the map: start by downloading the Georgia ZIP code boundaries (GeoJSON)
GEOJSON_SOURCE = 'https://raw.githubusercontent.com/OpenDataDE/State-zip-code-GeoJSON/master/ga_georgia_zip_codes_geo.min.json'
with urlopen(GEOJSON_SOURCE) as response:
    zips = json.loads(response.read())

In [None]:
# Rename `name` to `ZCTA5CE10`, the property key used by the GeoJSON features,
# so the frame and the boundaries share a join column.
df = df.rename(columns=dict(name='ZCTA5CE10'))
df.head()

In [None]:
# Preview the first rows of df again (same call as at the end of the previous cell)
df.head()

In [None]:
# Collect the ZCTA5CE10 property of every feature in the GeoJSON
all_fips_ids = [
    feature['properties']['ZCTA5CE10']
    for feature in zips['features']
]
len(all_fips_ids)

In [None]:
# Fill holes with no data: any ZIP code present in the GeoJSON but absent from
# `df` gets a row with both metrics set to 0, so every boundary has a value to
# color. NOTE(review): this renders "no data" the same as "0% owned".
map_ids = df['ZCTA5CE10'].unique().tolist()

# Membership is tested against a set so the scan over the GeoJSON ids stays linear
# (the list-based check was quadratic in the number of ZIP codes).
known_ids = set(map_ids)
missing = [zcta for zcta in all_fips_ids if zcta not in known_ids]

missing_df = pd.DataFrame({'ZCTA5CE10': missing}).assign(
    count_portfolio_1000_plus=0,
    pct_sf_housing_stock_portfolio_1000_plus=0,
)

# ignore_index gives the combined frame a fresh 0..n-1 index; without it the two
# inputs both start at 0 and `out` ends up with duplicate index labels.
out = pd.concat([df, missing_df], ignore_index=True)
out

In [None]:
# Parcl Labs "powered by" logo assets, keyed by color variant
labs_logo_lookup = dict(
    blue='https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api.png',
    white='https://parcllabs-assets.s3.amazonaws.com/powered-by-parcllabs-api-logo-white+(1).svg',
)

# set charting constants
# Logo placement settings expressed in paper coordinates
labs_logo_dict = {
    'source': labs_logo_lookup['white'],
    'xref': "paper",
    'yref': "paper",
    'x': 0.5,   # horizontally centered
    'y': 1.02,  # tweak this value to move the logo up or down
    'sizex': 0.15,
    'sizey': 0.15,
    'xanchor': "center",
    'yanchor': "bottom",
}

# Image dimensions per target platform
media_img_size_lookup = {'X': dict(width=1600, height=900)}

PLATFORM = 'X'

# set image sizes for the selected platform
IMG_WIDTH, IMG_HEIGHT = (
    media_img_size_lookup[PLATFORM][dimension] for dimension in ('width', 'height')
)

# plotting title settings
PLOT_TITLE_SETTINGS = dict(y=0, x=0, xanchor='left', yanchor='bottom')

In [None]:
import plotly.express as px
import pandas as pd

# Build the Georgia choropleth.
# Inputs from earlier cells: `out` (one row per ZIP code with the ownership
# metrics), `zips` (GeoJSON boundaries) and `labs_logo_lookup` (logo URLs).

# Define the color scale: light blue at the low end, dark blue at the top,
# with an intermediate stop at 20% of the color range
color_continuous_scale=[
    (0, "#EEF7FF"),
    (0.2, "#B0CFFF"),
    (1, "#04428C")
]

# Set your Mapbox access token (read from the environment)
mapbox_access_token = os.getenv('MBOX_ACCESS_TOKEN')

px.set_mapbox_access_token(mapbox_access_token)

# Mapbox-hosted styles cannot render without a token. When none is configured,
# fall back to a dark base map that plotly serves without one, so the notebook
# still produces a map instead of an empty frame.
if mapbox_access_token:
    mapbox_style = "mapbox://styles/mapbox/dark-v9"
else:
    mapbox_style = "carto-darkmatter"

# Create the choropleth map
fig = px.choropleth_mapbox(out,
                           geojson=zips,
                           featureidkey='properties.ZCTA5CE10',
                           locations='ZCTA5CE10',
                           color='pct_sf_housing_stock_portfolio_1000_plus',
                           color_continuous_scale=color_continuous_scale,
                           # values above 10 are drawn with the darkest color
                           range_color=(0, 10),
                           mapbox_style=mapbox_style,
                           zoom=6,
                           # 32° 38' 46" N, 83° 25' 54" W converted to decimal degrees:
                           # 32 + 38/60 + 46/3600 = 32.6461, 83 + 25/60 + 54/3600 = 83.4317
                           center={"lat": 32.6461, "lon": -83.4317},
                           opacity=0.9,
                           labels={'pct_sf_housing_stock_portfolio_1000_plus':'% Homes Owned'}
                          )

# Adjust color bar and title text color
fig.update_coloraxes(colorbar=dict(
    title="% of SFH's Owned",
    tickfont=dict(color='white', size=8),
    title_font=dict(color='white', size=10),  # Title font size set to 10
    # Horizontal color bar along the bottom edge; x is left at its default
    y=-0.01,  # Just below the bottom of the plotting area
    len=0.35,  # Length of the color bar (35% of the plot width)
    orientation='h',  # Horizontal color bar
    thickness=15,  # Thickness of the color bar
    xanchor='center',  # Anchor the bar at its horizontal center
)
)

# Place the white Parcl Labs logo, centered horizontally, near the top of the figure
fig.add_layout_image(dict(
     source=labs_logo_lookup['white'],
     xref="paper",
     yref="paper",
     x=0.5,
     y=1.03,
     sizex=0.15,
     sizey=0.15,
     xanchor="center",
     yanchor="top"
))

# Update the layout for background color, margins, and figure dimensions
fig.update_layout(
    title='% of Single Family Homes Owned by Large Institutional Portfolios (1000+ Units) Georgia',
    title_x=0.5,  # Center the title
    title_font=dict(color='white', size=16),
    paper_bgcolor='#080D16',
    geo_bgcolor='#080D16',
    margin={"r":0, "t":60, "l":0, "b":0},  # Adjust top margin to give space for the title
    width=1200,
    height=800,
)

# Display the map
fig.show()

In [None]:
# Let's write the ranked tweet:

# Keep the 50 rows with the highest ownership share, highest first
out = (
    out
    .sort_values('pct_sf_housing_stock_portfolio_1000_plus', ascending=False)
    .reset_index(drop=True)
    .head(50)
)

# Print one numbered line per ZIP code, starting at rank 1
ranked_pairs = zip(out['ZCTA5CE10'], out['pct_sf_housing_stock_portfolio_1000_plus'])
for rank, (name, pct) in enumerate(ranked_pairs, start=1):
    print(f"{rank}. {name}: Own {pct}% of SFH's")