# Housing Rental Analysis for San Francisco

In [40]:
# Import the required libraries and dependencies
import pandas as pd
import hvplot.pandas
from pathlib import Path

# Import the hvPlot library
import hvplot.pandas
import holoviews as hv

## Import the data 

In [41]:
# Using the read_csv function and Path module, create a DataFrame 
# by importing the sfo_neighborhoods_census_data.csv file from the Resources folder
sfo_data_df = pd.read_csv(
    Path('/Users/tej/Documents/GitHub/PyViz_Challenge/sfo_neighborhoods_census_data.csv') # Read in the input file
)

# Review the first and last five rows of the DataFrame
display( "***Head***", sfo_data_df.head() )
display( "***Tail***", sfo_data_df.tail() )

'***Head***'

Unnamed: 0,year,neighborhood,sale_price_sqr_foot,housing_units,gross_rent
0,2010,Alamo Square,291.182945,372560,1239
1,2010,Anza Vista,267.932583,372560,1239
2,2010,Bayview,170.098665,372560,1239
3,2010,Buena Vista Park,347.394919,372560,1239
4,2010,Central Richmond,319.027623,372560,1239


'***Tail***'

Unnamed: 0,year,neighborhood,sale_price_sqr_foot,housing_units,gross_rent
392,2016,Telegraph Hill,903.049771,384242,4390
393,2016,Twin Peaks,970.08547,384242,4390
394,2016,Van Ness/ Civic Center,552.602567,384242,4390
395,2016,Visitacion Valley,328.319007,384242,4390
396,2016,Westwood Park,631.195426,384242,4390


### Step 1: Use the `groupby` function to group the data by year. Aggregate the results by the `mean` of the groups.

In [42]:
# Create a numerical aggregation that groups the data by the year and then averages the results.
housing_units_by_year = sfo_data_df[["year","housing_units"]].groupby("year").mean()

# Review the DataFrame
housing_units_by_year

Unnamed: 0_level_0,housing_units
year,Unnamed: 1_level_1
2010,372560.0
2011,374507.0
2012,376454.0
2013,378401.0
2014,380348.0
2015,382295.0
2016,384242.0


### Step 2: Use the `hvplot` function to plot the `housing_units_by_year` DataFrame as a bar chart. Make the x-axis represent the `year` and the y-axis represent the `housing_units`.

### Step 3: Style and format the line plot to ensure a professionally styled visualisation.

In [73]:
# Create a visual aggregation to explore the housing units by year
min_year = sfo_data_df["year"].min()   # Get the earliest year in the data to use in the title as "from" year
max_year = sfo_data_df["year"].max()   # Get the latest year in the data to use in the title as "to" year

# Adjust the plot's minimum y-axis to better show the scale of comparative change from one year to the next
min_y_axis = sfo_data_df["housing_units"].min()  # Get the lowest value in our Y axis range
min_y_axis -= (min_y_axis*0.025)                 # Set the plot's min_y_axis value to 2.5% below that value

fig1_title = f"Figure 1 - Housing Units in San Francisco from {min_year} to {max_year}" # Construct the title using the variable years 
fig1 = housing_units_by_year.groupby("year").mean().hvplot.bar(                         # Group the data by year and get the average then create the plot
    xlabel="Year",                # Set the x axis label
    ylabel="Housing Units",       # Set the y axis label
    ylim=(min_y_axis,None),       # Set the y axis minimum limit but leave the maximum to automatic
    color="blue",                 # Set the bar to blue
    rot=0,                        # Set the rotation
    height=500,                   # Set the chart height
    width=1000                    # Set the chart width
).opts( yformatter='%.0f',        # Format the numbers in standard notation rather than scientific notation which is the default
        title=fig1_title )        # Set the chart's title

fig1 # Display the plot

In [44]:
# Determine the year-on-year percentage change
housing_units_by_year.groupby("year")["housing_units"].mean().pct_change().dropna()

year
2011    0.005226
2012    0.005199
2013    0.005172
2014    0.005145
2015    0.005119
2016    0.005093
Name: housing_units, dtype: float64

### Step 5: Answer the following question:

**Question:** What is the overall trend in housing_units over the period being analysed?

**Answer:** Between 2010 and 2016, there has been an annual growth rate of approximately 0.5% each year.

---

### Step 1: Group the data by year, and then average the results.

In [45]:
# Create a numerical aggregation by grouping the data by year and averaging the results
prices_square_foot_by_year = sfo_data_df[["year","sale_price_sqr_foot"]].groupby("year").mean()

# Review the resulting DataFrame
prices_square_foot_by_year

Unnamed: 0_level_0,sale_price_sqr_foot
year,Unnamed: 1_level_1
2010,369.344353
2011,341.903429
2012,399.389968
2013,483.600304
2014,556.277273
2015,632.540352
2016,697.643709


In [46]:
lowest_rent =  prices_square_foot_by_year["sale_price_sqr_foot"].min()

print(f"The lowest gross rent reported for the years included in the DataFrame is ${lowest_rent:0,.2f} per sqr foot" )

The lowest gross rent reported for the years included in the DataFrame is $341.90 per sqr foot


**Question:** What is the lowest gross rent reported for the years included in the DataFrame?

**Answer:** $341.90  

### Step 2: Create a new DataFrame named `prices_square_foot_by_year` by filtering out the “housing_units” column. The new DataFrame should include the averages per year for only the sale price per square foot and the gross rent.

In [47]:
# Filter out the housing_units column, creating a new DataFrame 
# Keep only sale_price_sqr_foot and gross_rent averages per year
prices_square_foot_by_year =  sfo_data_df[["year","sale_price_sqr_foot","gross_rent"]].groupby("year").mean()

# Review the DataFrame
prices_square_foot_by_year

Unnamed: 0_level_0,sale_price_sqr_foot,gross_rent
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2010,369.344353,1239.0
2011,341.903429,1530.0
2012,399.389968,2324.0
2013,483.600304,2971.0
2014,556.277273,3528.0
2015,632.540352,3739.0
2016,697.643709,4390.0


### Step 3: Use hvPlot to plot the `prices_square_foot_by_year` DataFrame as a line plot.



### Step 4: Style and format the line plot to ensure a professionally styled visualisation.


In [48]:
# Plot prices_square_foot_by_year. 
fig2_title = f"Figure 2 - Sale Price Per Square Foot and Average Gross Rent - {min_year}-{max_year} - San Francisco"
fig2 = prices_square_foot_by_year.hvplot.line(
    xlabel="Year",                                        # Set the x axis label
    group_label="Legend",                                 # Change the legend's title from the default which is "Variable" to "Legend"    
    ylabel="Gross Rent / Sale Price Per Square Foot",     # Set the y axis label
    height=500,                                           # Set the chart height
    width=1000                                            # Set the chart width
).opts( yformatter='%.0f',                                # Format the numbers in standard notation rather than scientific notation which is the default
        title=fig2_title )                                # Set the chart's title

# Inclued labels for the x- and y-axes, and a title.
fig2

### Step 6: Use both the `prices_square_foot_by_year` DataFrame and interactive plots to answer the following questions:

In [49]:
housing_pct_change = prices_square_foot_by_year.groupby("year").mean().pct_change().dropna()
housing_pct_change

Unnamed: 0_level_0,sale_price_sqr_foot,gross_rent
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2011,-0.074296,0.234867
2012,0.168137,0.518954
2013,0.210847,0.278399
2014,0.150283,0.187479
2015,0.137095,0.059807
2016,0.102924,0.174111


In [50]:
print("Years that experienced a drop in the average sale price per square foot compared to the previous year:")
housing_pct_change[housing_pct_change["sale_price_sqr_foot"]<0].dropna()

Years that experienced a drop in the average sale price per square foot compared to the previous year:


Unnamed: 0_level_0,sale_price_sqr_foot,gross_rent
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2011,-0.074296,0.234867


**Question:** Did any year experience a drop in the average sale price per square foot compared to the previous year?

**Answer:** In the year 2011, there was a decline of 7.4% in the average selling price per square foot in San Francisco.

**Question:** If so, did the gross rent increase or decrease during that year?

**Answer:** The gross rent increased by 23.5% the same year.

---

### Step 1: Create a new DataFrame that groups the original DataFrame by year and neighborhood. Aggregate the results by the `mean` of the groups.

In [51]:
# Group by year and neighborhood and then create a new dataframe of the mean values
prices_by_year_by_neighborhood = sfo_data_df.groupby(["year", "neighborhood"]).mean()

# Review the DataFrame
prices_by_year_by_neighborhood

Unnamed: 0_level_0,Unnamed: 1_level_0,sale_price_sqr_foot,housing_units,gross_rent
year,neighborhood,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010,Alamo Square,291.182945,372560.0,1239.0
2010,Anza Vista,267.932583,372560.0,1239.0
2010,Bayview,170.098665,372560.0,1239.0
2010,Buena Vista Park,347.394919,372560.0,1239.0
2010,Central Richmond,319.027623,372560.0,1239.0
...,...,...,...,...
2016,Telegraph Hill,903.049771,384242.0,4390.0
2016,Twin Peaks,970.085470,384242.0,4390.0
2016,Van Ness/ Civic Center,552.602567,384242.0,4390.0
2016,Visitacion Valley,328.319007,384242.0,4390.0


### Step 2: Filter out the “housing_units” column to create a DataFrame that includes only the `sale_price_sqr_foot` and `gross_rent` averages per year.

In [52]:
# Filter out the housing_units
prices_by_year_by_neighborhood = prices_by_year_by_neighborhood.drop(columns=["housing_units"])
# Review the first and last five rows of the DataFrame
display( prices_by_year_by_neighborhood.head() )
display( prices_by_year_by_neighborhood.tail() )

Unnamed: 0_level_0,Unnamed: 1_level_0,sale_price_sqr_foot,gross_rent
year,neighborhood,Unnamed: 2_level_1,Unnamed: 3_level_1
2010,Alamo Square,291.182945,1239.0
2010,Anza Vista,267.932583,1239.0
2010,Bayview,170.098665,1239.0
2010,Buena Vista Park,347.394919,1239.0
2010,Central Richmond,319.027623,1239.0


Unnamed: 0_level_0,Unnamed: 1_level_0,sale_price_sqr_foot,gross_rent
year,neighborhood,Unnamed: 2_level_1,Unnamed: 3_level_1
2016,Telegraph Hill,903.049771,4390.0
2016,Twin Peaks,970.08547,4390.0
2016,Van Ness/ Civic Center,552.602567,4390.0
2016,Visitacion Valley,328.319007,4390.0
2016,Westwood Park,631.195426,4390.0


### Step 3: Create an interactive line plot with hvPlot that visualises both `sale_price_sqr_foot` and `gross_rent`. Set the x-axis parameter to the year (`x="year"`). Use the `groupby` parameter to create an interactive widget for `neighborhood`.

### Step 4: Style and format the line plot to ensure a professionally styled visualisation.

In [53]:
# Using hvplot to create an interactive line plot of the average price per square foot
# The plot will have a dropdown selector for the neighborhood

fig3_title = f"Figure 3 - Sale Price Per Square Foot and Average Gross Rent - {min_year}-{max_year} - By Neighborhood" # Construct the title based on fixed and variable data
fig3 = prices_by_year_by_neighborhood.hvplot.line(
    x="year",                                           # Force the x axis to the Year
    y=["sale_price_sqr_foot", "gross_rent"],            # Plot both the sales price and rent on the Y axis
    group_label="Legend",                               # Change the legend's title from the default which is "Variable" to "Legend"
    groupby="neighborhood",                             # Set the dropdown selector to allow selection of Neighborhood
    xlabel="Year",                                      # Label the x axis
    ylabel="Gross Rent / Sale Price Per Square Foot",   # Label the y axis
    height=500,
    width=800
).opts( yformatter='%.0f',                              # Set numbers to standard notation
        title=fig3_title)                               # Set the Title based

fig3 # Display the plot

### Step 6: Use the interactive visualisation to answer the following question:

**Question:** For the Anza Vista neighborhood, is the average sale price per square foot for 2016 more or less than the price that’s listed for 2012? 

**Answer:** The Anza Vista neighborhood had an average sale price per square foot of $344.491 in 2012, which decreased to $88.402 in 2016, indicating a reduction in price from 2012 to 2016.

---

### Step 1: Read the `neighborhood_coordinates.csv` file from the `Resources` folder into the notebook, and create a DataFrame named `neighborhood_locations_df`. Be sure to set the `index_col` of the DataFrame as “Neighborhood”.

In [54]:
# Load neighborhoods coordinates data
neighborhood_locations_df = pd.read_csv( Path('/Users/tej/Documents/GitHub/PyViz_Challenge/neighborhoods_coordinates.csv'), index_col="Neighborhood" )

# Review the DataFrame
display( neighborhood_locations_df.info() )
display( neighborhood_locations_df.head() )

<class 'pandas.core.frame.DataFrame'>
Index: 73 entries, Alamo Square to Yerba Buena
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Lat     73 non-null     float64
 1   Lon     73 non-null     float64
dtypes: float64(2)
memory usage: 1.7+ KB


None

Unnamed: 0_level_0,Lat,Lon
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1
Alamo Square,37.791012,-122.4021
Anza Vista,37.779598,-122.443451
Bayview,37.73467,-122.40106
Bayview Heights,37.72874,-122.41098
Bernal Heights,37.72863,-122.44305


### Step 2: Using the original `sfo_data_df` Dataframe, create a DataFrame named `all_neighborhood_info_df` that groups the data by neighborhood. Aggregate the results by the `mean` of the group.

In [55]:
# Calculate the mean values for each neighborhood
all_neighborhood_info_df = sfo_data_df[["neighborhood", "sale_price_sqr_foot", "gross_rent"]].groupby("neighborhood").mean()


# Review the resulting DataFrame
display( all_neighborhood_info_df )

Unnamed: 0_level_0,sale_price_sqr_foot,gross_rent
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1
Alamo Square,366.020712,2817.285714
Anza Vista,373.382198,3031.833333
Bayview,204.588623,2318.400000
Bayview Heights,590.792839,3739.000000
Bernal Heights,576.746488,3080.333333
...,...,...
West Portal,498.488485,2515.500000
Western Addition,307.562201,2555.166667
Westwood Highlands,533.703935,2250.500000
Westwood Park,687.087575,3959.000000


### Step 3: Review the two code cells that concatenate the `neighborhood_locations_df` DataFrame with the `all_neighborhood_info_df` DataFrame. 

In [56]:
# Using the Pandas `concat` function, join the 
# neighborhood_locations_df and the all_neighborhood_info_df DataFrame
# The axis of the concatenation is "columns".
# The concat function will automatially combine columns with
# identical information, while keeping the additional columns.
all_neighborhoods_df = pd.concat(
    [neighborhood_locations_df, all_neighborhood_info_df], 
    axis="columns",
    sort=False
)

# Review the resulting DataFrame
display(all_neighborhoods_df.head())
display(all_neighborhoods_df.tail())


Unnamed: 0,Lat,Lon,sale_price_sqr_foot,gross_rent
Alamo Square,37.791012,-122.4021,366.020712,2817.285714
Anza Vista,37.779598,-122.443451,373.382198,3031.833333
Bayview,37.73467,-122.40106,204.588623,2318.4
Bayview Heights,37.72874,-122.41098,590.792839,3739.0
Bernal Heights,37.72863,-122.44305,,


Unnamed: 0,Lat,Lon,sale_price_sqr_foot,gross_rent
Yerba Buena,37.79298,-122.39636,576.709848,2555.166667
Bernal Heights,,,576.746488,3080.333333
Downtown,,,391.434378,2817.285714
Ingleside,,,367.895144,2509.0
Outer Richmond,,,473.900773,2817.285714


In [57]:
# Call the dropna function to remove any neighborhoods that do not have data
all_neighborhoods_df = all_neighborhoods_df.reset_index().dropna()

# Rename the "index" column as "Neighborhood" for use in the visualisation
all_neighborhoods_df = all_neighborhoods_df.rename(columns={"index": "Neighborhood"})

# Review the resulting DataFrame
display(all_neighborhoods_df.head())
display(all_neighborhoods_df.tail())

Unnamed: 0,Neighborhood,Lat,Lon,sale_price_sqr_foot,gross_rent
0,Alamo Square,37.791012,-122.4021,366.020712,2817.285714
1,Anza Vista,37.779598,-122.443451,373.382198,3031.833333
2,Bayview,37.73467,-122.40106,204.588623,2318.4
3,Bayview Heights,37.72874,-122.41098,590.792839,3739.0
5,Buena Vista Park,37.76816,-122.43933,452.680591,2698.833333


Unnamed: 0,Neighborhood,Lat,Lon,sale_price_sqr_foot,gross_rent
68,West Portal,37.74026,-122.46388,498.488485,2515.5
69,Western Addition,37.79298,-122.43579,307.562201,2555.166667
70,Westwood Highlands,37.7347,-122.456854,533.703935,2250.5
71,Westwood Park,37.73415,-122.457,687.087575,3959.0
72,Yerba Buena,37.79298,-122.39636,576.709848,2555.166667


### Step 4: Using hvPlot with GeoViews enabled, create a `points` plot for the `all_neighborhoods_df` DataFrame. Be sure to do the following:

* Set the `geo` parameter to True.
* Set the `size` parameter to “sale_price_sqr_foot”.
* Set the `color` parameter to “gross_rent”.
* Set the `frame_width` parameter to 700.
* Set the `frame_height` parameter to 500.
* Include a descriptive title.

In [58]:
# Show a map of each neighborhood. Bubble are used to indicate the neighborhood's location on the map.
# The Bubble size indicates the comparative Sale Price Per Square Foot.
# The colour shade of the bubble represents Average Gross Rent By Neighborhood

# Create a plot to analyse neighborhood info
fig4 = all_neighborhoods_df.hvplot.points(
    x='Lon',                        # Longitude coordinate on x axis
    y='Lat',                        # Latitude coordinate on y axis
    xlabel="Longitute",             # Label the x axis
    ylabel="Latitude",              # Label the y axis
    clabel = "Average Gross Rent",
    geo=True,                       # Enable GeoViews
    size='sale_price_sqr_foot',     # Set bubble size to be based off the sales price per sq ft
    color='gross_rent',             # Set the colour scale to be based on the gross rent
    tiles='OSM',                    # Show the Open Street Map (OSM)
    frame_width=700,                # Set the frame width
    frame_height=500,               # Set the frame height
    title='Figure 4 - Sale Price Per Square Foot and Average Gross Rent By Neighborhood', # Set the title
    yformatter='%.2f',              # Format the numbers to avoid scientific notation
    hover_cols=['Neighborhood', 'sale_price_sqr_foot', 'gross_rent'], # Include extra details in the hover panel
    use_index=False                  # Exclude the index from the hover data pop-up
)

display(fig4) # Display the plot

**Figure 4 Legend**   
Bubble Size: Sale Price Per Square Foot   
Colour Shade: Average Gross Rent By Neighborhood   

### Step 5: Use the interactive map to answer the following question:

**Question:** Which neighborhood has the highest gross rent, and which has the highest sale price per square foot?

**Answer:** The neighborhood with the highest gross rent appears to be Westwood Park, where it's noted by the dark blue marker in the southwest region of the map at coordinates -122.4570 longitude and 37.7342 latitude. Conversely, the neighborhood with the highest sale price per square foot appears to be the Union Square District, indicated by the largest marker in the northeast part of the map, with coordinates at -122.4021 longitude and 37.7910 latitude.

## Data Story

**Question:**  How does the trend in rental income growth compare to the trend in sales prices? Does this same trend hold true for all the neighborhoods across San Francisco?

**Answer:** Figure 2 indicates that, from 2010 to 2016, the average rental income in San Francisco County increased significantly by 254%. In contrast, average sales prices experienced a comparatively modest growth of 89% during the same time frame. However, it's important to note that this trend is not consistent across all neighborhoods within San Francisco, as evident from our observations in Figure 3. In some neighborhoods, sales prices declined, with the most substantial drop being -71%, while others saw remarkable increases of up to 412%. Likewise, gross rental returns varied widely, ranging from no noticeable increase to a substantial 254% growth.


**Question:** What insights can you share with your company about the potential one-click, buy-and-rent strategy that they're pursuing? Do neighborhoods exist that you would suggest for investment, and why?

**Answer:** Investors employing buy-and-rent strategies aim for optimal rental returns and property value appreciation. In San Francisco, the southern regions demonstrate robust gross rental incomes. However, when factoring in the purchase cost and subsequently rental yield, the southeastern areas exhibit more promising returns. Further examination outlined below suggests that considering an investment in Visitacion Valley could be advantageous for the following reasons:

1. Visitacion Valley has consistently yielded favorable returns, averaging 12 times the sales price.
2. Despite a decline in sales prices in 2014, Visitacion Valley has witnessed a subsequent positive upswing in property values.
3. Gross rent growth has shown steady and continuous improvement since 2013.

---

# Additional Analysis

In [59]:
# Create a data frame with an added column containing the rental yield (ie rental return percentage of the purchase price)
all_neighborhood_info_df_with_yield = all_neighborhood_info_df

all_neighborhood_info_df_with_yield["yield"] = all_neighborhood_info_df_with_yield["gross_rent"] /  all_neighborhood_info_df_with_yield["sale_price_sqr_foot"]

In [60]:
all_neighborhood_info_df_with_yield.nlargest(10, "yield")

Unnamed: 0_level_0,sale_price_sqr_foot,gross_rent,yield
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Silver Terrace,170.292549,3528.0,20.717289
Hunters Point,170.62492,2489.0,14.587553
Outer Mission,242.370952,2995.75,12.360186
Visitacion Valley,301.46618,3657.0,12.130714
Bayview,204.588623,2318.4,11.332008
Croker Amazon,303.004184,2698.833333,8.906918
Western Addition,307.562201,2555.166667,8.307805
Anza Vista,373.382198,3031.833333,8.119919
Hayes Valley,355.932828,2817.285714,7.915217
Excelsior,388.765927,3031.833333,7.798609


In [61]:
all_neighborhood_info_df_with_yield.nlargest(10, "gross_rent")

Unnamed: 0_level_0,sale_price_sqr_foot,gross_rent,yield
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Westwood Park,687.087575,3959.0,5.762002
Bayview Heights,590.792839,3739.0,6.328784
Visitacion Valley,301.46618,3657.0,12.130714
Silver Terrace,170.292549,3528.0,20.717289
Merced Heights,788.844818,3414.0,4.327847
Inner Parkside,519.385604,3224.0,6.207334
Mission Terrace,523.466201,3173.4,6.062283
Bernal Heights,576.746488,3080.333333,5.340879
Anza Vista,373.382198,3031.833333,8.119919
Excelsior,388.765927,3031.833333,7.798609


In [62]:
# Using the Pandas `concat` function, join the 
# neighborhood_locations_df and the all_neighborhood_info_df DataFrame
# The axis of the concatenation is "columns".
# The concat function will automatially combine columns with
# identical information, while keeping the additional columns.
all_neighborhoods_yield_df = pd.concat(
    [neighborhood_locations_df, all_neighborhood_info_df_with_yield], 
    axis="columns",
    sort=False
)

# Review the resulting DataFrame
display(all_neighborhoods_yield_df.head())
display(all_neighborhoods_yield_df.tail())

Unnamed: 0,Lat,Lon,sale_price_sqr_foot,gross_rent,yield
Alamo Square,37.791012,-122.4021,366.020712,2817.285714,7.697066
Anza Vista,37.779598,-122.443451,373.382198,3031.833333,8.119919
Bayview,37.73467,-122.40106,204.588623,2318.4,11.332008
Bayview Heights,37.72874,-122.41098,590.792839,3739.0,6.328784
Bernal Heights,37.72863,-122.44305,,,


Unnamed: 0,Lat,Lon,sale_price_sqr_foot,gross_rent,yield
Yerba Buena,37.79298,-122.39636,576.709848,2555.166667,4.430593
Bernal Heights,,,576.746488,3080.333333,5.340879
Downtown,,,391.434378,2817.285714,7.197338
Ingleside,,,367.895144,2509.0,6.819878
Outer Richmond,,,473.900773,2817.285714,5.944885


In [63]:
# Call the dropna function to remove any neighborhoods that do not have data
all_neighborhoods_yield_df = all_neighborhoods_yield_df.reset_index().dropna()

# Rename the "index" column as "Neighborhood" for use in the visualisation
all_neighborhoods_yield_df = all_neighborhoods_yield_df.rename(columns={"index": "Neighborhood"})

# Review the resulting DataFrame
display(all_neighborhoods_yield_df.head())
display(all_neighborhoods_yield_df.tail())

Unnamed: 0,Neighborhood,Lat,Lon,sale_price_sqr_foot,gross_rent,yield
0,Alamo Square,37.791012,-122.4021,366.020712,2817.285714,7.697066
1,Anza Vista,37.779598,-122.443451,373.382198,3031.833333,8.119919
2,Bayview,37.73467,-122.40106,204.588623,2318.4,11.332008
3,Bayview Heights,37.72874,-122.41098,590.792839,3739.0,6.328784
5,Buena Vista Park,37.76816,-122.43933,452.680591,2698.833333,5.961893


Unnamed: 0,Neighborhood,Lat,Lon,sale_price_sqr_foot,gross_rent,yield
68,West Portal,37.74026,-122.46388,498.488485,2515.5,5.046255
69,Western Addition,37.79298,-122.43579,307.562201,2555.166667,8.307805
70,Westwood Highlands,37.7347,-122.456854,533.703935,2250.5,4.216757
71,Westwood Park,37.73415,-122.457,687.087575,3959.0,5.762002
72,Yerba Buena,37.79298,-122.39636,576.709848,2555.166667,4.430593


In [64]:
# Show a map of each neighborhood with a colour shade representing the rental yield.

# Create a plot to analyse neighborhood info
fig5 = all_neighborhoods_yield_df.hvplot.points(
    x='Lon',                        # Longitude coordinate on x axis
    y='Lat',                        # Latitude coordinate on y axis
    xlabel="Longitute",             # Label the x axis
    ylabel="Latitude",              # Label the y axis
    clabel = "Yield",
    geo=True,                       # Enable GeoViews
    size=200,                       # Set bubble size to a fixed value since we are using the colour shading
    color='yield',                  # Set the colour scale to be based on the gross rent
    tiles='OSM',                    # Show the Open Street Map (OSM)
    frame_width=700,                # Set the frame width
    frame_height=500,               # Set the frame height
    title='Figure 5 - Yield By Neighborhood', # Set the title
    yformatter='%.2f',              # Format the numbers to avoid scientific notation
    hover_cols=['Neighborhood', 'sale_price_sqr_foot', 'gross_rent', 'yield'], # Include extra details in the hover panel
    use_index=False                  # Exclude the index from the hover data pop-up
)

display(fig5) # Display the plot

## Compare rental yield and sales turnover
Using Figure 5, by obvervation, the south east area indicates higher rental yields.

## Check sales history of candidate neighborhoods
Checking the sales history of neighborhoods of potential interest, so that the investor is confident there is a decent property market for entry. Neighborhoods with high yield and little history may not offer true representation.

### Check Silver Terrace

In [65]:
all_neighborhoods_yield_df.query('Neighborhood == "Silver Terrace"') # Show yield information for the selected neighborhood

Unnamed: 0,Neighborhood,Lat,Lon,sale_price_sqr_foot,gross_rent,yield
59,Silver Terrace,37.73467,-122.40106,170.292549,3528.0,20.717289


In [66]:
prices_by_year_by_neighborhood.query('neighborhood == "Silver Terrace"') # Show sales and gross rent history over the years

Unnamed: 0_level_0,Unnamed: 1_level_0,sale_price_sqr_foot,gross_rent
year,neighborhood,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,Silver Terrace,170.292549,3528.0


Silver Terrace had the highest rental yield, at 20.7 times sales price, however there were no further sales in the following years raising concerns of market liquidity.

---

### Check Hunters Point

In [67]:
all_neighborhoods_yield_df.query('Neighborhood == "Hunters Point"') # Show yield information for the selected neighborhood

Unnamed: 0,Neighborhood,Lat,Lon,sale_price_sqr_foot,gross_rent,yield
24,Hunters Point,37.72551,-122.37178,170.62492,2489.0,14.587553


In [68]:
prices_by_year_by_neighborhood.query('neighborhood == "Hunters Point"') # Show sales and gross rent history over the years

Unnamed: 0_level_0,Unnamed: 1_level_0,sale_price_sqr_foot,gross_rent
year,neighborhood,Unnamed: 2_level_1,Unnamed: 3_level_1
2010,Hunters Point,170.62492,1239.0
2015,Hunters Point,,3739.0


The rental yield in Hunters Point was a commendable 14.6 times the sale price; nevertheless, the limited historical data raises some concerns.

### Check Outer Mission

In [69]:
all_neighborhoods_yield_df.query('Neighborhood == "Outer Mission"') # Show yield information for the selected neighborhood

Unnamed: 0,Neighborhood,Lat,Lon,sale_price_sqr_foot,gross_rent,yield
47,Outer Mission,37.7228,-122.43869,242.370952,2995.75,12.360186


In [70]:
prices_by_year_by_neighborhood.query('neighborhood == "Outer Mission"') # Show sales and gross rent history over the years

Unnamed: 0_level_0,Unnamed: 1_level_0,sale_price_sqr_foot,gross_rent
year,neighborhood,Unnamed: 2_level_1,Unnamed: 3_level_1
2011,Outer Mission,142.142568,1530.0
2012,Outer Mission,221.881139,2324.0
2015,Outer Mission,564.687476,3739.0
2016,Outer Mission,40.772625,4390.0


Outer Mission experienced a notable decline in sales prices from 2015 to 2016, which could either be viewed as a potential investment opportunity or a cause for worry, depending on the underlying reasons for the decrease. It would be necessary to conduct a more in-depth analysis to understand the factors contributing to this decline before making any recommendations regarding Outer Mission.

### Check Visitacion Valley

In [71]:
all_neighborhoods_yield_df.query('Neighborhood == "Visitacion Valley"') # Show yield information for the selected neighborhood

Unnamed: 0,Neighborhood,Lat,Lon,sale_price_sqr_foot,gross_rent,yield
67,Visitacion Valley,37.72874,-122.41098,301.46618,3657.0,12.130714


In [72]:
prices_by_year_by_neighborhood.query('neighborhood == "Visitacion Valley"') # Show sales and gross rent history over the years

Unnamed: 0_level_0,Unnamed: 1_level_0,sale_price_sqr_foot,gross_rent
year,neighborhood,Unnamed: 2_level_1,Unnamed: 3_level_1
2013,Visitacion Valley,293.298372,2971.0
2014,Visitacion Valley,282.025468,3528.0
2015,Visitacion Valley,302.221873,3739.0
2016,Visitacion Valley,328.319007,4390.0


Vistacion Valley achieved a favorable rental yield, averaging 12 times the sales price. While Vistacion Valley did witness a decline in sales prices in 2014, there has been a subsequent positive trend in sales prices. Additionally, the gross rent has shown consistent growth since 2013.