# Project 2

In [1]:
import pandas as pd
import plotly.express as px

In [2]:
import plotly.io as pio

# Set plotly's default renderer to several renderer names joined with "+"
# (vscode, jupyterlab, notebook_connected), presumably so the figures display
# in whichever of those front ends opens the notebook.
pio.renderers.default = "vscode+jupyterlab+notebook_connected"

Hypothesis: Since Covid, the world has faced inflationary pressure not seen in decades. As a result, there is a lot of public discourse on how rising prices affect the real economy. In the US, rent has been the most noticeable and impactful change. In this project, I hope to find a positive correlation between the population growth rate and the rental price level.

***Data Import***

In [3]:
# Load the per-state population table and preview its first five rows.
uspop = pd.read_csv(filepath_or_buffer="uspop.csv")
uspop.head(n=5)

Unnamed: 0,Rank,US State,Population 2024,Population 2023,Growth Rate,% of US,Density (/mile2)
0,1,California,38889770,38965193,-0.0019,0.1158,250
1,2,Texas,30976754,30503301,0.0155,0.0922,119
2,3,Florida,22975931,22610726,0.0162,0.0684,428
3,4,New York,19469232,19571216,-0.0052,0.058,413
4,5,Pennsylvania,12951275,12961683,-0.0008,0.0386,289


In [4]:
# Load the per-state rent table and preview its first five rows.
usrent = pd.read_csv(filepath_or_buffer="usrent.csv")
usrent.head(n=5)

Unnamed: 0,state,MedianRent,AverageRentZillow
0,Alabama,925.0,1461.0
1,Alaska,1345.0,1489.0
2,Arizona,1308.0,2195.0
3,Arkansas,868.0,1235.0
4,California,1856.0,3000.0


***Data Cleaning***

In [5]:
# Standardize the column names used for merging and keep only the columns the
# analysis needs. One non-inplace rename replaces the two in-place calls, so
# the cell is a single reassignment of `uspop` with no hidden mutation.
uspop = (
    uspop
    .rename(columns={'US State': 'state', 'Growth Rate': 'GrowthRate'})
    .loc[:, ['state', 'GrowthRate']]
)

uspop.head()

Unnamed: 0,state,GrowthRate
0,California,-0.0019
1,Texas,0.0155
2,Florida,0.0162
3,New York,-0.0052
4,Pennsylvania,-0.0008


In [6]:
# Remove the District of Columbia row (treated as unmatched for the merge) and
# keep only the state name and the Zillow average rent, in one .loc selection.
usrent = usrent.loc[
    usrent['state'].ne('District of Columbia'),
    ['state', 'AverageRentZillow'],
]

usrent.head()

Unnamed: 0,state,AverageRentZillow
0,Alabama,1461.0
1,Alaska,1489.0
2,Arizona,2195.0
3,Arkansas,1235.0
4,California,3000.0


In [7]:
# Inner-join growth rates and rents on the shared 'state' column.
merge_df = uspop.merge(usrent, how='inner', on='state')
merge_df.head()

Unnamed: 0,state,GrowthRate,AverageRentZillow
0,California,-0.0019,3000.0
1,Texas,0.0155,1886.0
2,Florida,0.0162,2575.0
3,New York,-0.0052,3400.0
4,Pennsylvania,-0.0008,1525.0


Visualization 1: Rent Ranking

In [8]:
# Order states from most to least expensive average rent.
usrent_sorted = usrent.sort_values('AverageRentZillow', ascending=False)

# Bar chart of rent per state; bar color encodes the same rent value.
fig = px.bar(
    data_frame=usrent_sorted,
    x='state',
    y='AverageRentZillow',
    color='AverageRentZillow',
    color_continuous_scale='Viridis',
    labels={'state': 'State', 'AverageRentZillow': 'Average Rent Zillow'},
    title='Average Rent by State (Highest to Lowest)',
)

# Rotate and shrink the state names so all of them fit on the x axis.
fig.update_xaxes(tickfont={'size': 8}, tickangle=90)

fig.show()

Visualization 2: Growth Ranking

In [9]:
# Order states from highest to lowest population growth rate.
uspop_sorted = uspop.sort_values(by='GrowthRate', ascending=False)

# Bar chart of growth rate per state; bar color encodes the same growth value.
fig = px.bar(uspop_sorted,
             x='state',
             y='GrowthRate',
             title='Growth Rate by State (Highest to Lowest)',
             # `labels` is keyed by column name: the key must be 'GrowthRate'
             # (not 'Growth Rate') for the axis/colorbar label to be applied.
             labels={'state': 'State', 'GrowthRate': 'Growth Rate'},
             color='GrowthRate',
             color_continuous_scale='Viridis')

# Rotate and shrink the state names so all of them fit on the x axis.
fig.update_xaxes(tickangle=90, tickfont=dict(size=8))

fig.show()

Visualization 3: Scatterplot

In [10]:
# Scatter of average rent (x) against population growth rate (y), one point
# per state, with an ordinary-least-squares trendline drawn by plotly.
fig = px.scatter(merge_df,
                 x='AverageRentZillow',
                 y='GrowthRate',
                 title='Average Rent Zillow vs Growth Rate',
                 # `labels` is keyed by column name: the key must be
                 # 'GrowthRate' (not 'Growth Rate') for the y-axis/colorbar
                 # label to be applied.
                 labels={'GrowthRate': 'Growth Rate', 'AverageRentZillow': 'Average Rent Zillow'},
                 color='GrowthRate',
                 color_continuous_scale='Viridis',
                 trendline='ols')

fig.show()

***Conclusion***

    1. The states vary widely in both population growth rate and rental price level, with rent ranging from less than one thousand to more than three thousand USD.
    2. While there are clear outliers, e.g. New York, which has the highest rent and the most negative growth, there are also opposite cases, e.g. Florida, which has some of the most expensive rent but still a very high growth rate. As a result, the overall correlation between rental price level and state population growth is not clear.
    3. Further analysis could use more detailed components of growth (immigration, fertility rate) and rental price changes instead of price levels.