### 0.1 READ THE DATA SOURCE

In [57]:
import configparser

# Build the parser that holds the notebook settings
config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Check that list so a missing file fails here
# with a clear message instead of an opaque KeyError on the lookup below.
if not config.read('config.ini'):
    raise FileNotFoundError("Could not read 'config.ini' from the current working directory")

# Location of the housing-market CSV, taken from the [path] section
csv_file_path = config['path']['house_market']

### 0.2 DEFINE THE HTML DISPLAY

In [58]:
from IPython.display import display, HTML
import pandas as pd

# CSS that caps the height of the output area and makes it scrollable
css = """
.output {
    max-height: 700px; /* Adjust as needed */
    overflow: scroll; /* Allows scrolling */
}
"""

# Inject the stylesheet. The element has to be terminated with </style>;
# a second opening <style> tag leaves the element unclosed.
display(HTML('<style>{}</style>'.format(css)))

# Configure pandas display options for better visuals
pd.set_option('display.max_rows', None) # Display all rows, adjust as needed
pd.set_option('display.max_columns', None) # Display all columns, adjust as needed
pd.set_option('display.max_colwidth', 90) # Set max column width for long text
pd.options.display.float_format = '{:,.2f}'.format # Thousands separator, two decimals

____________
### 1. DATA EXPOSURE

In [59]:
# Load the raw housing-market data from the path configured in config.ini
housing_market = pd.read_csv(filepath_or_buffer=csv_file_path, encoding='UTF-8-SIG')

# Function to display the data overview
def display_data(df, title):
    """Render an HTML overview of a DataFrame in the notebook output.

    Emits, in order: the title, the frame's shape, the column dtypes, the
    ``describe()`` summary statistics and the complete dataset (no index).

    Parameters
    ----------
    df : DataFrame to summarise.
    title : Heading text; inserted as-is into an ``<h3>`` element.

    Returns
    -------
    None. Everything is emitted through ``display``.
    """
    # Build every fragment first so nothing is shown if a step fails
    sections = [
        f"<h3>{title}</h3>",
        # The label's bold tag must be closed with </b>; a second <b> leaves
        # the element open.
        f"<div><b>Dataframe Shape:</b> {df.shape}</div>",
        "<div><b>Data Types:</b></div>",
        df.dtypes.to_frame().to_html(),
        "<div><b>Summary Statistics:</b></div>",
        df.describe().to_html(),
        "<div><b>Complete Dataset:</b></div>",
        df.to_html(index=False),
    ]

    for markup in sections:
        display(HTML(markup))

# Render the overview of the raw housing-market data
display_data(df=housing_market, title="HOUSING MARKET DATA OVERVIEW")




Unnamed: 0,0
Country,object
Year,int64
House Price Index,float64
Rent Index,float64
Affordability Ratio,float64
Mortgage Rate (%),float64
Inflation Rate (%),float64
GDP Growth (%),float64
Population Growth (%),float64
Urbanization Rate (%),float64


Unnamed: 0,Year,House Price Index,Rent Index,Affordability Ratio,Mortgage Rate (%),Inflation Rate (%),GDP Growth (%),Population Growth (%),Urbanization Rate (%),Construction Index
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,2019.5,130.38,83.05,7.24,4.15,3.65,2.13,0.72,74.77,111.2
std,2.88,28.75,21.44,2.58,1.38,1.88,2.41,1.04,8.73,24.31
min,2015.0,80.55,50.35,3.04,1.54,0.53,-1.92,-0.96,60.17,70.97
25%,2017.0,104.14,60.47,5.03,3.05,1.94,-0.1,-0.18,66.92,90.18
50%,2019.5,129.19,83.72,7.38,4.33,3.66,2.31,0.72,75.1,110.59
75%,2022.0,157.13,100.6,9.28,5.22,5.26,4.27,1.62,82.68,133.78
max,2024.0,179.97,119.86,11.88,6.49,6.91,5.96,2.5,89.79,149.74


Country,Year,House Price Index,Rent Index,Affordability Ratio,Mortgage Rate (%),Inflation Rate (%),GDP Growth (%),Population Growth (%),Urbanization Rate (%),Construction Index
USA,2015,117.45,116.55,9.59,4.49,1.51,-0.75,-0.8,85.99,118.09
USA,2016,150.81,51.44,11.73,5.66,1.88,-0.55,-0.36,69.13,111.98
USA,2017,123.19,70.39,8.51,2.2,2.4,0.93,0.6,83.56,85.97
USA,2018,131.42,91.47,3.42,4.54,1.61,-1.48,2.32,88.97,134.67
USA,2019,110.46,56.84,9.16,3.7,1.29,1.96,-0.88,87.28,90.7
USA,2020,146.25,71.82,7.68,4.23,1.7,5.76,1.71,88.18,141.59
USA,2021,139.79,114.53,3.8,2.48,0.79,0.6,0.36,68.14,136.3
USA,2022,115.68,69.67,7.88,2.2,5.71,-1.4,2.45,83.17,85.9
USA,2023,80.55,107.08,9.36,5.15,5.51,-1.41,0.25,63.48,139.05
USA,2024,142.33,73.16,3.57,3.05,2.61,3.84,1.23,86.62,107.78


#### MARKET INDICATORS OVERVIEW

- **House Price Index:** The average price changes in repeat sales or refinancings on the same properties [Investopedia](https://www.investopedia.com/terms/h/house-price-index-hpi.asp).
- **Rent Index:** The change in rental prices over time, considering geography and property type.
- **Affordability Ratio:** The general cost of living in the region, and the capability of affording basic living goods. [Investopedia](https://www.investopedia.com/terms/a/affordability-index.asp). 
- **Mortgage Rate(%):** The interest charged for a home loan, highly sensitive to economic conditions [Investopedia](https://www.investopedia.com/terms/m/mortgage-rate.asp).
- **Population Growth(%):** Population size changes over time [WorldBank](https://data.worldbank.org/indicator/SP.POP.GROW?end=2023&start=1961&view=chart).
- **Urbanization Rate (%):** The numbers of persons residing in an area defined as ''urban'' per 100 total population. [WorldBank](https://databank.worldbank.org/metadataglossary/world-development-indicators/series/SP.URB.TOTL.IN.ZS)
- **Construction Index:** Changes in the cost of construction, or price fluctuation of required resources.

____________
### 2. DATA PREPARATION


##### A. DEFINE AND APPLY A STANDARD CLEANING FUNCTION

In [60]:
# Define a standard cleaning function
def clean_dataset (df,dtype_mapping=None, index_col=None):
    """Apply the standard cleaning steps to a DataFrame and display a report.

    The input frame is copied first and never modified. Steps, in order:

    1. cast the columns listed in ``dtype_mapping`` with ``astype``;
    2. count missing values per column (reported only, nothing is filled
       or dropped);
    3. drop fully duplicated rows;
    4. optionally move ``index_col`` into the index.

    Parameters
    ----------
    df : Input DataFrame.
    dtype_mapping : Optional dict of ``{column: dtype}`` for conversion.
    index_col : Optional column to set as index.

    Returns
    -------
    The cleaned DataFrame. The cleaning report is rendered through
    ``display_cleaning_report`` as a side effect.

    Raises
    ------
    ValueError
        If a column in ``dtype_mapping`` does not exist or cannot be converted.

    Warns
    -----
    UserWarning
        If ``index_col`` is given but is not a column; the index is then left
        unchanged (same outcome as before, but no longer silent).
    """
    import warnings

    # Initiate the cleaning report
    report = {
        'original_shape': df.shape,
        'duplicates_removed': 0,
        'missing_values': {},
        'type_changes': {},
        'index_set': None,
        'final_shape': None
    }

    # Create working copy
    df_clean = df.copy()

    # 1. Handle data type conversions
    for col, dtype in (dtype_mapping or {}).items():
        if col not in df_clean.columns:
            raise ValueError(f"Column '{col}' not found in DataFrame")
        old_type = str(df_clean[col].dtype)
        try:
            df_clean[col] = df_clean[col].astype(dtype)
        except (ValueError, TypeError) as e:
            # Chain explicitly so the original conversion error stays visible
            raise ValueError(f"Failed to convert column '{col}' to {dtype}: {str(e)}") from e
        new_type = str(df_clean[col].dtype)
        if old_type != new_type:
            report['type_changes'][col] = f"{old_type} → {new_type}"

    # 2. Identify missing values and report only
    report['missing_values'] = {
        col: count
        for col, count in df_clean.isna().sum().items()
        if count > 0
    }

    # 3. Remove duplicates (counted before dropping so the report is exact)
    report['duplicates_removed'] = df_clean.duplicated().sum()
    df_clean = df_clean.drop_duplicates()

    # 4. Set index if specified
    if index_col:
        if index_col in df_clean.columns:
            df_clean = df_clean.set_index(index_col)
            report['index_set'] = index_col
        else:
            warnings.warn(
                f"index_col {index_col!r} not found in DataFrame columns; index left unchanged",
                stacklevel=2,
            )

    # 5. Final metadata
    report['final_shape'] = df_clean.shape

    # Display report
    display_cleaning_report(report)

    return df_clean

def display_cleaning_report(report):
    """Render a cleaning report dictionary as a compact HTML table.

    Parameters
    ----------
    report : dict
        Must provide the keys 'original_shape', 'final_shape',
        'duplicates_removed', 'missing_values' (dict), 'type_changes' (dict)
        and 'index_set'.

    The "Missing Values Found" and "Index Set" rows are only emitted when the
    corresponding entry is non-empty; "Data Type Changes" is always emitted
    and shows "None" when no column changed type.

    Returns
    -------
    None. The table is emitted through ``display``.
    """
    from html import escape

    def _text(value):
        # Report values originate from the data (e.g. column names), so they
        # are escaped before being placed inside the markup.
        return escape(str(value), quote=False)

    def _row(label, details):
        # One two-cell table row; `details` must already be safe markup
        return f"""
        <tr>
            <td>{label}</td>
            <td>{details}</td>
        </tr>
        """

    def _listing(mapping):
        # "key: value" entries separated by line breaks
        return "".join(f"{_text(key)}: {_text(val)}<br>" for key, val in mapping.items())

    parts = ["""
    <style>
        .cleaning-report {
            font-family: Arial, sans-serif;
            border-collapse: collapse;
            width: 100%;
            margin-bottom: 20px;
        }
        .cleaning-report th {
            background-color: #f2f2f2;
            text-align: left;
            padding: 8px;
            border: 1px solid #ddd;
        }
        .cleaning-report td {
            padding: 8px;
            border: 1px solid #ddd;
        }
        .cleaning-report tr:nth-child(even) {
            background-color: #f9f9f9;
        }
    </style>
    <h3>Data Cleaning Report</h3>
    <table class="cleaning-report">
        <tr>
            <th>Operation</th>
            <th>Details</th>
        </tr>
    """]

    # Basic stats
    parts.append(_row("Original Shape", _text(report['original_shape'])))
    parts.append(_row("Final Shape", _text(report['final_shape'])))
    parts.append(_row("Duplicates Removed", _text(report['duplicates_removed'])))

    # Missing values (row omitted when nothing is missing)
    if report['missing_values']:
        parts.append(_row("Missing Values Found", _listing(report['missing_values'])))

    # Type changes (row always present)
    type_changes = report['type_changes']
    parts.append(_row("Data Type Changes", _listing(type_changes) if type_changes else "None"))

    # Index setting (row omitted when no index was set)
    if report['index_set']:
        parts.append(_row("Index Set", _text(report['index_set'])))

    parts.append("</table>")
    display(HTML("".join(parts)))

In [61]:
# Target dtypes: one text column, one integer column, every indicator a float
float_columns = [
    'House Price Index',
    'Rent Index',
    'Affordability Ratio',
    'Mortgage Rate (%)',
    'Inflation Rate (%)',
    'GDP Growth (%)',
    'Population Growth (%)',
    'Urbanization Rate (%)',
    'Construction Index',
]
dtype_spec = {
    'Country': 'object',
    'Year': 'int64',
    **dict.fromkeys(float_columns, 'float64'),
}

# Run the standard cleaning pass; no index column is requested
cleaned_df = clean_dataset(housing_market, dtype_spec)

Operation,Details
Original Shape,"(200, 11)"
Final Shape,"(200, 11)"
Duplicates Removed,0
Data Type Changes,


##### B. CREATE DERIVED FIELDS TO OPTIMIZE ANALYSIS

##### Step 1: Add a regional group field for simplified analysis

IMPORTANT: This dataset includes twenty countries! 
- NA (2)
- LATAM(2)
- EMEA (11)
- APAC (5)

In [62]:
# Define the mapping logic for regional grouping
regional_grouping = {
    # North America
    'USA': 'NA',
    'Canada': 'NA',
    # Latin America
    'Brazil': 'LATAM',
    'Mexico': 'LATAM',
    'Argentina': 'LATAM',
    # Europe, Middle East, Africa
    'UK': 'EMEA',
    'Germany': 'EMEA',
    'France': 'EMEA',
    'Italy': 'EMEA',
    'Spain': 'EMEA',
    'South Africa': 'EMEA',
    'Russia': 'EMEA',
    'Netherlands': 'EMEA',
    'Sweden': 'EMEA',
    'Switzerland': 'EMEA',
    'UAE': 'EMEA',
    # Asia Pacific
    'Australia': 'APAC',
    'India': 'APAC',
    'China': 'APAC',
    'Japan': 'APAC',
    'South Korea': 'APAC',
    'Singapore': 'APAC'
}

# Simple mapping for regional group addition
cleaned_df['derived_regional_group'] = cleaned_df['Country'].map(regional_grouping)

# A country missing from the mapping gets NaN here and would then silently
# drop out of every groupby on the regional group, so fail loudly instead.
unmapped_countries = cleaned_df.loc[
    cleaned_df['derived_regional_group'].isna(), 'Country'
].unique()
if len(unmapped_countries) > 0:
    raise ValueError(
        f"Countries without a regional group: {sorted(map(str, unmapped_countries))}"
    )

# Create country-region summary table
regional_validation_table = (
    cleaned_df
    .groupby(['derived_regional_group', 'Country'])
    .size()  # Count rows per group
    .reset_index(name='Row Count')
    # Sort on both keys so the country order inside each region is
    # deterministic rather than left to the sort algorithm.
    .sort_values(['derived_regional_group', 'Country'])
    # Format for clean display
    .style
    .hide(axis='index')
)

display(HTML(regional_validation_table.to_html()))

derived_regional_group,Country,Row Count
APAC,Australia,10
APAC,China,10
APAC,India,10
APAC,Japan,10
APAC,South Korea,10
EMEA,UK,10
EMEA,UAE,10
EMEA,Switzerland,10
EMEA,Sweden,10
EMEA,Spain,10


##### Step 2: Add calculated derived fields for financial analysis.

- **Real house price index:** Dividing the House Price by one plus Inflation for the **real purchasing power**.
- **Real mortgage cost:** Subtraction of rates Mortgage minus Inflation for the **net borrowing cost**.
- **Ownership rent spread:** Multiply the house price/rent index ratio by the mortgage rate to see **how many years of rent equal the house price**.
- **Affordability gdp population score:** Multiply Affordability by (1 + GDP growth) and divide by (1 + population growth) for a **growth-adjusted Affordability**.
- **Construction urbanization ratio:** Divide construction costs by urbanization rate to measure **cost per unit of urbanization** or urbanization cost-efficiency.

In [63]:
# Add the derived analysis fields. None of them depends on another derived
# field, so each is computed directly from the existing columns.
cleaned_df = cleaned_df.assign(
    # House price index deflated by (1 + inflation rate)
    derived_real_house_price_index=(
        cleaned_df['House Price Index'] / (1 + cleaned_df['Inflation Rate (%)'] / 100)
    ),
    # Mortgage rate minus inflation rate
    derived_real_mortgage_cost=(
        cleaned_df['Mortgage Rate (%)'] - cleaned_df['Inflation Rate (%)']
    ),
    # House price / rent index ratio scaled by the mortgage rate
    derived_ownership_rent_spread=(
        (cleaned_df['House Price Index'] / cleaned_df['Rent Index']) * cleaned_df['Mortgage Rate (%)']
    ),
    # Affordability ratio scaled up by GDP growth and down by population growth
    derived_affordability_gdp_population_score=(
        cleaned_df['Affordability Ratio']
        * (1 + cleaned_df['GDP Growth (%)'] / 100)
        / (1 + cleaned_df['Population Growth (%)'] / 100)
    ),
    # Construction index per unit of urbanization rate (rate taken as a fraction)
    derived_construction_urbanization_ratio=(
        cleaned_df['Construction Index'] / (cleaned_df['Urbanization Rate (%)'] / 100)
    ),
)

In [64]:
# Summary statistics restricted to the five derived numeric fields
new_stats = cleaned_df.loc[:, [
    'derived_real_house_price_index',
    'derived_real_mortgage_cost',
    'derived_ownership_rent_spread',
    'derived_affordability_gdp_population_score',
    'derived_construction_urbanization_ratio',
]].describe()

display(HTML(new_stats.to_html()))

Unnamed: 0,derived_real_house_price_index,derived_real_mortgage_cost,derived_ownership_rent_spread,derived_affordability_gdp_population_score,derived_construction_urbanization_ratio
count,200.0,200.0,200.0,200.0,200.0
mean,125.81,0.5,6.99,7.34,150.84
std,27.71,2.36,3.5,2.62,37.72
min,76.34,-4.53,1.6,3.04,81.68
25%,101.43,-0.99,4.28,5.07,122.58
50%,124.01,0.63,6.04,7.44,149.69
75%,152.0,2.23,8.86,9.39,176.49
max,175.73,5.79,19.56,12.39,241.14


______________
### 3. DISCOVERY QUESTIONS

##### ***HOW HAS THE REAL HOUSE PRICE GROWTH CHANGED PER REGIONAL GROUP OVER TIME?***

**Key Trend**: APAC showed the most volatility (-33% to +32%), while EMEA demonstrated relative stability until 2022's downturn.  
**Notable Shifts**:  
- **LATAM**: Extreme swings (2023: +46% → 2024: -27%) reflect emerging market sensitivity.  
- **NA**: Sustained growth (2016-2019) followed by post-pandemic correction.  
- **APAC**: 2016 contraction (-33%) reversed by 2017 infrastructure-led recovery (+32%).  

In [65]:
# Year-over-year % change of the real house price index, computed per country
# on rows ordered by country and year
real_house_analysis_df = (
    cleaned_df
    .loc[:, ['Country', 'Year', 'House Price Index', 'Inflation Rate (%)', 'derived_regional_group', 'derived_real_house_price_index']]
    .sort_values(by=['Country', 'Year'])
    .assign(
        real_house_yoy_pct=lambda frame: (
            frame.groupby('Country')['derived_real_house_price_index'].pct_change(periods=1) * 100
        ).round(2)
    )
    .round(decimals=2)
)

# Regional view: median of the country-level YoY changes per region and year,
# with regions as rows and years as columns
regional_yoy_table = (
    real_house_analysis_df
    .groupby(['Year', 'derived_regional_group'])['real_house_yoy_pct']
    .agg('median')
    .unstack(level='Year')
    .fillna('N/A')  # the first year has no previous year to compare against
)

# Show the regional YoY table
display(HTML(regional_yoy_table.to_html()))

Year,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
derived_regional_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
APAC,,-33.21,32.8,-13.88,-20.91,11.91,15.29,-8.94,10.82,3.14
EMEA,,-10.04,1.6,-4.81,-15.37,11.99,0.25,-19.46,8.88,19.26
LATAM,,-15.47,-15.86,49.84,2.64,-21.04,11.59,-16.02,46.13,-27.42
,,14.15,-4.15,1.8,26.63,-0.53,-0.62,-3.72,-7.98,19.59


##### ***WHICH COUNTRIES HAVE THE MOST VOLATILE  HOUSE PRICES?***

**Top 3 Volatile Markets**:  
1. **UAE** (43.4%) - The Impact of Foreign Investment in the Dubai Residential Properties. [EUTAX](https://www.taxobservatory.eu/publication/foreign-investment-in-the-dubai-housing-market-2020-2024/)
2. **Sweden** (40.5%) - Post-COVID Record High Demand for Residential Housing. [Sweden RE Market Outlook](https://content.cbre.se/hubfs/content%20offers/2022%20Sweden%20Real%20Estate%20Market%20Outlook.pdf)
3. **Mexico** (32.4%) - Price Upward Trend for More Than a Decade, Having Mexico City As The Most Expensive Location. [Statista](https://www.statista.com/statistics/613690/house-price-changes-in-mexico-by-quarter/)

**Stability Leaders**:  
- Switzerland (13.8%)  
- Brazil (17.6%)  
- India (19.7%)  

**Insight**:  
> Markets with >30% average volatility experienced at least one crisis event between 2020-2024.

In [66]:
# Absolute YoY % change of the real house price index per country,
# plus the per-country average, sorted from most to least volatile
price_shift_table = (
    cleaned_df
    .groupby(['Year', 'Country'])
    ['derived_real_house_price_index']
    .median()
    .groupby(level=1)  # within each country, rows are in year order
    .pct_change()
    .mul(100)
    .abs()
    .unstack(level=0)  # Countries as rows, Years as columns
    # Add 'Average' column and sort
    .assign(Average=lambda df: df.mean(axis=1, skipna=True))
    .sort_values('Average', ascending=False)
    .style
    # The first year has no previous value: show it as N/A instead of "nan%"
    .format("{:.1f}%", na_rep="N/A")
    .map(
        # Leave missing cells unstyled; every comparison with NaN is False,
        # which previously painted them green as if they were low-volatility.
        lambda x: '' if pd.isna(x)
        else ('background-color: #FFA07A' if x > 60
        else ('background-color: #FFD700' if x > 20
        else 'background-color: #98FB98'))
    )
)

display(HTML(price_shift_table.to_html()))

Year,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,Average
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
UAE,nan%,8.9%,51.2%,39.2%,16.6%,95.3%,40.0%,84.0%,29.0%,26.2%,43.4%
Sweden,nan%,20.4%,38.6%,48.9%,18.5%,38.8%,101.1%,48.3%,13.6%,36.0%,40.5%
Mexico,nan%,7.1%,14.9%,64.0%,18.3%,30.5%,14.3%,29.7%,93.2%,20.0%,32.4%
France,nan%,25.7%,86.3%,38.6%,73.0%,2.3%,18.6%,10.8%,15.3%,19.3%,32.2%
China,nan%,34.5%,37.7%,26.8%,20.9%,11.9%,15.3%,1.9%,32.2%,107.8%,32.1%
Spain,nan%,10.0%,1.6%,23.6%,21.9%,12.0%,62.7%,48.2%,63.8%,30.7%,30.5%
South Korea,nan%,33.2%,49.6%,2.8%,45.1%,57.6%,8.3%,8.9%,13.2%,40.5%,28.8%
USA,nan%,27.9%,18.7%,7.5%,15.7%,31.9%,3.6%,21.1%,30.2%,81.7%,26.5%
Germany,nan%,1.8%,35.0%,4.8%,30.3%,15.0%,16.1%,48.7%,6.7%,70.1%,25.4%
Australia,nan%,23.3%,32.8%,28.6%,5.4%,63.2%,2.4%,39.0%,10.8%,18.2%,24.9%


##### ***WHAT IS THE REAL MORTGAGE COST CHANGE PER REGIONAL GROUP OVER TIME?***

**Key Insights**:  
- **LATAM experienced extreme volatility**, with  swings from -5.17 (2021) to +6.51 (2022) in one year.  

In [67]:
# Absolute year-over-year change of the real mortgage cost, computed per
# country on rows ordered by country and year
owning_renting_analysis = (
    cleaned_df
    .loc[:, ['Country', 'Year', 'derived_regional_group', 'derived_real_mortgage_cost']]
    .sort_values(by=['Country', 'Year'])
    .assign(
        real_mortgage_abs_yoy=lambda frame: (
            frame.groupby('Country')['derived_real_mortgage_cost'].diff(periods=1).round(2)
        )
    )
)

# Regional view: median change per year (rows) and regional group (columns)
yoy_table_mortgage = (
    owning_renting_analysis
    .groupby(['Year', 'derived_regional_group'])['real_mortgage_abs_yoy']
    .agg('median')
    .unstack(level='derived_regional_group')
    .fillna('N/A')  # the first year has no previous year to compare against
)

# Display the YoY changes table
display(HTML(yoy_table_mortgage.to_html()))

derived_regional_group,APAC,EMEA,LATAM,NA
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015,,,,
2016,0.13,-0.58,-0.54,0.64
2017,-0.61,-1.02,1.38,-3.74
2018,-2.17,-0.32,2.86,3.29
2019,5.83,0.83,0.47,0.55
2020,-0.61,0.39,-3.17,-2.11
2021,-1.33,0.82,-5.17,2.92
2022,1.38,-0.63,6.51,-4.49
2023,-2.01,1.3,0.97,3.41
2024,-0.97,-1.94,-0.81,0.41


##### ***WHICH YEARS SAW THE LARGEST AFFORDABILITY SHIFTS PER REGIONAL GROUP?***

**Phase 1: Pre-Pandemic (2015-2019)**  
- **APAC**: Steady erosion (4.78 → 6.86) despite 2018's 69% YoY spike  
- **NA**: Sharp 2020 surge (8.93) erased earlier gains from 2016's peak (10.45)  

**Phase 2: Pandemic & Recovery (2020-2024)**  
- **LATAM**: Catastrophic 2024 collapse (3.62) after brief 2020 recovery (9.63)  
- **EMEA**: Remarkable stability (6.47-7.81) despite 2020's 31.7% YoY swing   

**Insight**:  
> Clear worldwide Affordability Impact During the pre- and initial stages of COVID-19.

In [68]:
# Median affordability score per year (rows) and regional group (columns).
# Colours: green above 8, yellow above 5, red otherwise.
afford_table = (
    cleaned_df
    .groupby(['Year', 'derived_regional_group'])['derived_affordability_gdp_population_score']
    .agg('median')
    .unstack(level='derived_regional_group')
    .style
    .format("{:.2f}")
    .map(
        lambda score: 'background-color: ' + (
            '#98FB98' if score > 8 else '#FFD700' if score > 5 else '#FFA07A'
        )
    )
)

display(HTML(afford_table.to_html()))

derived_regional_group,APAC,EMEA,LATAM,NA
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015,4.78,8.37,7.0,9.77
2016,5.72,6.47,7.0,10.45
2017,4.95,8.15,4.29,7.16
2018,8.38,7.74,3.75,5.62
2019,5.58,9.91,5.81,6.25
2020,7.51,6.77,9.63,8.93
2021,8.03,8.89,5.71,6.06
2022,8.37,7.19,6.45,6.34
2023,3.73,6.46,8.08,6.52
2024,6.86,7.81,3.62,4.43


Median affordability level per region/year

In [69]:
# Absolute YoY % change of the median regional affordability score
afford_shift_table = (
    cleaned_df
    .groupby(['Year', 'derived_regional_group'])
    ['derived_affordability_gdp_population_score'].median()
    .groupby(level=1)  # within each region, rows are in year order
    .pct_change()      # YoY % change
    .mul(100)
    .abs()             # Focus on magnitude of change
    .unstack()
    .style
    # The first year has no previous value: show it as N/A instead of "nan%"
    .format("{:.1f}%", na_rep="N/A")
    .map(
        # Leave missing cells unstyled; every comparison with NaN is False,
        # which previously painted them green as if they were small shifts.
        lambda x: '' if pd.isna(x)
        else ('background-color: #FFA07A' if x > 50
        else ('background-color: #FFD700' if x > 30
        else 'background-color: #98FB98'))
    )
)

display(HTML(afford_shift_table.to_html()))

derived_regional_group,APAC,EMEA,LATAM,NA
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015,nan%,nan%,nan%,nan%
2016,19.5%,22.7%,0.1%,6.9%
2017,13.3%,25.9%,38.7%,31.5%
2018,69.2%,5.0%,12.7%,21.5%
2019,33.4%,28.0%,55.1%,11.2%
2020,34.5%,31.7%,65.7%,42.9%
2021,6.9%,31.3%,40.7%,32.1%
2022,4.3%,19.2%,12.9%,4.6%
2023,55.4%,10.2%,25.3%,2.8%
2024,83.6%,20.9%,55.3%,32.0%


Largest Affordability Shifts (Absolute YoY % Change)

##### ***HOW DOES THE COST OF OWNING VS RENTING EVOLVE ACROSS REGIONAL GROUPS:*** 
##### How Many Years of Rent Equal the House Price?

**Key Regional Patterns**:  
- **LATAM's Affordability Collapse**: Buying a home went from 5.3 years of rent (2015) to 11.7 years (2024)—a **122% deterioration**. This signals severe ownership accessibility issues.  
- **APAC's Swings**: Extreme volatility (3.31 years in 2019 → 10.0 years in 2015) reflects policy experimentation and speculative bubbles.  
- **NA's Stability Advantage**: Maintained the healthiest balance (4.5-6.7 years), though 2016's spike (11.0 years) revealed mortgage-rate sensitivity.  

**Critical Threshold Breaches**:  
> "When ownership costs exceed 8 years of rent (red), renting becomes economically rational. LATAM breached this in 2019/2024, APAC in 2015/2020/2022, and EMEA in 2022."  


In [70]:
# Median ownership/rent spread per year (rows) and regional group (columns).
# Colours: red above 8, yellow above 5, green otherwise.
spread_table = (
    cleaned_df
    .groupby(['Year', 'derived_regional_group'])['derived_ownership_rent_spread']
    .agg('median')
    .unstack(level='derived_regional_group')
    .style
    .format("{:.2f}")
    .map(
        lambda spread: 'background-color: ' + (
            '#FFA07A' if spread > 8 else '#FFD700' if spread > 5 else '#98FB98'
        )
    )
)

display(HTML(spread_table.to_html()))

derived_regional_group,APAC,EMEA,LATAM,NA
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015,10.02,9.22,5.28,4.25
2016,5.86,5.86,8.47,10.97
2017,7.84,5.03,6.45,4.06
2018,3.12,4.72,7.56,5.69
2019,3.31,7.09,10.22,6.57
2020,8.1,5.32,6.22,6.47
2021,5.64,6.8,3.32,5.65
2022,8.63,8.25,6.09,4.46
2023,8.08,6.24,8.47,6.58
2024,5.45,6.1,11.71,5.22


#### ***WHAT IS THE COST PER UNIT OF URBANIZATION PER REGION? AND HOW DOES IT CHANGE OVER TIME?***

Unit: USD per 1% of urbanization

**Decade-Long Trends**:  
- **APAC**: Maintained lowest costs (avg 150 USD/%) but paid stability premium—2020's 57% YoY spike reflected supply chain disruptions.  
- **LATAM's Lost Opportunity**: Despite 2017-2019 improvements, 2024's 203 USD/% represents **failed urbanization value capture**.  

In [71]:
# Median construction/urbanization ratio per year (rows) and regional group
# (columns). Colours: red above 150, yellow above 120, green otherwise.
urban_efficiency = (
    cleaned_df
    .groupby(['Year', 'derived_regional_group'])['derived_construction_urbanization_ratio']
    .agg('median')
    .unstack(level='derived_regional_group')
    .style
    .format("{:.2f}")
    .map(
        lambda ratio: 'background-color: ' + (
            '#FFA07A' if ratio > 150 else '#FFD700' if ratio > 120 else '#98FB98'
        )
    )
)

display(HTML(urban_efficiency.to_html()))

derived_regional_group,APAC,EMEA,LATAM,NA
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015,129.18,139.27,217.42,118.12
2016,159.79,153.83,132.16,171.19
2017,150.8,122.64,133.94,129.39
2018,149.65,169.98,149.26,153.61
2019,101.91,159.49,134.69,155.65
2020,159.47,149.19,186.0,143.06
2021,152.01,135.86,160.7,160.73
2022,180.96,159.14,163.02,136.52
2023,162.62,139.96,153.46,189.43
2024,154.45,132.07,203.8,108.19


Largest cost changes (Absolute YoY % Change)

In [72]:
# Absolute YoY % change of the median regional construction/urbanization ratio
urban_shift_table = (
    cleaned_df
    .groupby(['Year', 'derived_regional_group'])
    ['derived_construction_urbanization_ratio']
    .median()
    .groupby(level=1)  # within each region, rows are in year order
    .pct_change()
    .mul(100)
    .abs()
    .unstack()
    .style
    # The first year has no previous value: show it as N/A instead of "nan%"
    .format("{:.1f}%", na_rep="N/A")
    .map(
        # Leave missing cells unstyled; every comparison with NaN is False,
        # which previously painted them green as if they were small shifts.
        lambda x: '' if pd.isna(x)
        else ('background-color: #FFA07A' if x > 30
        else ('background-color: #FFD700' if x > 15
        else 'background-color: #98FB98'))
    )
)

display(HTML(urban_shift_table.to_html()))

derived_regional_group,APAC,EMEA,LATAM,NA
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015,nan%,nan%,nan%,nan%
2016,23.7%,10.5%,39.2%,44.9%
2017,5.6%,20.3%,1.4%,24.4%
2018,0.8%,38.6%,11.4%,18.7%
2019,31.9%,6.2%,9.8%,1.3%
2020,56.5%,6.5%,38.1%,8.1%
2021,4.7%,8.9%,13.6%,12.4%
2022,19.0%,17.1%,1.4%,15.1%
2023,10.1%,12.1%,5.9%,38.8%
2024,5.0%,5.6%,32.8%,42.9%
