# FCD Project: Investigation of the relation between the housing prices and other variables

Content:

[Dataset with house pricing](link)

[Dataset with income data](link)

[Weather API](link)

## Setup

Necessary setup:

- importing the necessary libraries
- the dataset files should be in the same directory as the notebook

### Import & install libraries

Either uncomment and run the cell below, or install the dependencies with `uv` (e.g. `uv add -r requirements.txt`).

In [None]:
# uncomment to install the libraries
# %pip install pandas numpy
# %pip install seaborn matplotlib requests scikit-learn openpyxl

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import os
import unicodedata
import time
import requests
import sys


# Set a nice style for our plots
# sns.set_style("whitegrid")

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides library warnings (e.g. pandas chained-assignment
# or parser-fallback warnings) that may point at real problems in later cells.
warnings.filterwarnings('ignore')

### Project structure

In [None]:
# All data directories are resolved relative to the current working directory.
cwd = os.getcwd()

DATA_DIR = os.path.join(cwd, "data")
PDATA_DIR = os.path.join(DATA_DIR, "processed")
API_DATA = os.path.join(DATA_DIR, "api")

dirs = (DATA_DIR, PDATA_DIR, API_DATA)

# exist_ok=True keeps re-runs idempotent while still surfacing real failures
# (e.g. permission errors), which the former bare `except` silently swallowed.
for d in dirs:
    os.makedirs(d, exist_ok=True)

### Constants

In [None]:
# Raw input files, expected directly inside DATA_DIR
HOUSE_FILE = os.path.join(DATA_DIR, "house_pricing_raw.csv")
HOUSE_EXCEL_FILE = os.path.join(DATA_DIR, "Destaque_HAB_1T2025_EN.xlsx")
INCOME_FILE = os.path.join(DATA_DIR, "582.csv")
DENSITY_FILE = os.path.join(DATA_DIR, "density.csv")
AGE_DISTRIBUTION_FILE = os.path.join(DATA_DIR, "age-distribution.csv")

# Preprocessed data (written by this notebook into PDATA_DIR)
HOUSE_CLEAN_FILE = os.path.join(PDATA_DIR, "house_NUTS4.csv")
MUNICIPALITIES_FILE = os.path.join(PDATA_DIR, "municipalities_list.csv")
INCOME_CLEAN_FILE = os.path.join(PDATA_DIR, "total_average_income_by_municipality.csv")
WEATHER_QUARTER_FILE = os.path.join(PDATA_DIR, "weather_quarterly.csv")
MASTER_DF_FILE = os.path.join(PDATA_DIR, "master_dataset.csv")
DENSITY_CLEAN_FILE = os.path.join(PDATA_DIR, "density_by_municipality.csv")
AGE_DISTRIBUTION_CLEAN_FILE = os.path.join(PDATA_DIR, "age_distribution_by_municipality.csv")

# Data from API (read from API_DATA; these files are not produced in the cells shown here)
WEATHER_FILE = os.path.join(API_DATA, "weather.csv")
SERVICES_FILE = os.path.join(API_DATA, 'osm_services_counts.csv')
REGION_FILE = os.path.join(API_DATA, "region_lat_lon.csv")

In [None]:
# Name of the helper column that holds the per-row count of missing ("//") prices.
COLUMN_MISSING_VALUES = "nan count"

In [None]:
# Read the "Municipality" sheet without a header, skipping 3 leading and 2 trailing rows.
NUTS4 = pd.read_excel(HOUSE_EXCEL_FILE, sheet_name="Municipality", skiprows=3, header=None, skipfooter=2)

# Rows whose value in column 0 is 1, 2 or 3 characters long.
# NOTE(review): column 0 presumably holds the territorial code, with the code
# length identifying the NUTS1/NUTS2/NUTS3 aggregate levels - confirm against the sheet.
indexes_length_1 = NUTS4[0][NUTS4[0].str.len() == 1].index.tolist()
indexes_length_2 = NUTS4[0][NUTS4[0].str.len() == 2].index.tolist()
indexes_length_3 = NUTS4[0][NUTS4[0].str.len() == 3].index.tolist()

# Column 1 (the designation) of those aggregate rows.
NUTS1 = NUTS4.iloc[indexes_length_1][1]
NUTS2 = NUTS4.iloc[indexes_length_2][1]
NUTS3 = NUTS4.iloc[indexes_length_3][1]

# Unique designations of all aggregate levels; these are not municipalities.
mun_to_drop = pd.concat([NUTS1, NUTS2, NUTS3]).unique()

# Mask every cell equal to an aggregate designation (it becomes NaN), drop the
# rows that contain any NaN and keep column 1 of what is left.
# NOTE(review): dropna() also removes rows that have a NaN in any other column,
# and a municipality sharing its name with an aggregate is removed too - confirm.
MUNICIPALITIES = NUTS4[~NUTS4.isin(mun_to_drop)].dropna(ignore_index=True)[1]
# From here on NUTS4 is only the designation column (all rows, original order).
NUTS4 = NUTS4[1]

In [None]:
# Show the designations that remain after removing the aggregate regions.
MUNICIPALITIES

### Fetch regions data (latitude & longitude)

In [None]:
# REGION_FILE is already a full path inside API_DATA, so it is used directly
# instead of being joined with API_DATA a second time.
regions_df = pd.read_csv(REGION_FILE)

## EDA

### Housing pricing

#### Raw data

In [None]:
# Load the raw file as-is (Latin-1, semicolon-separated) to inspect its layout.
pd.read_csv(HOUSE_FILE, encoding="latin-1", sep=";")

The last 4 rows are a footer and the first 3 rows are a corrupted header, so both are skipped when loading the data.

In [None]:
# Skip the 3 corrupted header rows and the 4 footer rows.
# skipfooter is only supported by the Python parser, so the engine is requested
# explicitly instead of relying on pandas' fallback (whose warning is hidden
# by the global warnings filter).
house_raw_df = pd.read_csv(HOUSE_FILE,
                           encoding="latin-1", sep=";",
                           header=None, skiprows=3, skipfooter=4,
                           engine="python")
house_raw_df.head()

In [None]:
# Column dtypes and non-null counts of the raw table.
house_raw_df.info()

We see the entirely empty columns 24, 47 and 70. Those are the separation columns between 3 sections of the table: 
- 1st Quartile of dwellings sales per m² (in the last 12 months)[2-23]
- Median value per m² of dwellings sales (in the last 12 months)[25-46]
- 3rd Quartile of dwellings sales per m² (in the last 12 months)[48-69]

We'll use the Designation column (index 1) and the columns at indices 25–46 (Median value per m² of dwellings sales); indices are 0-based, as in the list above.

#### Preprocessing of the data

In [None]:
# Header for the price table: "Designation" followed by one label per quarter
# ("<q>Q <yyyy>"), newest first, from 1Q 2025 back to 4Q 2019.
years = range(2019, 2026)

quarter_headers = [
    f"{quarter}Q {year}"
    for year in reversed(years)
    for quarter in (4, 3, 2, 1)
    # only the span 4Q 2019 .. 1Q 2025 is present in the data
    if (2019, 4) <= (year, quarter) <= (2025, 1)
]
house_header = ["Designation"] + quarter_headers

# Extract median prices (positional columns 25..46 of the raw table)
house_medians = house_raw_df.iloc[:, 25:47]

# Create a new DataFrame: designations from the Excel sheet next to the prices.
# NOTE(review): concat aligns on the row index, so this assumes both sources
# list the regions in the same order - confirm.
house_df = pd.concat([NUTS4, house_medians], axis=1)
house_df.columns = house_header

# Keep municipalities only. The result is built as an independent frame
# (.copy() + non-inplace set_index) rather than by mutating a filtered slice
# in place, so the column assignments in later cells are unambiguous.
house_df = (
    house_df[house_df["Designation"].isin(MUNICIPALITIES)]
    .copy()
    .set_index("Designation")
)
house_df

Missing values are represented as "//".

In [None]:
# Count the "//" placeholders per municipality, then list the rows that have any.
house_df[COLUMN_MISSING_VALUES] = house_df[quarter_headers].eq('//').sum(axis=1)
house_df.loc[house_df[COLUMN_MISSING_VALUES] > 0, COLUMN_MISSING_VALUES]

We'll delete the rows with more than 11 empty columns.

In [None]:
# Convert the price columns from text to floats:
#   - remove the spaces inside the numbers,
#   - turn the "//" placeholder into NaN,
#   - coerce anything that is still not numeric to NaN. The former
#     `astype(float, errors="ignore")` silently left the whole column as text
#     whenever a single value could not be parsed.
for col in quarter_headers:
    tmp_series = pd.Series(house_df[col], dtype="str")
    tmp_series = tmp_series.replace(" ", "", regex=True)
    tmp_series = tmp_series.replace("//", np.nan, regex=False)
    # to_numeric may return integers when nothing is missing; force float64
    # so every quarter column has the same dtype.
    house_df[col] = pd.to_numeric(tmp_series, errors="coerce").astype(float)

In [None]:
# Drop the helper column first so it neither counts as an observed value in
# the threshold below nor takes part in the imputation.
house_df = house_df.drop(columns=[COLUMN_MISSING_VALUES])

# Delete the rows with more than 11 empty quarters, i.e. keep the rows with at
# least len(quarter_headers) - 11 observed prices. The former
# `dropna(thresh=10)` counted the helper column as well, so rows with up to
# 13 missing quarters were kept.
MAX_MISSING_QUARTERS = 11
house_df = house_df.dropna(
    subset=quarter_headers,
    thresh=len(quarter_headers) - MAX_MISSING_QUARTERS,
)

# Fill the remaining gaps with the mean of the respective quarter (column).
# NOTE(review): this is the average over all municipalities, not a value
# derived from the municipality's own series - confirm this is intended.
house_df = house_df.fillna(house_df.mean())
house_df.info()

In [None]:
# Save the municipalities list (the ones that survived the cleaning above)
MUNICIPALITIES = pd.Series(house_df.index.to_list())
MUNICIPALITIES.to_csv(MUNICIPALITIES_FILE, header=['region'])

# Write through the shared constant instead of re-spelling the path, so it
# stays in sync with the cell that reads the file back via HOUSE_CLEAN_FILE.
house_df.to_csv(HOUSE_CLEAN_FILE)

#### Descriptive Statistics

In [None]:
# Summary statistics of every quarter column across the remaining municipalities.
house_df.describe()

- There is a **consistent increase** in the mean and median values over time, suggesting an **upward trend** in median prices.
- The **standard deviation is relatively high**, indicating substantial variation across regions or categories.
- **Outliers** may be present, as seen in the large gap between the 75th percentile and the maximum value.
- The distribution is likely **right-skewed** (mean > median), with a few very high values pulling the average up.

In [None]:
# National trend: mean over municipalities for every quarter, drawn from the
# oldest to the newest quarter (the columns are stored newest first).
plt.figure(figsize=(12, 6))
house_df[quarter_headers].mean()[::-1].plot.line(marker="o")
ticks = range(len(quarter_headers))

plt.title('Average Median Prices per Quarter in Portugal')
plt.xlabel('Quarter')
plt.ylabel('Average Price')
# One tick per quarter, labelled in the same chronological order as the line.
plt.xticks(ticks=ticks, labels=list(reversed(quarter_headers)), rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()

We clearly see an upward trend over time, which suggests adding a time feature to the model.

In [None]:
# Average price of every municipality over all quarters (row-wise mean),
# plus the index labels of the highest and the lowest average.
region_avg_prices = house_df.mean(axis=1)
most_expensive_region = region_avg_prices.idxmax()
cheapest_region = region_avg_prices.idxmin()

In [None]:
# The five municipalities with the highest average price.
region_avg_prices.nlargest(5)

In [None]:
# The five municipalities with the lowest average price.
region_avg_prices.nsmallest(5)

In [None]:
# Compare the two price extremes with a few hand-picked municipalities.
# dict.fromkeys() removes duplicates while keeping the order (an extreme may
# also be one of the hand-picked names), and the index check skips names that
# were dropped during cleaning instead of failing with a KeyError.
candidate_regions = [most_expensive_region, cheapest_region, 'Porto', 'Faro', 'Aveiro', 'Viseu']  # or any subset
regions_to_plot = [r for r in dict.fromkeys(candidate_regions) if r in house_df.index]
plt.figure(figsize=(12, 6))

for region in regions_to_plot:
    # Columns are stored newest first; reverse them to plot chronologically.
    house_df.loc[region][::-1].plot(label=region)

plt.legend()
plt.title("Housing Price Trends by Region")
plt.xlabel("Quarter")
plt.ylabel("Median Price (€)")
plt.grid(True)
plt.show()

In [None]:
# Reshape to long format: one row per (municipality, quarter).
house_reset = house_df.reset_index()
df_long = house_reset.melt(
    id_vars=['Designation'],
    var_name='Time_Period',
    value_name='Price'
)

# Summary statistics of the quarterly prices for every municipality.
# (A bare `df_long.head()` used to precede this line; in the middle of a cell
# its result was discarded, so it has been removed.)
descriptive_stats = df_long.groupby('Designation')['Price'].describe().round(2)
print(descriptive_stats.to_string())

In [None]:
# Persist the per-municipality statistics next to the other processed files.
descriptive_stats.to_csv(os.path.join(PDATA_DIR, 'house_n4_descriptive_stats_by_designation.csv'))

### Income

In [None]:
# Load the raw income table (UTF-8) and preview the first rows.
income_raw_df = pd.read_csv(INCOME_FILE, encoding="utf-8")
income_raw_df.head()

#### Preprocessing

In [None]:
# Scaffold: echo every raw column name mapped to an empty string, as a
# template for the rename mapping defined in the next cell.
{i: "" for i in  income_raw_df.columns}

In [None]:
# Rename mapping for the income table: original (Portuguese) column name ->
# short English name used in the rest of the notebook.
income_columns = {
    '01. Ano': 'year',
    '02. Nome Região (Portugal)': 'region',
    '03. Âmbito Geográfico': 'scope',
    '04. Filtro 1': 'education_level',
    '07. Escala': 'unit',
    '08. Símbolo': 'symbol',
    '09. Valor': 'value',
}

In [None]:
# Drop the two unused filter columns, switch to the English names, discard
# rows that lack any of the key fields and store the year as an integer.
income_df = (
    income_raw_df
    .drop(columns=["05. Filtro 2", "06. Filtro 3"])
    .rename(columns=income_columns)
    .dropna(subset=["value", "year", "scope", "education_level"])
    .astype({"year": int})
)
# Municipality-level rows from 2019 onwards only.
income_df = income_df[(income_df["scope"] == "Município") & (income_df["year"] >= 2019)]
income_df

In [None]:
# Keep only the rows whose education level is 'Total'.
total_income_df = income_df[income_df['education_level'] == 'Total'].copy()
# Reuse the shared constant instead of re-spelling the path: the combined
# dataset reads this file back through INCOME_CLEAN_FILE.
total_income_file = INCOME_CLEAN_FILE
total_income_df.to_csv(total_income_file, index=False)

total_income_df.head()

In [None]:
# Everything except the 'Total' rows, i.e. the values broken down by education level.
education_income_df = income_df[income_df['education_level'] != 'Total'].copy()
avg_income_file = os.path.join(PDATA_DIR, 'average_income_by_education.csv')
education_income_df.to_csv(avg_income_file, index=False)

education_income_df.head()

#### Descriptive statistics

In [None]:
# Rank municipalities by their income averaged over the available years.
# The 'Total' rows are used on purpose: averaging the per-education rows gives
# every education level the same weight regardless of how many earners it
# represents, which does not yield the municipal average.
income_avg_df = total_income_df.groupby('region')['value'].mean().reset_index()

print("\n--- Top 10 Consistently Highest-Income Municipalities (Avg. 2019-2023) ---")
top_10_consistent = income_avg_df.sort_values('value', ascending=False).head(10)
print(top_10_consistent)

print("\n--- Bottom 10 Consistently Lowest-Income Municipalities (Avg. 2019-2023) ---")
bottom_10_consistent = income_avg_df.sort_values('value', ascending=True).head(10)
print(bottom_10_consistent)

In [None]:
# We group by 'year' and get the mean of ALL municipalities for that year.
# The 'Total' rows are the municipal figures; the per-education rows would be
# averaged with equal weight per education level.
df_avg_by_year = total_income_df.groupby('year')['value'].mean().reset_index()

# Plot 2: Line Chart (Evolution)
plt.figure(figsize=(10, 6))
sns.lineplot(data=df_avg_by_year, x='year', y='value', marker='o')

# The income table was filtered to year >= 2019 during preprocessing, so the
# title states 2019 (it used to claim 2015) as the first year.
plt.title('Average Municipal Income in Portugal (2019-2023)')
plt.xlabel('Year')
plt.ylabel('Average Monthly Income (Euros)')
plt.show()

In [None]:
# Yearly distribution of income across municipalities. Uses the 'Total' rows
# so that every box summarises municipalities, not municipality x education-
# level combinations.
plt.figure(figsize=(12, 7))
sns.boxplot(data=total_income_df, x='year', y='value')

plt.title('Distribution of Municipal Income (2019-2023)')
plt.xlabel('Year')
plt.ylabel('Average Monthly Income (Euros)')
plt.show()

In [None]:
# First entries of the two rankings printed earlier.
richest_name = top_10_consistent['region'].iloc[0]
poorest_name = bottom_10_consistent['region'].iloc[0]

# We'll plot Lisboa, Porto, and the #1 most/least wealthy from our consistent list
mun_to_compare = ['Lisboa', 'Porto', richest_name, poorest_name]
# Take the 'Total' rows: with the per-education rows seaborn would average the
# education levels (equal weight each) into every point of the line.
df_compare = total_income_df[total_income_df['region'].isin(mun_to_compare)]

plt.figure(figsize=(12, 7))
sns.lineplot(data=df_compare, x='year', y='value', hue='region', marker='o')

# Plain string: the former f-string had no placeholders.
plt.title("Income Evolution for Select Municipalities (2019-2023)")
plt.xlabel('Year')
plt.ylabel('Average Monthly Income (Euros)')
plt.legend()
plt.show()

### Weather

In [None]:
# Daily weather records; the first CSV column is the saved index.
weather_daily_df = pd.read_csv(WEATHER_FILE, index_col=0)
weather_daily_df["date"] = pd.to_datetime(weather_daily_df["date"])
# Keep data from 4Q 2019 on. .copy() makes the filtered frame independent, so
# the column assignments below (and in the following cells) do not write to a
# slice of the full table.
weather_daily_df = weather_daily_df[weather_daily_df["date"] >= "2019-10-01"].copy()
# Quarter label in the same format as the house-price columns: "2019Q4" -> "4Q 2019".
weather_daily_df["quarter_year"] = (
    weather_daily_df["date"]
    .dt.to_period("Q")
    .astype(str)
    .str.replace(r'(\d{4})Q(\d)', r"\2Q \1", regex=True)
)
weather_daily_df = weather_daily_df.rename(columns={'region': 'municipality'})

#### Feature engineering

Convert sunshine duration to hours (originally in seconds).

In [None]:
# Swap the seconds column for an hours column (1 h = 3600 s): pop() removes
# the source column and hands back its values in one step.
weather_daily_df["sunshine_duration_h"] = weather_daily_df.pop("sunshine_duration_s") / 3600
weather_daily_df.head()

In [None]:
# Daily flags (plus the mid-range temperature) used by the quarterly aggregation.
weather_daily_df = weather_daily_df.assign(
    is_rainy=weather_daily_df["precipitation_sum_mm"].gt(0),
    is_sunny=weather_daily_df["sunshine_duration_h"].ge(8),
    is_windy=weather_daily_df["windspeed_mean_kmh"].gt(30.0),
    no_rain=weather_daily_df["precipitation_sum_mm"].eq(0),
    # "ok" means the whole day stayed within 17..28 degrees C
    is_temp_ok=(
        weather_daily_df["temp_min_c"].ge(17)
        & weather_daily_df["temp_max_c"].le(28)
    ),
    temp_mean_c=(weather_daily_df["temp_max_c"] + weather_daily_df["temp_min_c"]) / 2,
)

In [None]:
# Collapse the daily records to one row per municipality and quarter.
# Each keyword is an output column given as (source column, aggregation).
weather_quarter_df = (
    weather_daily_df
    .groupby(by=["municipality", "quarter_year"])
    .agg(
        # totals / means over the quarter
        total_sunshine_h=("sunshine_duration_h", "sum"),
        mean_sunshine_h=("sunshine_duration_h", "mean"),
        windspeed_mean_kmh=("windspeed_mean_kmh", "mean"),
        total_precipitation_mm=("precipitation_sum_mm", "sum"),
        mean_precipitation_mm=("precipitation_sum_mm", "mean"),
        # day counts: summing a boolean flag counts the days where it is True
        windy_days=("is_windy", "sum"),
        rainy_days=("is_rainy", "sum"),
        sunny_days=("is_sunny", "sum"),
        warm_days=("is_temp_ok", "sum"),
    )
    .reset_index()
)
# Spot-check a single municipality.
weather_quarter_df[weather_quarter_df["municipality"] == "Aveiro"]

In [None]:
# Persist the quarterly weather table (without the row index).
weather_quarter_df.to_csv(WEATHER_QUARTER_FILE, index=False)

#### Bivariate analysis (Weather & House prices)

In [None]:
# Placeholder: the weather vs. house-price analysis is not implemented yet.
pass

### Services

In [None]:
# Names of the service columns; rows lacking any of them are dropped when the
# modelling table is built.
# NOTE(review): presumably these are the count columns of the services file - confirm.
service_cols = (
    "cinema college courthouse fire_station hospital "
    "kindergarten library mall museum pharmacy police "
    "post_office school station theatre university"
).split()

### Density

In [None]:
# Load the raw density table; 'utf-8-sig' strips a leading byte-order mark if present.
df_density = pd.read_csv(DENSITY_FILE, encoding='utf-8-sig')
df_density.head()

In [None]:
# Store the year as a nullable integer so it can be compared numerically.
df_density['01. Ano'] = df_density['01. Ano'].astype('Int64')

# Municipality-level rows for 2019-2023, reduced to municipality / year / value
# and given English column names.
df_density = (
    df_density[df_density['04. Âmbito Geográfico'] == 'Município']
    .drop(columns=[
        '02. Nome País (Europa)', '04. Âmbito Geográfico', '05. Filtro 1',
        '06. Filtro 2', '07. Filtro 3', '09. Símbolo', "Max. 08. Escala ",
    ])
    .pipe(lambda frame: frame[(frame['01. Ano'] >= 2019) & (frame['01. Ano'] <= 2023)])
    .rename(columns={
        "03. Nome Região (Portugal)": "municipality",
        "01. Ano": "year",
        "10. Valor": "people/km2",
    })
    .reset_index(drop=True)
)

df_density.head()

In [None]:
# Persist the cleaned density table (without the row index).
df_density.to_csv(DENSITY_CLEAN_FILE, index=False)

### Age Distribution

In [None]:
# Load the raw age-distribution table; 'utf-8-sig' strips a leading byte-order mark if present.
df_age = pd.read_csv(AGE_DISTRIBUTION_FILE, encoding='utf-8-sig')
df_age.head()

In [None]:
# List the column names of the raw table.
list(df_age)

In [None]:
# Store the year as a nullable integer so it can be compared numerically.
df_age['01. Ano'] = df_age['01. Ano'].astype('Int64')

# Municipality-level rows with "Filtro 1" == "Total" for 2019-2023, reduced to
# municipality / year / age group / value and given English column names.
df_age = (
    df_age[
        (df_age['04. Âmbito Geográfico'] == 'Município')
        & (df_age["05. Filtro 1"] == "Total")
    ]
    .drop(columns=[
        "02. Nome País (Europa)", "04. Âmbito Geográfico", "05. Filtro 1",
        "07. Filtro 3", "08. Escala ", "09. Símbolo",
    ])
    .pipe(lambda frame: frame[(frame['01. Ano'] >= 2019) & (frame['01. Ano'] <= 2023)])
    .rename(columns={
        "03. Nome Região (Portugal)": "municipality",
        "01. Ano": "year",
        "06. Filtro 2": "age_group",
        "10. Valor": "number_of_people",
    })
    .reset_index(drop=True)
)

# Drop the all-ages rows and shorten the labels by removing the " anos" suffix.
df_age = df_age[df_age['age_group'] != 'Total'].assign(
    age_group=lambda frame: frame['age_group'].str.replace(' anos', '').str.strip()
)

# Wide format: one row per (municipality, year), one column per age group;
# combinations without a value become 0.
df_age = df_age.pivot(
    index=['municipality', 'year'],
    columns="age_group",
    values="number_of_people",
).fillna(0)


In [None]:
# Persist the wide age table; the (municipality, year) index is written as columns.
df_age.to_csv(AGE_DISTRIBUTION_CLEAN_FILE)

### Combined

Since income data is only available up to 2023, we restrict the combined dataset to the years up to and including 2023.

In [None]:
# Reload the tables from disk so this section works from the saved files.
df_house, df_weather, df_income, df_services = (
    pd.read_csv(path)
    for path in (HOUSE_CLEAN_FILE, WEATHER_QUARTER_FILE, INCOME_CLEAN_FILE, SERVICES_FILE)
)

# Chronological ordinal of every quarter label: quarter_headers is ordered
# newest first, so its last entry maps to 1 and its first to len(quarter_headers).
quarter_mapper = {
    label: len(quarter_headers) - position
    for position, label in enumerate(quarter_headers)
}


In [None]:
# --- House prices in long format: one row per (municipality, quarter) ---
melted_cols = df_house.columns.drop('Designation').to_list()
df_house_melted = pd.melt(
    df_house,
    id_vars=['Designation'],
    value_vars=melted_cols,
    var_name='quarter_year',
    value_name='house_price',
).rename(columns={'Designation': 'municipality'})

# --- Yearly income: harmonise the key names and strip a trailing "[...]" from the names ---
df_income_clean = df_income[['year', 'region', 'value']].rename(
    columns={'region': 'municipality', 'value': 'avg_income'}
)
df_income_clean['municipality'] = (
    df_income_clean['municipality']
    .str.replace(r'\s*\[.*\]', '', regex=True)
    .str.strip()
)

# --- Services: only the key column needs renaming ---
df_services_clean = df_services.rename(columns={'area': 'municipality'})

# --- Prices + weather; only pairs present in both tables survive the inner join ---
df_merged_quarterly = df_house_melted.merge(
    df_weather,
    on=['municipality', 'quarter_year'],
    how='inner',
)

# Time features derived from the "<q>Q <yyyy>" label.
df_merged_quarterly['year'] = (
    df_merged_quarterly['quarter_year'].str.rsplit(' ', n=1).str[-1].astype(int)
)
df_merged_quarterly["quarter_num"] = (
    df_merged_quarterly['quarter_year']
    .str.split(" ").str[0]
    .str.replace("Q", "", regex=False)
    .astype(int)
)
df_merged_quarterly["quarter_ord"] = df_merged_quarterly["quarter_year"].map(quarter_mapper)

# Cut at 2023 (inclusive).
df_merged_quarterly = df_merged_quarterly.loc[df_merged_quarterly["year"] <= 2023]

df_merged_quarterly

In [None]:
# Attach the remaining tables with left joins so that no price row is lost.
# Income, density and age distribution are yearly (municipality + year keys);
# services are joined on the municipality only.
df_merged_with_income = df_merged_quarterly.merge(
    df_income_clean, on=['municipality', 'year'], how='left'
)
df_merged_with_density = df_merged_with_income.merge(
    df_density, on=['municipality', 'year'], how='left'
)
df_merged_with_age_distribution = df_merged_with_density.merge(
    df_age, on=['municipality', 'year'], how='left'
)
df_final = df_merged_with_age_distribution.merge(
    df_services_clean, on='municipality', how='left'
)

df_final.head()

In [None]:
# Keep rows with a strictly positive price so the logarithm below is defined.
df_final = df_final[df_final["house_price"].notna() & (df_final["house_price"] > 0)].copy()
# Target variable: natural log of the price.
df_final["log_price_sqm"] = np.log(df_final["house_price"])
# Drop the raw price and the year / quarter-label columns (quarter_num and
# quarter_ord stay as the time features).
df_a = df_final.drop(columns=["year", "quarter_year", "house_price"])
# Discard rows for which any service column is missing.
df_a.dropna(subset=service_cols, inplace=True)
df_a.info()

In [None]:
# Keep only complete rows: every column except the first one must be present.
model_df = df_a.dropna(subset=df_a.columns.to_list()[1:])
y = model_df["log_price_sqm"]
X = model_df.drop(columns=["log_price_sqm"])
# Display only: the re-indexed frame is not assigned back, so model_df keeps
# its original index.
model_df.reset_index(drop=True)

In [None]:
# Persist the modelling table; the row index is written as the first, unnamed column.
model_df.to_csv(MASTER_DF_FILE)

In [None]:
# Preview the cleaned density table.
df_density.head()

Merging density and age distribution to the master dataframe

In [None]:
# The density and age-distribution columns are already part of model_df: both
# tables were joined on (municipality, year) while df_final was assembled.
# Merging them again here cannot work - `year` was dropped when df_a was built,
# so model_df no longer has the join key and pd.merge raises a KeyError - and
# it would only duplicate the columns. The cell therefore just shows the result.
model_df.head()

## Modeling (Linear Regression)

In [None]:
# Reload the saved modelling table and drop the unnamed column that holds the
# row index written by to_csv.
df = pd.read_csv(MASTER_DF_FILE).drop(columns="Unnamed: 0")
df

In [None]:
# Target: log price. Features: every other column except the municipality name.
y = df["log_price_sqm"]
X = df.drop(columns=["municipality", "log_price_sqm"])
X

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import RidgeCV, LassoCV, ElasticNetCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

Since the features are on very different scales (sunshine in hours (500+) versus malls (up to 10-20)), we'll standardize the data. Also, since the number of samples is relatively small ($\approx$ 4000), we can use the `Ridge`, `Lasso` and `ElasticNet` regression models.

In [None]:
# Seed used as random_state for the train/test split and the CV estimators.
SEED = 42
# Fraction of the samples held out as the test set.
SPLIT_SIZE = 0.2
# Intended iteration cap for the iterative solvers.
MAX_ITER = 4000
# Candidate regularisation strengths: 40 values, log-spaced from 1e-6 to 1e6.
ALPHAS = np.logspace(-6, 6, 40)

In [None]:
# Scaler shared by all models; it is fitted on the training split only (next cell).
scaler = StandardScaler()

In [None]:
# Random hold-out split, reproducible through SEED.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=SPLIT_SIZE, random_state=SEED
)

# Fit the scaler on the training data only and reuse its statistics for the
# test data, so no information from the test set reaches the scaling.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Column names seen during fit; used to label the coefficients below.
feature_names = scaler.get_feature_names_out()

### Lasso 

Linear Model trained with L1 prior as regularizer (aka the Lasso). Feature selection.

In [None]:
# Cross-validated Lasso over the ALPHAS grid.
# max_iter=MAX_ITER: the constant was defined for the iterative solvers but
# never passed, so the fit ran with the library default.
# NOTE(review): positive=True restricts every coefficient to be >= 0, so
# features with a negative effect are forced to zero (Ridge and ElasticNet
# below are unconstrained) - confirm this is intended.
lasso = LassoCV(alphas=ALPHAS, max_iter=MAX_ITER, random_state=SEED, positive=True)
lasso.fit(X_train_scaled, y_train)

y_pred_lasso = lasso.predict(X_test_scaled)

print(f"R2 score: {lasso.score(X_test_scaled, y_test):.6f}")
print(f"MSE: {mean_squared_error(y_test, y_pred_lasso)}")

In [None]:
# Coefficients of the fitted Lasso, paired with the feature names and sorted
# from the largest to the smallest value.
lasso_coefs = lasso.coef_

lasso_coef_df = (
    pd.DataFrame({"feature": feature_names, "coef": lasso_coefs})
      .sort_values("coef", ascending=False)
)

# Show only the features the Lasso kept (non-zero coefficient).
lasso_coef_df[lasso_coef_df["coef"] != 0]

In [None]:
# Horizontal bar chart of the Lasso coefficients (on standardized features).
plt.figure(figsize=(8, 5))
plt.barh(lasso_coef_df['feature'], lasso_coef_df['coef'], color='skyblue')
# barh draws the first row at the bottom; invert so the largest coefficient is on top.
plt.gca().invert_yaxis()

plt.title('Feature Importance/Coefficient Values', fontsize=16)
plt.xlabel('Coefficient Value', fontsize=12)
plt.ylabel('Feature', fontsize=12)
plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()

### Ridge

In [None]:
# Cross-validated Ridge regression over the same grid of regularisation strengths.
ridge = RidgeCV(alphas=ALPHAS)
ridge.fit(X_train_scaled, y_train)

# Evaluate on the held-out split; r2_score on the stored predictions is the
# same quantity that the estimator's score() method reports.
y_pred_ridge = ridge.predict(X_test_scaled)

print(f"R2 score: {r2_score(y_test, y_pred_ridge):.6f}")
print(f"MSE: {mean_squared_error(y_test, y_pred_ridge)}")

In [None]:
# Coefficients of the fitted Ridge model, paired with the feature names and
# sorted from the largest to the smallest value.
ridge_coefs = ridge.coef_

ridge_coef_df = (
    pd.DataFrame({"feature": feature_names, "coef": ridge_coefs})
      .sort_values("coef", ascending=False)
)

# Horizontal bar chart of the coefficients (on standardized features).
plt.figure(figsize=(8, 5))
plt.barh(ridge_coef_df['feature'], ridge_coef_df['coef'], color='skyblue')
# barh draws the first row at the bottom; invert so the largest coefficient is on top.
plt.gca().invert_yaxis()

plt.title('Feature Importance/Coefficient Values', fontsize=16)
plt.xlabel('Coefficient Value', fontsize=12)
plt.ylabel('Feature', fontsize=12)
plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()

### ElasticNet

Linear regression with combined L1 and L2 priors as regularizer.

In [None]:
# Cross-validated ElasticNet over the ALPHAS grid.
# max_iter=MAX_ITER: the constant was defined for the iterative solvers but
# never passed, so the fit ran with the library default.
elastic = ElasticNetCV(alphas=ALPHAS, max_iter=MAX_ITER, random_state=SEED)
elastic.fit(X_train_scaled, y_train)

y_pred_elastic = elastic.predict(X_test_scaled)

print(f"R2 score: {elastic.score(X_test_scaled, y_test):.6f}")
print(f"MSE: {mean_squared_error(y_test, y_pred_elastic)}")
# Fitted intercept, shown as the cell output.
elastic.intercept_

In [None]:
# Coefficients of the fitted ElasticNet, paired with the feature names and
# sorted from the largest to the smallest value.
en_coefs = elastic.coef_

elastic_coef_df = (
    pd.DataFrame({"feature": feature_names, "coef": en_coefs})
      .sort_values("coef", ascending=False)
)

# Horizontal bar chart of the coefficients (on standardized features).
plt.figure(figsize=(8, 5))
plt.barh(elastic_coef_df['feature'], elastic_coef_df['coef'], color='skyblue')
# barh draws the first row at the bottom; invert so the largest coefficient is on top.
plt.gca().invert_yaxis()

plt.title('Feature Importance/Coefficient Values', fontsize=16)
plt.xlabel('Coefficient Value', fontsize=12)
plt.ylabel('Feature', fontsize=12)
plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()