# Rental Marketplace Analytics - Performance Metrics

This notebook analyzes key performance metrics for the rental marketplace platform, including:

1. **Average Listing Price**: Weekly average price of active rental listings
2. **Occupancy Rate**: Percentage of available rental nights that were booked over a month (reported in this notebook week by week)
3. **Most Popular Locations**: Most frequently booked cities every week
4. **Top Performing Listings**: Properties with the highest confirmed revenue per week

In [3]:
# Import necessary libraries

import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import psycopg2
from pathlib import Path

# Make the project root importable so that `src.*` modules resolve.
# NOTE(review): assumes the kernel's working directory sits one level below
# the project root (e.g. a notebooks/ folder) - confirm if the notebook moves.
project_root = Path.cwd().parent
# Re-running this cell must not stack duplicate entries on sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Import project settings
from src.config.settings import REDSHIFT_CONFIG


# Notebook-wide plot defaults: seaborn grid theme, larger figures and font
sns.set_style("whitegrid")
plt.rcParams.update(
    {
        "figure.figsize": (12, 8),
        "font.size": 12,
    }
)

In [6]:
# Open a Redshift connection from the project settings
def connect_to_redshift():
    """Open a psycopg2 connection to Redshift.

    Connection parameters are read from ``REDSHIFT_CONFIG`` (keys ``host``,
    ``port``, ``database``, ``user`` and ``password``).

    Returns:
        The open connection, or ``None`` when connecting fails for any
        reason. The failure is printed instead of raised so the notebook can
        fall back to sample data.
    """
    try:
        # Built inside the try block so a missing config key is reported the
        # same way as a failed connection attempt.
        connection_params = {
            "host": REDSHIFT_CONFIG["host"],
            "port": REDSHIFT_CONFIG["port"],
            "dbname": REDSHIFT_CONFIG["database"],
            "user": REDSHIFT_CONFIG["user"],
            "password": REDSHIFT_CONFIG["password"],
        }
        connection = psycopg2.connect(**connection_params)
        print("Connected to Redshift successfully")
        return connection
    except Exception as err:
        print(f"Failed to connect to Redshift: {str(err)}")
        return None


# Run a SQL statement and hand the rows back as a DataFrame
def execute_query(conn, query):
    """Run ``query`` on ``conn`` and return the result set as a DataFrame.

    Args:
        conn: Open database connection passed straight to pandas.
        query: SQL text to execute.

    Returns:
        A DataFrame with the query result. If anything goes wrong the error
        is printed and an empty DataFrame (no rows, no columns) is returned
        instead of raising.
    """
    try:
        result = pd.read_sql_query(query, conn)
    except Exception as err:
        print(f"Error executing query: {str(err)}")
        result = pd.DataFrame()
    return result

In [7]:
# Connect to Redshift
conn = connect_to_redshift()

if conn is None:
    print("Using sample data for demonstration purposes")
    # Build synthetic stand-ins for the three frames the notebook analyses
    # (weekly_performance, popular_locations_df, top_listings_df) so that it
    # can run end to end without a Redshift connection.

    # Seeded generator: the demo figures come out identical on every
    # Restart & Run All instead of changing with each execution.
    rng = np.random.default_rng(42)

    # Sample data for weekly rental performance
    weeks = pd.date_range(start="2023-01-01", periods=12, freq="W")
    n_weeks = len(weeks)
    weekly_performance = pd.DataFrame(
        {
            "week_start_date": weeks,
            "avg_listing_price": rng.uniform(100, 300, n_weeks),
            "median_listing_price": rng.uniform(80, 250, n_weeks),
            "total_active_listings": rng.integers(100, 500, n_weeks),
            "new_listings": rng.integers(10, 50, n_weeks),
            "occupancy_rate": rng.uniform(40, 90, n_weeks),
            "avg_price_per_sqft": rng.uniform(1, 5, n_weeks),
        }
    )

    # Sample data for popular locations; cities[i] belongs to states[i]
    cities = ["New York", "Los Angeles", "Chicago", "Miami", "San Francisco"]
    states = ["NY", "CA", "IL", "FL", "CA"]
    popular_locations = []

    for week in weeks:
        for city, state in zip(cities, states):
            popular_locations.append(
                {
                    "week_start_date": week,
                    "city": city,
                    "state": state,
                    "total_bookings": rng.integers(10, 100),
                    "total_viewings": rng.integers(100, 500),
                    "avg_price": rng.uniform(100, 300),
                    "booking_conversion_rate": rng.uniform(5, 20),
                }
            )

    popular_locations_df = pd.DataFrame(popular_locations)

    # Sample data for top listings
    top_listings = []
    for week in weeks:
        for listing_id in range(1, 11):  # Top 10 listings per week
            # Draw one index and use it for both lists so every listing gets
            # a matching city/state pair (never e.g. "Miami", "NY").
            location_idx = rng.integers(len(cities))
            top_listings.append(
                {
                    "week_start_date": week,
                    "apartment_id": listing_id,
                    "title": f"Luxury Apartment {listing_id}",
                    "city": cities[location_idx],
                    "state": states[location_idx],
                    "total_revenue": rng.uniform(1000, 5000),
                    "total_bookings": rng.integers(1, 10),
                    "avg_stay_duration": rng.uniform(2, 10),
                    "occupancy_rate": rng.uniform(40, 95),
                }
            )

    top_listings_df = pd.DataFrame(top_listings)

Connected to Redshift successfully


## 1. Average Listing Price

Analyzing the average price of active rental listings each week.

In [8]:
if conn is not None:
    # Query weekly rental performance metrics from Redshift
    query = """
    SELECT 
        week_start_date,
        avg_listing_price,
        median_listing_price,
        total_active_listings,
        new_listings,
        occupancy_rate,
        avg_price_per_sqft
    FROM 
        pres_rental_performance_weekly
    ORDER BY 
        week_start_date
    """
    weekly_performance = execute_query(conn, query)

    # execute_query hands back an empty frame both when the query fails and
    # when the table has no rows. Say so explicitly: the price and occupancy
    # statistics further down index the first and last row of this frame.
    if weekly_performance.empty:
        print("Warning: pres_rental_performance_weekly returned no rows")

# Display the data
weekly_performance.head()

  return pd.read_sql_query(query, conn)


Unnamed: 0,week_start_date,avg_listing_price,median_listing_price,total_active_listings,new_listings,occupancy_rate,avg_price_per_sqft


In [None]:
# Plot average listing price trend
if weekly_performance.empty:
    # The first/last-week lookups below raise IndexError on an empty frame;
    # report the situation in plain words instead of a traceback.
    print("No weekly performance data available - skipping price analysis")
else:
    plt.figure(figsize=(14, 7))
    # Labels are attached to each line so the legend cannot drift out of
    # sync with the plotting order.
    plt.plot(
        weekly_performance["week_start_date"],
        weekly_performance["avg_listing_price"],
        marker="o",
        linewidth=2,
        color="#1f77b4",
        label="Average Price",
    )
    plt.plot(
        weekly_performance["week_start_date"],
        weekly_performance["median_listing_price"],
        marker="s",
        linewidth=2,
        color="#ff7f0e",
        linestyle="--",
        label="Median Price",
    )

    plt.title("Weekly Average and Median Listing Prices", fontsize=16)
    plt.xlabel("Week", fontsize=14)
    plt.ylabel("Price ($)", fontsize=14)
    plt.grid(True, alpha=0.3)
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

    # Calculate statistics
    avg_prices = weekly_performance["avg_listing_price"]
    avg_price = avg_prices.mean()
    max_price = avg_prices.max()
    min_price = avg_prices.min()
    first_price = avg_prices.iloc[0]
    price_trend = avg_prices.iloc[-1] - first_price
    # A relative change is undefined when the first week's price is zero.
    price_trend_pct = (
        price_trend / first_price * 100 if first_price else float("nan")
    )

    print(f"Average listing price across all weeks: ${avg_price:.2f}")
    print(f"Maximum average price: ${max_price:.2f}")
    print(f"Minimum average price: ${min_price:.2f}")
    print(
        f"Price trend (first to last week): ${price_trend:.2f} ({price_trend_pct:.1f}%)"
    )

## 2. Occupancy Rate

Measuring the percentage of available rental nights that were booked over time.

In [None]:
# Plot occupancy rate trend
if weekly_performance.empty:
    # The first/last-week lookups below raise IndexError on an empty frame;
    # report the situation in plain words instead of a traceback.
    print("No weekly performance data available - skipping occupancy analysis")
else:
    occupancy = weekly_performance["occupancy_rate"]

    plt.figure(figsize=(14, 7))
    plt.bar(
        weekly_performance["week_start_date"],
        occupancy,
        color="#2ca02c",
        alpha=0.7,
    )

    # Add a line for the average occupancy rate
    avg_occupancy = occupancy.mean()
    plt.axhline(
        y=avg_occupancy,
        color="r",
        linestyle="--",
        label=f"Average: {avg_occupancy:.1f}%",
    )

    plt.title("Weekly Occupancy Rate", fontsize=16)
    plt.xlabel("Week", fontsize=14)
    plt.ylabel("Occupancy Rate (%)", fontsize=14)
    plt.grid(True, alpha=0.3)
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

    # Calculate statistics
    max_occupancy = occupancy.max()
    min_occupancy = occupancy.min()
    first_occupancy = occupancy.iloc[0]
    occupancy_trend = occupancy.iloc[-1] - first_occupancy
    # A relative change is undefined when the first week's occupancy is zero.
    occupancy_trend_pct = (
        occupancy_trend / first_occupancy * 100 if first_occupancy else float("nan")
    )

    print(f"Average occupancy rate: {avg_occupancy:.2f}%")
    print(f"Maximum occupancy rate: {max_occupancy:.2f}%")
    print(f"Minimum occupancy rate: {min_occupancy:.2f}%")
    print(
        f"Occupancy trend (first to last week): {occupancy_trend:.2f}% ({occupancy_trend_pct:.1f}%)"
    )

## 3. Most Popular Locations

Identifying the most frequently booked cities every week.

In [None]:
if conn is not None:
    # Query popular locations from Redshift
    query = """
    SELECT 
        week_start_date,
        city,
        state,
        total_bookings,
        total_viewings,
        avg_price,
        booking_conversion_rate
    FROM 
        pres_popular_locations_weekly
    ORDER BY 
        week_start_date, total_bookings DESC
    """
    popular_locations_df = execute_query(conn, query)

    # execute_query hands back an empty frame both when the query fails and
    # when the table has no rows. Flag it here, where the cause is clear,
    # because the location charts have nothing to plot from an empty frame.
    if popular_locations_df.empty:
        print("Warning: pres_popular_locations_weekly returned no rows")

# Display the data
popular_locations_df.head()

In [None]:
# Get the most recent week's data
if popular_locations_df.empty:
    # With no rows there is no "latest week" to select or to format into the
    # chart title, so stop with a message instead of an exception.
    print("No popular-location data available - skipping top cities chart")
else:
    latest_week = popular_locations_df["week_start_date"].max()
    latest_locations = (
        popular_locations_df[popular_locations_df["week_start_date"] == latest_week]
        .sort_values("total_bookings", ascending=False)
        .head(10)
    )

    # Plot top 10 cities by bookings for the most recent week
    plt.figure(figsize=(14, 8))
    bars = plt.barh(
        latest_locations["city"] + ", " + latest_locations["state"],
        latest_locations["total_bookings"],
        color="#d62728",
        alpha=0.7,
    )

    # Add data labels just to the right of each bar
    for bar in bars:
        width = bar.get_width()
        plt.text(
            width + 1,
            bar.get_y() + bar.get_height() / 2,
            f"{width:.0f}",
            ha="left",
            va="center",
        )

    # pd.Timestamp accepts Timestamps, datetime.date objects and ISO date
    # strings alike, so the title formats whichever type the column holds.
    week_label = pd.Timestamp(latest_week).strftime("%Y-%m-%d")
    plt.title(
        f"Top 10 Cities by Bookings (Week of {week_label})",
        fontsize=16,
    )
    plt.xlabel("Number of Bookings", fontsize=14)
    plt.ylabel("City", fontsize=14)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

In [None]:
# Analyze booking trends for top 5 cities over time
top_cities = (
    popular_locations_df.groupby("city")["total_bookings"]
    .sum()
    .nlargest(5)
    .index.tolist()
)
top_cities_data = popular_locations_df[popular_locations_df["city"].isin(top_cities)]

# Reshape to one row per week and one column per city.
# pivot_table is used instead of pivot because the data is keyed by city AND
# state: when a city name occurs in more than one state in the same week,
# pivot raises "Index contains duplicate entries". Summing the duplicates
# matches the per-city totals used for the top-5 ranking above.
city_trends = top_cities_data.pivot_table(
    index="week_start_date",
    columns="city",
    values="total_bookings",
    aggfunc="sum",
)

# Plot trends
plt.figure(figsize=(14, 8))
for city in top_cities:
    plt.plot(city_trends.index, city_trends[city], marker="o", linewidth=2, label=city)

plt.title("Booking Trends for Top 5 Cities", fontsize=16)
plt.xlabel("Week", fontsize=14)
plt.ylabel("Number of Bookings", fontsize=14)
plt.grid(True, alpha=0.3)
plt.xticks(rotation=45)
plt.legend(title="City")
plt.tight_layout()
plt.show()

## 4. Top Performing Listings

Tracking properties with the highest confirmed revenue per week.

In [None]:
if conn is not None:
    # Query top performing listings from Redshift
    query = """
    SELECT 
        week_start_date,
        apartment_id,
        title,
        city,
        state,
        total_revenue,
        total_bookings,
        avg_stay_duration,
        occupancy_rate
    FROM 
        pres_top_listings_weekly
    ORDER BY 
        week_start_date, total_revenue DESC
    """
    top_listings_df = execute_query(conn, query)

    # execute_query hands back an empty frame both when the query fails and
    # when the table has no rows. Flag it here, where the cause is clear,
    # because the listing charts have nothing to plot from an empty frame.
    if top_listings_df.empty:
        print("Warning: pres_top_listings_weekly returned no rows")

# Display the data
top_listings_df.head()

In [None]:
# Restrict to the newest week and keep its ten highest-revenue listings
latest_week = top_listings_df["week_start_date"].max()
is_latest_week = top_listings_df["week_start_date"] == latest_week
latest_listings = top_listings_df[is_latest_week]
latest_listings = latest_listings.sort_values("total_revenue", ascending=False)
latest_listings = latest_listings.head(10)

# Horizontal bar chart: one bar per listing title, bar length = revenue
fig, ax = plt.subplots(figsize=(14, 8))
bars = ax.barh(
    latest_listings["title"],
    latest_listings["total_revenue"],
    color="#9467bd",
    alpha=0.7,
)

# Write the revenue figure next to the end of every bar
for bar in bars:
    width = bar.get_width()
    label_y = bar.get_y() + bar.get_height() / 2
    ax.text(width + 50, label_y, f"${width:.0f}", ha="left", va="center")

ax.set_title(
    f"Top 10 Listings by Revenue (Week of {latest_week.strftime('%Y-%m-%d')})",
    fontsize=16,
)
ax.set_xlabel("Revenue (USD)", fontsize=14)
ax.set_ylabel("Listing Title", fontsize=14)
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

In [None]:
# Scatter of occupancy against revenue for the latest week's listings;
# marker area grows with the number of bookings
occupancy_values = latest_listings["occupancy_rate"]
revenue_values = latest_listings["total_revenue"]

fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(
    occupancy_values,
    revenue_values,
    s=latest_listings["total_bookings"] * 50,
    alpha=0.6,
    c="#8c564b",
)

# Tag every marker with its apartment id, nudged off the point
for _row_label, listing in latest_listings.iterrows():
    point = (listing["occupancy_rate"], listing["total_revenue"])
    ax.annotate(
        listing["apartment_id"],
        point,
        xytext=(5, 5),
        textcoords="offset points",
    )

ax.set_title("Relationship Between Occupancy Rate and Revenue", fontsize=16)
ax.set_xlabel("Occupancy Rate (%)", fontsize=14)
ax.set_ylabel("Revenue (USD)", fontsize=14)
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

# Correlation between the two plotted columns (pandas default method)
correlation = occupancy_values.corr(revenue_values)
print(f"Correlation between occupancy rate and revenue: {correlation:.2f}")

## Summary of Findings

Based on the analysis of rental marketplace data, we can draw the following conclusions:

1. **Average Listing Price**: [Summary of price trends and insights]

2. **Occupancy Rate**: [Summary of occupancy rate trends and insights]

3. **Most Popular Locations**: [Summary of popular locations and insights]

4. **Top Performing Listings**: [Summary of top listings and insights]

### Recommendations

Based on these findings, we recommend the following actions:

1. [Recommendation 1]
2. [Recommendation 2]
3. [Recommendation 3]

In [None]:
# Close the Redshift connection if it exists
if conn is not None:
    conn.close()
    # Drop the reference as well. Otherwise re-running a query cell after
    # this one would pass the closed connection to execute_query, which
    # swallows the error and replaces the already-loaded frame with an
    # empty one. With conn reset to None those cells skip the query and
    # keep their data.
    conn = None
    print("Redshift connection closed")