# Active Rental Licenses Data Exploration

This notebook explores the Active Rental Licenses dataset from Minneapolis.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Display options: show every column of wide frames, but truncate long cell
# text at 50 characters so tables stay readable.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 50)

# Matplotlib style: prefer the seaborn dark-grid look under whichever name this
# matplotlib version registers it. Checking `plt.style.available` avoids
# probing with try/except, where a bare `except:` would also hide unrelated
# errors (including KeyboardInterrupt).
STYLE_CANDIDATES = ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
chosen_style = next(
    (name for name in STYLE_CANDIDATES if name in plt.style.available),
    None,
)
if chosen_style is not None:
    plt.style.use(chosen_style)
else:
    # Neither name is registered: keep matplotlib's defaults and say so.
    print('Using default matplotlib style')

## Load the Data

In [None]:
# Read the rental license export into a DataFrame
df = pd.read_csv('source/Active_Rental_Licenses.csv')

# Report the frame's dimensions, then a numbered list of its column names
n_columns = len(df.columns)
print(f"Dataset shape: {df.shape}")
print(f"\nColumns ({n_columns}):")
for position, column_name in enumerate(df.columns, start=1):
    print(f"{position:2d}. {column_name}")

## Data Overview

In [None]:
# Preview the first five rows (head() defaults to n=5) to sanity-check the load
df.head()

In [None]:
# Per-column dtype and non-null count, plus the frame's memory usage
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max); on a mixed-type
# frame describe() reports only the numeric columns by default
df.describe()

## Data Cleaning

In [None]:
# Parse the license date columns; values that cannot be parsed become NaT
date_columns = ['issueDate', 'expirationDate']
for date_col in date_columns:
    parsed = pd.to_datetime(df[date_col], errors='coerce')
    df[date_col] = parsed

# Show the earliest and latest value of each date column
issue_dates = df['issueDate']
expiration_dates = df['expirationDate']
print(f"Issue dates range: {issue_dates.min()} to {issue_dates.max()}")
print(f"Expiration dates range: {expiration_dates.min()} to {expiration_dates.max()}")

## License Categories Analysis

In [None]:
# Tally how many licenses fall into each category
category_counts = df['category'].value_counts()
print("License Categories:")
print(category_counts)

# Bar chart of the tallies, drawn through the explicit Axes interface
fig, ax = plt.subplots(figsize=(10, 6))
category_counts.plot.bar(ax=ax)
ax.set_title('Distribution of License Categories')
ax.set_xlabel('Category')
ax.set_ylabel('Count')
# Tilt the category names so long labels do not overlap
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
fig.tight_layout()
plt.show()

In [None]:
# Tally licenses by status value
status_counts = df['status'].value_counts()
print("License Status:")
print(status_counts)

# Pie chart: one wedge per status, annotated with its share to one decimal
fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    status_counts.values,
    labels=status_counts.index,
    autopct='%1.1f%%',
)
ax.set_title('License Status Distribution')
plt.show()

## Geographic Analysis

In [None]:
# Count licenses per ward, ordered by ward label rather than by frequency
ward_counts = df['ward'].value_counts().sort_index()
print("Licenses by Ward:")
print(ward_counts)

# One bar per ward
fig, ax = plt.subplots(figsize=(12, 6))
ward_counts.plot.bar(ax=ax)
ax.set(
    title='Number of Rental Licenses by Ward',
    xlabel='Ward',
    ylabel='Number of Licenses',
)
fig.tight_layout()
plt.show()

In [None]:
# The 15 neighborhoods holding the most licenses, most frequent first
neighborhood_counts = df['neighborhoodDesc'].value_counts().head(15)
print("Top 15 Neighborhoods by License Count:")
print(neighborhood_counts)

# Horizontal bars leave room for long neighborhood names
fig, ax = plt.subplots(figsize=(12, 8))
neighborhood_counts.plot.barh(ax=ax)
ax.set_title('Top 15 Neighborhoods by Rental License Count')
ax.set_xlabel('Number of Licenses')
ax.set_ylabel('Neighborhood')
fig.tight_layout()
plt.show()

## Licensed Units Analysis

In [None]:
# Descriptive statistics for the per-license unit count
print("Licensed Units Statistics:")
print(df['licensedUnits'].describe())

# How many licenses exist for each distinct unit count, ordered by unit count
units_frequency = df['licensedUnits'].value_counts().sort_index()
fig, ax = plt.subplots(figsize=(10, 6))
units_frequency.plot.bar(ax=ax)
ax.set(
    title='Distribution of Licensed Units per Property',
    xlabel='Number of Licensed Units',
    ylabel='Count',
)
fig.tight_layout()
plt.show()

In [None]:
# Sum licensed units within each ward, largest total first
ward_unit_totals = df.groupby('ward')['licensedUnits'].sum()
units_by_ward = ward_unit_totals.sort_values(ascending=False)
print("Total Licensed Units by Ward:")
print(units_by_ward)

# Bars keep the descending order computed above
fig, ax = plt.subplots(figsize=(10, 6))
units_by_ward.plot.bar(ax=ax)
ax.set_title('Total Licensed Units by Ward')
ax.set_xlabel('Ward')
ax.set_ylabel('Total Licensed Units')
fig.tight_layout()
plt.show()

## Short-Term Rental Analysis

In [None]:
# Tally the values of the short-term rental flag
str_counts = df['shortTermRental'].value_counts()
print("Short-Term Rental Status:")
print(str_counts)

# Share of all rows flagged 'Yes'; .get falls back to 0 when no row has that value
yes_count = str_counts.get('Yes', 0)
total_rows = len(df)
str_percentage = (yes_count / total_rows) * 100
print(f"\nPercentage of Short-Term Rentals: {str_percentage:.2f}%")

## Time-based Analysis

In [None]:
# First year shown in the chart; the title below is derived from it.
PLOT_START_YEAR = 2015

# Extract the issue year. Unparseable dates were coerced to NaT during
# cleaning, and `.dt.year` falls back to float (2015.0, NaN) when NaT is
# present. The nullable 'Int64' dtype keeps the years integral while still
# representing the missing ones.
issue_years = df['issueDate'].dt.year
df['issueYear'] = issue_years.astype('Int64')

# Licenses issued by year, counted over rows that actually have an issue date.
# Casting to plain int keeps the index (and the bar labels) as 2015, not 2015.0.
yearly_issues = issue_years.dropna().astype(int).value_counts().sort_index()
print("Licenses Issued by Year:")
print(yearly_issues.tail(10))  # Last 10 years

recent_issues = yearly_issues[yearly_issues.index >= PLOT_START_YEAR]
if recent_issues.empty:
    # Plotting an empty Series raises, so report the situation instead.
    print(f"No licenses issued in {PLOT_START_YEAR} or later; skipping chart")
else:
    plt.figure(figsize=(12, 6))
    recent_issues.plot(kind='bar')
    plt.title(f'Rental Licenses Issued by Year ({PLOT_START_YEAR}-Present)')
    plt.xlabel('Year')
    plt.ylabel('Number of Licenses Issued')
    plt.tight_layout()
    plt.show()

In [None]:
# Upcoming expirations
# Size of the "expiring soon" look-ahead window, in days.
EXPIRY_WINDOW_DAYS = 90

# Compare calendar dates, not timestamps. Subtracting a time-of-day "now" and
# flooring with `.dt.days` makes a license expiring tomorrow compute as 0 days
# and one expiring today as -1. Normalizing both sides to midnight gives
# whole-day differences.
today = pd.Timestamp.now().normalize()
df['daysToExpiration'] = (df['expirationDate'].dt.normalize() - today).dt.days

# Licenses expiring soon: from today (0 days) through the end of the window.
# The range is inclusive at both ends so no license falls between the
# "expiring soon" and "already expired" groups. Rows with a missing
# expiration date (NaN) match neither filter.
expiring_soon = df[df['daysToExpiration'].between(0, EXPIRY_WINDOW_DAYS)]
print(f"Licenses expiring within {EXPIRY_WINDOW_DAYS} days: {len(expiring_soon)}")

# Already expired: the expiration date is strictly before today
expired = df[df['daysToExpiration'] < 0]
print(f"Already expired licenses: {len(expired)}")

## Owner Analysis

In [None]:
# The 20 owner names that appear on the most licenses, most frequent first
top_owners = df['ownerName'].value_counts().head(20)
print("Top 20 Property Owners by License Count:")

# Print a ranked list starting at 1
ranks = range(1, len(top_owners) + 1)
for rank, (owner, count) in zip(ranks, top_owners.items()):
    print(f"{rank:2d}. {owner}: {count} licenses")

In [None]:
# Owner location analysis (by state)
owner_states = df['ownerState'].value_counts().head(10)
print("Top 10 Owner States:")
print(owner_states)

# Licenses whose owner is recorded as living outside Minnesota.
# Two pitfalls of a plain `df['ownerState'] != 'MN'` are avoided here:
#   * NaN != 'MN' evaluates to True, so rows with no recorded state would be
#     counted as out-of-state; only rows with a non-empty state are counted.
#   * 'mn' or ' MN ' would be counted as out-of-state; values are stripped and
#     upper-cased before comparing.
owner_state_clean = df['ownerState'].fillna('').astype(str).str.strip().str.upper()
has_state = owner_state_clean != ''
out_of_state = int((has_state & (owner_state_clean != 'MN')).sum())

# Share of all licenses; guard the division so an empty frame reports 0%
total_licenses = len(df)
out_of_state_pct = (out_of_state / total_licenses) * 100 if total_licenses else 0.0
print(f"\nPercentage of out-of-state owners: {out_of_state_pct:.2f}%")

## Geographic Visualization

In [None]:
# Plot every property at its longitude/latitude; small translucent markers
# keep dense areas legible
fig, ax = plt.subplots(figsize=(12, 10))
ax.scatter(df['longitude'], df['latitude'], alpha=0.5, s=10)
ax.set(
    title='Geographic Distribution of Rental Properties in Minneapolis',
    xlabel='Longitude',
    ylabel='Latitude',
)
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

In [None]:
# Same map as a plain scatter, with marker color taken from the 'ward' column
fig, ax = plt.subplots(figsize=(14, 10))
scatter = ax.scatter(
    df['longitude'],
    df['latitude'],
    c=df['ward'],
    cmap='tab20',
    alpha=0.6,
    s=20,
)
# The colorbar maps marker colors back to ward values
fig.colorbar(scatter, ax=ax, label='Ward')
ax.set_title('Rental Properties by Ward')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## Summary Statistics

In [None]:
# Headline figures for the report. `out_of_state` and `out_of_state_pct` are
# read from the owner-state cell, which must have run first.
total_licenses = len(df)
licensed_units = df['licensedUnits']
short_term_total = (df['shortTermRental'] == 'Yes').sum()
delinquent_total = (df['status'] == 'Delinquent').sum()
unique_owners = df['ownerName'].nunique()

print("=== MINNEAPOLIS RENTAL LICENSE SUMMARY ===")
print(f"\nTotal Active Licenses: {total_licenses:,}")
print(f"Total Licensed Units: {licensed_units.sum():,}")

# One line per category with its count and its share of all licenses
print("\nLicense Categories:")
for cat, count in df['category'].value_counts().items():
    share = count / total_licenses * 100
    print(f"  - {cat}: {count:,} ({share:.1f}%)")

print(f"\nShort-Term Rentals: {short_term_total:,}")
print(f"Delinquent Licenses: {delinquent_total:,}")
print(f"\nAverage units per license: {licensed_units.mean():.2f}")
print(f"Median units per license: {licensed_units.median():.0f}")
print(f"\nUnique property owners: {unique_owners:,}")
print(f"Out-of-state owners: {out_of_state:,} ({out_of_state_pct:.1f}%)")