# Database Playground

This notebook demonstrates the functionalities of our improved Database class.

In [1]:
import my_functions as mf
import pandas as pd

# Create a Database instance
db = mf.Database()

## 1. Accessing Tables and Views

In [2]:
# List all tables
print("Available tables:", list(db.tables.keys()))

# Access a specific table
voter_table = db.voter
print("\nVoter table shape:", voter_table().shape)

# List views for a table
print("\nViews in voter table:", voter_table.list_views())

# Access a specific view
voter_2022 = voter_table.get_view('2022')
print("\nVoter 2022 view shape:", voter_2022.shape)
print("\nVoter 2022 view columns:", voter_2022.columns.tolist())

Available tables: ['ballot', 'demo', 'facility', 'medicare', 'voter']

Voter table shape: (58, 34)

Views in voter table: ['2018', '2020', '2022']

Voter 2022 view shape: (58, 12)

Voter 2022 view columns: ['county_name', 'eligible_2022', 'total_registered_2022', 'democratic_2022', 'republican_2022', 'american_independent_2022', 'green_2022', 'libertarian_2022', 'peace_and_freedom_2022', 'unknown_2022', 'other_2022', 'no_party_preference_2022']


## 2. Adding a New View

In [5]:
# Add a new view to the voter table
db.add_view('voter', 'registered_voters', ['county_name', 'total_registered_2018', 'total_registered_2020', 'total_registered_2022'])

# Verify the new view
print("Updated views in voter table:", db.voter.list_views())
registered_voters = db.get_view('voter', 'registered_voters')
print("\nRegistered voters view:")
print(registered_voters.head())

Updated views in voter table: ['2018', '2020', '2022', 'registered_voters']

Registered voters view:
  county_name  total_registered_2018  total_registered_2020  \
0     Alameda                 881491                 881491   
1      Alpine                    758                    758   
2      Amador                  22305                  22305   
3       Butte                 122741                 122741   
4   Calaveras                  29591                  29591   

   total_registered_2022  
0                 881491  
1                    758  
2                  22305  
3                 122741  
4                  29591  


## 3. Merging Views

In [6]:
# Merge multiple views
merged_views = db.merge_views([('demo', 'population'), ('voter', 'registered_voters')])
print("Merged views shape:", merged_views.shape)
print("\nMerged views columns:", merged_views.columns.tolist())
print("\nSample of merged data:")
print(merged_views.head())

Merged views shape: (58, 6)

Merged views columns: ['county_name', 'population_january_2023', 'median_household_income_2021', 'total_registered_2018', 'total_registered_2020', 'total_registered_2022']

Sample of merged data:
  county_name  population_january_2023  median_household_income_2021  \
0     Alameda                  1636194                        108971   
1      Alpine                     1184                         87570   
2      Amador                    39837                         68159   
3       Butte                   205592                         62982   
4   Calaveras                    44890                         68298   

   total_registered_2018  total_registered_2020  total_registered_2022  
0                 881491                 881491                 881491  
1                    758                    758                    758  
2                  22305                  22305                  22305  
3                 122741                 122741   

## 4. Querying the Database

In [7]:
# Query for counties with population over 1 million
large_counties = db.query({'population_january_2023': lambda x: x > 1000000}, 
                          ['county_name', 'population_january_2023', 'median_household_income_2021'])
print("Large counties (population > 1 million):")
print(large_counties)

# Query for counties with high median household income
high_income_counties = db.query({'median_household_income_2021': lambda x: x > 100000}, 
                                ['county_name', 'population_january_2023', 'median_household_income_2021'])
print("\nHigh income counties (median household income > $100,000):")
print(high_income_counties)

Large counties (population > 1 million):
       county_name  population_january_2023  median_household_income_2021
0          Alameda                1636194.0                      108971.0
6     Contra Costa                1147653.0                      110595.0
9           Fresno                1011499.0                       63140.0
18     Los Angeles                9761210.0                       77356.0
29          Orange                3137164.0                      100210.0
32       Riverside                2439234.0                       78690.0
33      Sacramento                1572453.0                       79611.0
35  San Bernardino                2182056.0                       74218.0
36       San Diego                3269755.0                       90756.0
42     Santa Clara                1886079.0                      141161.0

High income counties (median household income > $100,000):
      county_name  population_january_2023  median_household_income_2021
0         Al

## 5. Advanced Analysis: Voter Registration Trends

In [9]:
# Get voter registration data
voter_reg = db.get_view('voter', 'registered_voters')

# Calculate percentage change in voter registration from 2018 to 2022
voter_reg['pct_change_2018_2022'] = (voter_reg['total_registered_2022'] - voter_reg['total_registered_2018']) / voter_reg['total_registered_2018'] * 100

# Sort by percentage change and display top 10 counties with highest increase
top_increase = voter_reg.sort_values('pct_change_2018_2022', ascending=False).head(10)
print("Top 10 counties with highest increase in voter registration (2018-2022):")
print(top_increase[['county_name', 'pct_change_2018_2022']])

# Calculate and display average change across all counties
avg_change = voter_reg['pct_change_2018_2022'].mean()
print(f"\nAverage change in voter registration across all counties: {avg_change:.2f}%")

Top 10 counties with highest increase in voter registration (2018-2022):
       county_name  pct_change_2018_2022
0          Alameda                   0.0
43      Santa Cruz                   0.0
31          Plumas                   0.0
32       Riverside                   0.0
33      Sacramento                   0.0
34      San Benito                   0.0
35  San Bernardino                   0.0
36       San Diego                   0.0
37   San Francisco                   0.0
38     San Joaquin                   0.0

Average change in voter registration across all counties: 0.00%


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  voter_reg['pct_change_2018_2022'] = (voter_reg['total_registered_2022'] - voter_reg['total_registered_2018']) / voter_reg['total_registered_2018'] * 100


## 6. Error Handling

In [10]:
# Try to access a non-existent table
try:
    db.get_view('non_existent_table', 'some_view')
except ValueError as e:
    print("Error:", str(e))

# Try to access a non-existent view
try:
    db.get_view('voter', 'non_existent_view')
except ValueError as e:
    print("Error:", str(e))

Error: Table 'non_existent_table' not found
Error: View 'non_existent_view' not found
