In [7]:
import json

# Load the GeoJSON data from file
with open("gz_2010_us_050_00_5m.json", 'r') as f:
    data = json.load(f)

# Extract features (list of county data)
feature_list = data['features']

# TASK 1: Basic Summary Statistics
print(f"Number of Counties in US: {len(feature_list)}")
print(f"The second feature name is {feature_list[1]['properties']['NAME']}")

Number of Counties in US: 3221
The second feature name is Wade Hampton


In [5]:
# TASK 2: Finding Unique County Names and Most Common County Names
unique_counties = {feature['properties']['NAME'] for feature in feature_list}
print(f"Total unique county names: {len(unique_counties)}")

# Dictionary to count occurrences of county names and track associated states
county_counts = {}
county_states = {}

for feature in feature_list:
    name = feature['properties']['NAME']
    state = feature['properties']['STATE']
    
    # Initialize dictionary entries if not present
    county_counts.setdefault(name, 0)
    county_states.setdefault(name, set())
    
    # Update counts and state tracking
    county_counts[name] += 1
    county_states[name].add(state)

# Sort counties by frequency in descending order and get the top 3
sorted_counties = sorted(county_counts.items(), key=lambda item: item[1], reverse=True)[:3]

print("Top 3 most common county names:")
for name, count in sorted_counties:
    print(f"{name}: {count} occurrences in states {county_states[name]}")


Total unique county names: 1909
Top 3 most common county names:
Washington: 31 occurrences in states {'29', '37', '19', '50', '13', '51', '48', '05', '16', '22', '39', '41', '42', '21', '40', '27', '31', '44', '36', '55', '08', '17', '20', '12', '23', '47', '01', '28', '49', '24', '18'}
Franklin: 26 occurrences in states {'29', '37', '19', '50', '13', '51', '48', '05', '18', '16', '22', '39', '42', '21', '31', '17', '53', '20', '12', '23', '47', '01', '25', '28', '36'}
Jefferson: 26 occurrences in states {'29', '19', '13', '48', '05', '18', '16', '22', '39', '41', '42', '21', '40', '31', '55', '08', '17', '53', '20', '12', '47', '01', '28', '54', '36', '30'}


In [6]:
# TASK 3: State-wise County Statistics
state_data = {}

for feature in feature_list:
    state = feature['properties']['STATE']
    county_name = feature['properties']['NAME']
    area = feature['properties']['CENSUSAREA']
    
    # Initialize state tracking if not present
    if state not in state_data:
        state_data[state] = {
            'counties': 0,
            'total_area': 0,
            'largest': (None, 0),
            'smallest': (None, float('inf'))
        }
    
    # Update county count and total area
    state_data[state]['counties'] += 1
    state_data[state]['total_area'] += area
    
    # Track largest and smallest counties by area
    if area > state_data[state]['largest'][1]:
        state_data[state]['largest'] = (county_name, area)
    
    if area < state_data[state]['smallest'][1]:
        state_data[state]['smallest'] = (county_name, area)

# Print statistics per state
for state, stats in state_data.items():
    avg_area = stats['total_area'] / stats['counties']
    print(f"State {state}: {stats['counties']} counties")
    print(f"  Largest: {stats['largest'][0]} ({stats['largest'][1]} sq mi)")
    print(f"  Smallest: {stats['smallest'][0]} ({stats['smallest'][1]} sq mi)")
    print(f"  Total area: {stats['total_area']} sq mi, Average area: {avg_area:.2f} sq mi\n")


State 02: 29 counties
  Largest: Yukon-Koyukuk (145504.789 sq mi)
  Smallest: Skagway (452.325 sq mi)
  Total area: 570640.9510000001 sq mi, Average area: 19677.27 sq mi

State 04: 15 counties
  Largest: Coconino (18618.885 sq mi)
  Smallest: Santa Cruz (1236.916 sq mi)
  Total area: 113594.085 sq mi, Average area: 7572.94 sq mi

State 05: 75 counties
  Largest: Union (1039.214 sq mi)
  Smallest: Lafayette (528.268 sq mi)
  Total area: 52035.477999999974 sq mi, Average area: 693.81 sq mi

State 08: 64 counties
  Largest: Las Animas (4772.672 sq mi)
  Smallest: Broomfield (33.034 sq mi)
  Total area: 103641.88800000002 sq mi, Average area: 1619.40 sq mi

State 09: 8 counties
  Largest: Litchfield (920.56 sq mi)
  Smallest: Middlesex (369.301 sq mi)
  Total area: 4842.356 sq mi, Average area: 605.29 sq mi

State 12: 67 counties
  Largest: Collier (1998.324 sq mi)
  Smallest: Union (243.556 sq mi)
  Total area: 53624.759 sq mi, Average area: 800.37 sq mi

State 13: 159 counties
  Largest: