In [2]:
import json

# Read the county GeoJSON document from disk and parse it into Python objects.
# NOTE(review): no explicit encoding is passed to open(), so the platform
# default is used — confirm that it matches this file.
with open("gz_2010_us_050_00_5m.json", 'r') as f:
    data = json.loads(f.read())

# The 'features' entry holds the list of per-county records used by later cells.
feature_list = data['features']

# TASK 1: Basic Summary Statistics
county_total = len(feature_list)
print(f"Number of Counties in US: {county_total}")

# Index 1 is the second record in file order.
second_feature_name = feature_list[1]['properties']['NAME']
print(f"The second feature name is {second_feature_name}")

FileNotFoundError: [Errno 2] No such file or directory: 'gz_2010_us_050_00_5m.json'

In [None]:
# TASK 2: Finding Unique County Names and Most Common County Names
# Collect every distinct NAME value; repeated names collapse into one entry.
unique_counties = set()
for feature in feature_list:
    unique_counties.add(feature['properties']['NAME'])
print(f"Total unique county names: {len(unique_counties)}")

# Per-name tallies: how many features carry the name, and the set of STATE
# values those features belong to.
county_counts = {}
county_states = {}

for feature in feature_list:
    name = feature['properties']['NAME']
    state = feature['properties']['STATE']

    # First sighting of a name starts from a zero count / empty set.
    county_counts[name] = county_counts.get(name, 0) + 1
    if name not in county_states:
        county_states[name] = set()
    county_states[name].add(state)

# Rank names by count, highest first. The sort is stable, so names with equal
# counts keep their first-seen order. Keep only the top 3.
ranked_counts = list(county_counts.items())
ranked_counts.sort(key=lambda pair: pair[1], reverse=True)
sorted_counties = ranked_counts[:3]

print("Top 3 most common county names:")
for name, count in sorted_counties:
    print(f"{name}: {count} occurrences in states {county_states[name]}")


In [None]:
# TASK 3: State-wise County Statistics
# Maps each STATE value to its running county count, summed area, and the
# (name, area) pairs of the largest and smallest counties seen so far.
state_data = {}

for feature in feature_list:
    state = feature['properties']['STATE']
    county_name = feature['properties']['NAME']
    area = feature['properties']['CENSUSAREA']

    # Initialize state tracking if not present
    if state not in state_data:
        state_data[state] = {
            'counties': 0,
            'total_area': 0,
            # Sentinels are -inf / +inf so the first county seen always
            # replaces them. A sentinel of 0 for 'largest' would leave the
            # name as None whenever every area in the state is 0.
            'largest': (None, float('-inf')),
            'smallest': (None, float('inf'))
        }
    state_stats = state_data[state]

    # Update county count and total area
    state_stats['counties'] += 1
    state_stats['total_area'] += area

    # Track largest and smallest counties by area. Strict comparisons mean
    # that on a tie the county encountered first is kept.
    if area > state_stats['largest'][1]:
        state_stats['largest'] = (county_name, area)

    if area < state_stats['smallest'][1]:
        state_stats['smallest'] = (county_name, area)

# Print statistics per state
for state, stats in state_data.items():
    # 'counties' is at least 1 for every key, because a state entry is only
    # created when one of its counties is processed.
    avg_area = stats['total_area'] / stats['counties']
    print(f"State {state}: {stats['counties']} counties")
    print(f"  Largest: {stats['largest'][0]} ({stats['largest'][1]} sq mi)")
    print(f"  Smallest: {stats['smallest'][0]} ({stats['smallest'][1]} sq mi)")
    print(f"  Total area: {stats['total_area']} sq mi, Average area: {avg_area:.2f} sq mi\n")
