In [2]:
import pandas as pd

# ----------------------------------------------------------  
# TITLE: Comprehensive NRW Flood Warning Areas Analysis (Steps 1-11)
# ----------------------------------------------------------

# Step 1: Read the NRW flood warning CSV into the working DataFrame `df`
print("=== Step 1: Loading Dataset ===")
print("This step loads the NRW flood warning CSV file and confirms successful import")
# Keep the location in one named place so it is easy to spot and change.
csv_path = r"F:/Data Science/Data Set/Enviro_Data/NRW_FLOOD_WARNING.csv"
df = pd.read_csv(csv_path)
# shape[0] is the number of rows pandas parsed from the file.
print(f"✅ Dataset loaded successfully: {df.shape[0]:,} records imported\n")

# Step 2: Report the DataFrame's dimensions and its column labels
print("=== Step 2: Dataset Overview ===")
print("This step shows total records, columns, and column names for data validation")
# df.shape is (rows, columns); unpack once instead of measuring twice.
record_total, column_total = df.shape
print(f"Total records: {record_total:,}")
print(f"Total columns: {column_total}")
print(f"Column names: {df.columns.tolist()}\n")

# Step 3: Tally flood warning areas (FWAs) for every region
print("=== Step 3: Flood Warning Areas by Region ===")
print("This step shows number of FWAs per region and total regions covered")
# value_counts() returns one entry per distinct region, most frequent first.
region_counts = df['region'].value_counts()
print("Number of flood warning areas (FWAs) per region:\n")
print(region_counts)
# The length of the tally is the number of distinct regions found.
print(f"\nTotal number of regions: {len(region_counts)}\n")

# Step 4: Tally flood warning areas (FWAs) for every administrative area
print("=== Step 4: Flood Warning Areas by Administrative Area ===")
print("This step shows FWAs per administrative area as defined by NRW")
# value_counts() returns one entry per distinct area, most frequent first.
area_counts = df['area'].value_counts()
print("Number of FWAs per administrative area:\n")
# Only the 15 most frequent areas are displayed; the full tally stays in area_counts.
print(area_counts.iloc[:15])
print(f"\nTotal number of administrative areas: {len(area_counts)}\n")

# Step 5: Count distinct flood warning area names and relate them to the row total
print("=== Step 5: Unique Flood Warning Area Names ===")
print("This step counts distinct FWA names across the dataset")
# nunique() ignores missing names, so this is the number of distinct non-null names.
unique_fwa_names = df['fwa_name'].nunique()
print(f"Total number of unique flood warning area names: {unique_fwa_names:,}")
total_fwa_rows = len(df)
if total_fwa_rows:
    print(f"Name uniqueness ratio: {unique_fwa_names/total_fwa_rows*100:.1f}%\n")
else:
    # A header-only CSV yields zero rows; dividing by len(df) would raise
    # ZeroDivisionError and abort every later step, so report "n/a" instead.
    print("Name uniqueness ratio: n/a (dataset is empty)\n")

# Step 6: Count FWAs for every (region, river/sea) pair and rank the pairs
print("=== Step 6: FWAs by Region × River/Sea Combination ===")
print("This step identifies region-river/sea combinations with highest FWA counts")
# size() counts rows per group. The previous ['fwa_name'].count() counted only
# non-null names, so an FWA record without a name was silently left out of the
# tally, which disagrees with the rest of the script where len(df) is the FWA total.
region_river = (
    df.groupby(['region', 'river_sea'])
    .size()
    .reset_index(name='count')
)
# Largest combinations first; the unsorted frame is kept for the overall total.
region_river_sorted = region_river.sort_values('count', ascending=False)
print("Top 20 region × river/sea combinations with highest number of FWAs:\n")
print(region_river_sorted.head(20))
print(f"\nTotal number of region × river/sea combinations: {len(region_river)}\n")

# Step 7: Tally how many FWA records reference each parent code
print("=== Step 7: Flood Warning Area Counts by Parent Code ===")
print("This step shows hierarchical structure via parent codes")
# value_counts() returns one entry per distinct parent code, most frequent first.
parent_counts = df['parent'].value_counts()
print("Top 20 parent codes by number of associated FWAs:\n")
# Display is limited to the 20 largest; parent_counts itself keeps every code.
print(parent_counts.iloc[:20])
print(f"\nTotal number of unique parent codes: {len(parent_counts)}\n")

# Step 8: Measure how many records lack a Welsh-language FWA name
print("=== Step 8: Availability of Welsh Flood Warning Names ===")
print("This step assesses bilingual naming completeness for Welsh")
total_records = len(df)
# isnull() flags missing entries; summing the flags gives the count of gaps.
missing_welsh_name = df['w_fwa_name'].isnull().sum()
percent_missing = (missing_welsh_name / total_records) * 100
# Coverage is simply the complement of the missing share.
percent_covered = 100 - percent_missing
print(f"Total number of FWAs missing a Welsh name: {missing_welsh_name:,}")
print(f"Percentage of FWAs missing a Welsh name: {percent_missing:.2f}%")
print(f"Welsh name coverage: {percent_covered:.2f}%\n")

# Step 9: Per-region totals: FWA records, distinct admin areas, distinct river/sea values
print("=== Step 9: Regional Density Analysis ===")
print("This step shows FWA density metrics per region (FWAs, areas, river types)")
# Named aggregation labels each output column where it is computed, replacing
# the positional rename that silently mislabels columns if the spec is reordered.
# 'size' counts rows per region; 'count' skipped records whose fwa_name is
# missing, which undercounts FWAs relative to len(df) used elsewhere.
# The former .round(0) was dropped: every aggregate here is already an integer.
region_metrics = df.groupby('region').agg(
    Total_FWAs=('fwa_name', 'size'),
    Unique_Areas=('area', 'nunique'),
    RiverSea_Types=('river_sea', 'nunique'),
)
print("Regional density metrics:\n")
print(region_metrics.sort_values('Total_FWAs', ascending=False))
print()

# Step 10: Per-parent-code totals: child FWA records, distinct regions, distinct areas
print("=== Step 10: Parent Hierarchy Complexity ===")
print("This step analyzes geographic complexity of each parent code")
# Named aggregation labels each output column where it is computed, replacing
# the positional rename that silently mislabels columns if the spec is reordered.
# 'size' counts rows per parent; 'count' skipped records whose fwa_name is
# missing, which undercounts child FWAs relative to len(df) used elsewhere.
# The former .round(0) was dropped: every aggregate here is already an integer.
parent_complexity = df.groupby('parent').agg(
    Child_FWAs=('fwa_name', 'size'),
    Regions_Covered=('region', 'nunique'),
    Areas_Covered=('area', 'nunique'),
)
# Keep only the 15 parents with the most child records for display.
parent_complexity_top = parent_complexity.sort_values('Child_FWAs', ascending=False).head(15)
print("Top 15 parent codes by hierarchy complexity:\n")
print(parent_complexity_top)
print()

# Step 11: Collect headline figures into one label -> value mapping and print it
print("=== Step 11: Data Quality Dashboard ===")
print("This step provides complete dataset health metrics summary")
# Dict insertion order is the display order, so entries are added top to bottom.
quality_summary = {'Total FWAs': f"{len(df):,}"}
# Each of these rows is the distinct-value count of a single column.
for label, column in (
    ('Regions covered', 'region'),
    ('Admin areas', 'area'),
    ('Parent codes', 'parent'),
    ('River/Sea types', 'river_sea'),
    ('Unique FWA names', 'fwa_name'),
):
    quality_summary[label] = df[column].nunique()
# Share of records whose Welsh name is present, rendered as a percentage.
quality_summary['Welsh names complete'] = f"{(1 - df['w_fwa_name'].isna().mean()):.1%}"
print("Dataset quality metrics:")
for metric, value in quality_summary.items():
    # Labels are left-aligned in a 20-character column so the values line up.
    print(f"  {metric.ljust(20)}: {value}")

# ----------------------------------------------------------
# SUMMARY
# ----------------------------------------------------------
# Static recap of what each step covered; one tuple entry per output line.
summary_lines = (
    "\n=== Summary ===",
    "This comprehensive analysis examines NRW flood warning data across 11 dimensions:",
    "- Steps 1-2: Dataset loading and structure validation",
    "- Steps 3-5: Geographic and naming distributions",
    "- Step 6: Region × river/sea concentration patterns",
    "- Step 7: Hierarchical parent code structures",
    "- Step 8: Welsh language name completeness",
    "- Steps 9-10: Advanced density and complexity metrics",
    "- Step 11: Complete data quality dashboard",
    "✅ Analysis ready for flood risk modeling and NRW reporting",
)
# A newline separator plus print's own trailing newline emits exactly one line per entry.
print(*summary_lines, sep="\n")


=== Step 1: Loading Dataset ===
✅ Dataset loaded successfully: 353 records imported

=== Step 2: Dataset Overview ===
This step shows total records, columns, and column names for data validation
Total records: 353
Total columns: 22
Column names: ['FID', 'objectid', 'region', 'area', 'fwd_tacode', 'fwis_code', 'fwa_name', 'descrip', 'river_sea', 'parent', 'e_qdial', 'w_region', 'w_fwa_name', 'w_descrip', 'w_afon', 'w_qdial', 'ta_address', 'fwd_reg', 'fwd_reg_da', 'pub_reg', 'globalid', 'geom']

This step shows number of FWAs per region and total regions covered

region
Wales    353
Name: count, dtype: int64

Total number of regions: 1

This step shows FWAs per administrative area as defined by NRW
Number of FWAs per administrative area:

area
NRW South West    151
NRW Northern      109
NRW South East     93
Name: count, dtype: int64

Total number of administrative areas: 3

This step counts distinct FWA names across the dataset
Name uniqueness ratio: 100.0%

=== Step 6: FWAs by Region ×

In [3]:
import os

# Resolve the current-directory marker to an absolute path and show it,
# i.e. the directory the notebook kernel is running in.
print(os.path.abspath(os.curdir))

C:\Users\kaust
