# GeoQA — Report Generation & Batch Processing

This notebook demonstrates how to generate professional HTML quality reports
and batch-process multiple datasets.

In [1]:
import os
import geoqa
import pandas as pd

# Show which GeoQA release produced the outputs recorded in this notebook
print("GeoQA version: {}".format(geoqa.__version__))

GeoQA version: 0.2.0


## 1. Single Dataset Report

In [2]:
# Profile a single dataset and write its HTML quality report.

# Make sure the output directory exists before anything is written into it.
# NOTE(review): profile.to_html() may already create missing parent folders —
# this call is harmless either way and keeps a fresh checkout runnable.
os.makedirs("reports", exist_ok=True)

profile = geoqa.profile(r"../../data/giza_buildings.shp")

# Generate HTML report; to_html() returns a value that is printed as the report location
report_path = profile.to_html("reports/giza_buildings_report.html")
print(f"Report generated: {report_path}")
print(f"Quality Score: {profile.quality_score:.1f}/100")

Report generated: reports\giza_buildings_report.html
Quality Score: 99.9/100


## 2. Batch Process All Shapefiles

Profile every shapefile in the data directory and generate individual reports.

In [3]:
# Directory scanned for input datasets (relative to the notebook's working directory)
data_dir = r"../../data"

# Collect the shapefiles in a reproducible way:
#   - os.listdir() returns entries in arbitrary order, so sort them explicitly
#     (case-insensitively, so "Qena_shapefile.shp" sorts between "o…" and "r…"
#     the same way on every platform)
#   - compare the extension case-insensitively so files named "*.SHP" are not skipped
shapefiles = sorted(
    (f for f in os.listdir(data_dir) if f.lower().endswith('.shp')),
    key=str.lower,
)

print(f"Found {len(shapefiles)} shapefiles:")
for shp in shapefiles:
    print(f"  - {shp}")

Found 6 shapefiles:
  - giza.shp
  - giza_buildings.shp
  - giza_roads.shp
  - mrkz_nasr.shp
  - october.shp
  - Qena_shapefile.shp


In [4]:
# Profile every discovered shapefile, write one HTML report per dataset and
# collect a summary record for each (or an error record when profiling fails).
results = []

for shp_name in shapefiles:
    shp_path = os.path.join(data_dir, shp_name)
    try:
        dataset_profile = geoqa.profile(shp_path)

        # One report per dataset, named after the profiled dataset
        html_path = f"reports/{dataset_profile.name}_report.html"
        dataset_profile.to_html(html_path)

        geometry = dataset_profile.geometry_results
        attributes = dataset_profile.attribute_results
        summary_row = {
            "Dataset": dataset_profile.name,
            "Features": dataset_profile.feature_count,
            "Columns": dataset_profile.column_count,
            "Type": dataset_profile.geometry_type,
            "CRS": dataset_profile.crs or "None",
            "Invalid Geom": geometry['invalid_count'],
            "Empty Geom": geometry['empty_count'],
            "Null Attrs": attributes['total_nulls'],
            "Quality Score": round(dataset_profile.quality_score, 1),
            "Report": html_path,
        }
        results.append(summary_row)
        print(f"  ✅ {dataset_profile.name}: Score={dataset_profile.quality_score:.1f}")
    except Exception as exc:
        # Best-effort batch: record the failure and carry on with the next file
        results.append({"Dataset": shp_name, "Error": str(exc)})
        print(f"  ❌ {shp_name}: {exc}")

  ✅ giza: Score=85.0
  ✅ giza_buildings: Score=99.9
  ✅ giza_roads: Score=94.1
  ✅ mrkz_nasr: Score=100.0
  ✅ october: Score=100.0


  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.savefig(
  fig.savefig(
  fig.savefig(
  fig.savefig(
  fig.savefig(


  ✅ Qena_shapefile: Score=96.7


## 3. Comparison Dashboard

In [5]:
# Turn the batch records into a table; rank best score first when scores are present
df = pd.DataFrame(results)
has_scores = 'Quality Score' in df.columns
df = df.sort_values('Quality Score', ascending=False) if has_scores else df
df

Unnamed: 0,Dataset,Features,Columns,Type,CRS,Invalid Geom,Empty Geom,Null Attrs,Quality Score,Report
4,october,1,5,Polygon,EPSG:32636,0,0,0,100.0,reports/october_report.html
3,mrkz_nasr,1,7,Polygon,EPSG:4229,0,0,0,100.0,reports/mrkz_nasr_report.html
1,giza_buildings,24615,15,Polygon,EPSG:4326,1,0,1118,99.9,reports/giza_buildings_report.html
5,Qena_shapefile,1,9,Polygon,EPSG:4229,0,0,1,96.7,reports/Qena_shapefile_report.html
2,giza_roads,8981,7,LineString,EPSG:4326,0,0,12413,94.1,reports/giza_roads_report.html
0,giza,1,6,Polygon,EPSG:32636,0,0,3,85.0,reports/giza_report.html


## 4. Visualize Quality Scores

In [6]:
import matplotlib.pyplot as plt

# Horizontal bar chart comparing the quality score of every profiled dataset.
if 'Quality Score' in df.columns:
    # Datasets whose profiling failed only carry "Dataset"/"Error" fields, so their
    # score is NaN in the table; leave them out instead of drawing empty bars
    # labelled "nan".
    scored = df.dropna(subset=['Quality Score'])

    fig, ax = plt.subplots(figsize=(10, 5))

    scores = scored['Quality Score'].values
    names = scored['Dataset'].values
    # Traffic-light colouring: green from 80, amber from 60, red below
    colors = ['#16a34a' if s >= 80 else '#d97706' if s >= 60 else '#dc2626' for s in scores]

    bars = ax.barh(names, scores, color=colors, edgecolor='white', linewidth=0.5)
    ax.set_xlabel('Quality Score')
    ax.set_title('GeoQA Quality Scores — Dataset Comparison')
    # Scores top out at 100; keep headroom on the right so the value labels of
    # the best datasets stay inside the axes.
    ax.set_xlim(0, 110)

    for bar, score in zip(bars, scores):
        # One decimal, matching the scores printed elsewhere in this notebook
        # (":.0f" would show 99.9 as a misleading "100").
        ax.text(bar.get_width() + 1, bar.get_y() + bar.get_height()/2,
                f'{score:.1f}', va='center', fontweight='bold')

    # Operate on the explicit figure rather than pyplot's implicit "current figure"
    fig.tight_layout()
    fig.savefig('reports/quality_comparison.png', dpi=150, bbox_inches='tight')
    plt.show()
    print("Chart saved to reports/quality_comparison.png")

Chart saved to reports/quality_comparison.png


  plt.show()


## 5. Export Summary to CSV

In [7]:
# Write the comparison table to disk (row index omitted) alongside the HTML reports
csv_path = "reports/quality_summary.csv"
df.to_csv(path_or_buf=csv_path, index=False)
print("Summary exported to " + csv_path)

Summary exported to reports/quality_summary.csv


---

**GeoQA** — Geospatial Data Quality Assessment & Interactive Profiling