In [1]:
import pandas as pd
import duckdb

In [2]:
# Connection to the DuckDB database file `paveai.duckdb`; every later cell
# runs its SQL through this single `conn` object.
conn: duckdb.DuckDBPyConnection = duckdb.connect(database="paveai.duckdb")

In [None]:
# (Re)create golden.v_inspector_analysis_unpivot: a long-format version of
# golden.v_inspector_analysis with one row per (inspector_id, metric), where
# `type` holds the metric label and `value` holds the metric's number.
#
# Each metric must appear in exactly ONE branch below. The branches are glued
# together with UNION ALL, which keeps duplicates, so listing a metric twice
# doubles every one of its rows in the view (and in anything exported from it).
conn.execute("""
CREATE OR REPLACE VIEW golden.v_inspector_analysis_unpivot AS
SELECT DISTINCT
    inspector_id,
    'inspections' AS type,
    total_inspections AS value
FROM
    golden.v_inspector_analysis

UNION ALL

SELECT DISTINCT
    inspector_id,
    'contributions rate' AS type,
    inspection_contributions_rate AS value
FROM
    golden.v_inspector_analysis

UNION ALL

SELECT DISTINCT
    inspector_id,
    'accuracy rate' AS type,
    rate_accuracy AS value
FROM
    golden.v_inspector_analysis

UNION ALL

SELECT DISTINCT
    inspector_id,
    'growth rate (monthly)' AS type,
    inspection_growth_rate_monthly AS value
FROM
    golden.v_inspector_analysis
""")

<duckdb.duckdb.DuckDBPyConnection at 0x109961fb0>

In [4]:
# Extract the `sliver` layer (the schema is spelled `sliver` in the database,
# so the queries must keep that spelling): one DataFrame per table.
def _read_sliver(table: str) -> pd.DataFrame:
    """Return every row of ``sliver.<table>`` as a pandas DataFrame."""
    return conn.execute(f"SELECT * FROM sliver.{table}").fetch_df()


df_dim_location: pd.DataFrame = _read_sliver("dim_location")
df_dim_inspectors: pd.DataFrame = _read_sliver("dim_inspectors")
df_dim_vehicles: pd.DataFrame = _read_sliver("dim_vehicles")
df_dim_damage_reports: pd.DataFrame = _read_sliver("dim_damage_reports")
df_hist_inspections: pd.DataFrame = _read_sliver("hist_inspections")
df_dim_dates: pd.DataFrame = _read_sliver("dim_dates")
df_dim_severity_scores: pd.DataFrame = _read_sliver("dim_severity_scores")
df_fact_inspections: pd.DataFrame = _read_sliver("fact_inspections")

In [5]:
# Extract the `golden` layer: one DataFrame per reporting view.
# The view names (including the `damanges` spelling) are used verbatim in the
# queries, so they must match what exists in the database.
def _read_golden(view: str) -> pd.DataFrame:
    """Return every row of ``golden.<view>`` as a pandas DataFrame."""
    return conn.execute(f"SELECT * FROM golden.{view}").fetch_df()


df_v_avg_damage_cost_by_vehicle_type: pd.DataFrame = _read_golden("v_avg_damage_cost_by_vehicle_type")
df_v_inspector_performance_scores: pd.DataFrame = _read_golden("v_inspector_performance_scores")
df_v_geo_damanges_per_region: pd.DataFrame = _read_golden("v_geo_damanges_per_region")
df_v_geo_damanges_per_location: pd.DataFrame = _read_golden("v_geo_damanges_per_location")
df_v_inspection_trend: pd.DataFrame = _read_golden("v_inspection_trend")
df_v_inspector_analysis: pd.DataFrame = _read_golden("v_inspector_analysis")
df_v_inspector_analysis_unpivot: pd.DataFrame = _read_golden("v_inspector_analysis_unpivot")

In [6]:
# Export every extracted frame to one workbook, one sheet per frame.
# The mapping's insertion order is the order in which the sheets are written.
# NOTE(review): 'v_avg_damage_cost_by_vehicle_type' is 33 characters long, while
# Excel limits worksheet names to 31 — confirm the workbook opens cleanly in the
# tool that consumes it before relying on that sheet name.
frames_by_sheet = {
    'v_avg_damage_cost_by_vehicle_type': df_v_avg_damage_cost_by_vehicle_type,
    'v_inspector_performance_scores': df_v_inspector_performance_scores,
    'v_geo_damanges_per_region': df_v_geo_damanges_per_region,
    'v_geo_damanges_per_location': df_v_geo_damanges_per_location,
    'v_inspection_trend': df_v_inspection_trend,
    'dim_location': df_dim_location,
    'dim_inspectors': df_dim_inspectors,
    'dim_vehicles': df_dim_vehicles,
    'dim_damage_reports': df_dim_damage_reports,
    'hist_inspections': df_hist_inspections,
    'dim_severity_scores': df_dim_severity_scores,
    'dim_dates': df_dim_dates,
    'fact_inspections': df_fact_inspections,
    'v_inspector_analysis': df_v_inspector_analysis,
    'v_inspector_analysis_unpivot': df_v_inspector_analysis_unpivot,
}

with pd.ExcelWriter('paveai_dataset.xlsx', engine='openpyxl') as writer:
    for sheet_name, frame in frames_by_sheet.items():
        # index=False: the DataFrame index is not written as a column.
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

