In [1]:
import pandas as pd
import duckdb

In [2]:
conn: duckdb.DuckDBPyConnection = duckdb.connect("paveai.duckdb")

In [None]:
conn.execute("""
CREATE OR REPLACE VIEW golden.v_inspector_analysis_unpivot AS
SELECT DISTINCT
    inspector_id,
    'inspections' AS type,
    total_inspections AS value 
FROM
    golden.v_inspector_analysis

UNION ALL
            
SELECT DISTINCT
    inspector_id,
    'contributions rate' AS type,
    inspection_contributions_rate AS value 
FROM
    golden.v_inspector_analysis

UNION ALL
            
SELECT DISTINCT
    inspector_id,
    'contributions rate' AS type,
    inspection_contributions_rate AS value 
FROM
    golden.v_inspector_analysis

UNION ALL
             
SELECT DISTINCT
    inspector_id,
    'accuracy rate' AS type,
    rate_accuracy AS value 
FROM
    golden.v_inspector_analysis

             
UNION ALL
             
SELECT DISTINCT
    inspector_id,
    'growth rate (monthly)' AS type,
    inspection_growth_rate_monthly AS value 
FROM
    golden.v_inspector_analysis

""")

<duckdb.duckdb.DuckDBPyConnection at 0x109961fb0>

In [4]:
# extract sliver layer
df_dim_location: pd.DataFrame = conn.execute("SELECT * FROM sliver.dim_location").fetch_df()
df_dim_inspectors: pd.DataFrame = conn.execute("SELECT * FROM sliver.dim_inspectors").fetch_df()
df_dim_vehicles: pd.DataFrame = conn.execute("SELECT * FROM sliver.dim_vehicles").fetch_df()
df_dim_damage_reports: pd.DataFrame = conn.execute("SELECT * FROM sliver.dim_damage_reports").fetch_df()
df_hist_inspections: pd.DataFrame = conn.execute("SELECT * FROM sliver.hist_inspections").fetch_df()
df_dim_dates: pd.DataFrame = conn.execute("SELECT * FROM sliver.dim_dates").fetch_df()
df_dim_severity_scores: pd.DataFrame = conn.execute("SELECT * FROM sliver.dim_severity_scores").fetch_df()
df_fact_inspections: pd.DataFrame = conn.execute("SELECT * FROM sliver.fact_inspections").fetch_df()

In [5]:
# extract golden layer
df_v_avg_damage_cost_by_vehicle_type: pd.DataFrame = conn.execute("SELECT * FROM golden.v_avg_damage_cost_by_vehicle_type").fetch_df()
df_v_inspector_performance_scores: pd.DataFrame = conn.execute("SELECT * FROM golden.v_inspector_performance_scores").fetch_df()
df_v_geo_damanges_per_region: pd.DataFrame = conn.execute("SELECT * FROM golden.v_geo_damanges_per_region").fetch_df()
df_v_geo_damanges_per_location: pd.DataFrame = conn.execute("SELECT * FROM golden.v_geo_damanges_per_location").fetch_df()
df_v_inspection_trend: pd.DataFrame = conn.execute("SELECT * FROM golden.v_inspection_trend").fetch_df()
df_v_inspector_analysis: pd.DataFrame = conn.execute("SELECT * FROM golden.v_inspector_analysis").fetch_df()
df_v_inspector_analysis_unpivot: pd.DataFrame = conn.execute("SELECT * FROM golden.v_inspector_analysis_unpivot").fetch_df()

In [6]:
with pd.ExcelWriter('paveai_dataset.xlsx', engine='openpyxl') as writer:
    df_v_avg_damage_cost_by_vehicle_type.to_excel(writer, sheet_name='v_avg_damage_cost_by_vehicle_type', index=False)
    df_v_inspector_performance_scores.to_excel(writer, sheet_name='v_inspector_performance_scores', index=False)
    df_v_geo_damanges_per_region.to_excel(writer, sheet_name='v_geo_damanges_per_region', index=False)
    df_v_geo_damanges_per_location.to_excel(writer, sheet_name='v_geo_damanges_per_location', index=False)
    df_v_inspection_trend.to_excel(writer, sheet_name='v_inspection_trend', index=False)
    df_dim_location.to_excel(writer, sheet_name='dim_location', index=False)
    df_dim_inspectors.to_excel(writer, sheet_name='dim_inspectors', index=False)
    df_dim_vehicles.to_excel(writer, sheet_name='dim_vehicles', index=False)
    df_dim_damage_reports.to_excel(writer, sheet_name='dim_damage_reports', index=False)
    df_hist_inspections.to_excel(writer, sheet_name='hist_inspections', index=False)
    df_dim_severity_scores.to_excel(writer, sheet_name='dim_severity_scores', index=False)
    df_dim_dates.to_excel(writer, sheet_name='dim_dates', index=False)
    df_fact_inspections.to_excel(writer, sheet_name='fact_inspections', index=False)
    df_v_inspector_analysis.to_excel(writer, sheet_name='v_inspector_analysis', index=False)
    df_v_inspector_analysis_unpivot.to_excel(writer, sheet_name='v_inspector_analysis_unpivot', index=False)



In [8]:
df_v_inspector_analysis

Unnamed: 0,inspector_id,total_inspections,total_duration_minutes,inspection_contributions_rate,total_poor_inspections,rate_accuracy,total_inspections_per_month,total_inspections_per_year,month,year,inspection_growth_rate_monthly
0,INS0001,87,5394.0,0.0174,14,0.839,3,18,7,2023,0.333333
1,INS0001,87,5394.0,0.0174,14,0.839,3,18,8,2023,0.333333
2,INS0001,87,5394.0,0.0174,14,0.839,5,18,9,2023,0.333333
3,INS0001,87,5394.0,0.0174,14,0.839,2,18,10,2023,0.333333
4,INS0001,87,5394.0,0.0174,14,0.839,1,18,11,2023,0.333333
...,...,...,...,...,...,...,...,...,...,...,...
1219,INS0050,104,6641.0,0.0208,8,0.923,4,34,3,2025,2.000000
1220,INS0050,104,6641.0,0.0208,8,0.923,9,34,4,2025,2.000000
1221,INS0050,104,6641.0,0.0208,8,0.923,4,34,5,2025,2.000000
1222,INS0050,104,6641.0,0.0208,8,0.923,2,34,6,2025,2.000000


Unnamed: 0,inspector_id,type,value
0,INS0001,inspections,87.000000
1,INS0009,inspections,89.000000
2,INS0011,inspections,113.000000
3,INS0018,inspections,104.000000
4,INS0035,inspections,93.000000
...,...,...,...
245,INS0012,growth rate (monthly),1.000000
246,INS0048,growth rate (monthly),2.000000
247,INS0026,growth rate (monthly),0.000000
248,INS0029,growth rate (monthly),-0.333333


In [11]:
conn.execute("""
SELECT DISTINCT
    * FROM golden.v_inspector_analysis

""").fetch_df()

Unnamed: 0,inspector_id,total_inspections,total_duration_minutes,inspection_contributions_rate,total_poor_inspections,rate_accuracy,total_inspections_per_month,total_inspections_per_year,month,year,inspection_growth_rate_monthly
0,INS0039,110,7067.0,0.022,42,0.618,5,27,8,2023,1.5
1,INS0039,110,7067.0,0.022,42,0.618,3,27,10,2023,1.5
2,INS0039,110,7067.0,0.022,42,0.618,2,50,3,2024,1.5
3,INS0039,110,7067.0,0.022,42,0.618,3,50,4,2024,1.5
4,INS0039,110,7067.0,0.022,42,0.618,5,50,11,2024,1.5
...,...,...,...,...,...,...,...,...,...,...,...
1219,INS0038,120,7723.0,0.024,49,0.592,5,60,3,2024,2.0
1220,INS0038,120,7723.0,0.024,49,0.592,8,60,5,2024,2.0
1221,INS0038,120,7723.0,0.024,49,0.592,2,60,8,2024,2.0
1222,INS0038,120,7723.0,0.024,49,0.592,3,36,1,2025,2.0
