In [0]:
import sys
sys.path.append('/Workspace/Users/pmanoj@depaul.edu')
from config import config

In [0]:
%run "./06_master_analytics_transformation"

In [0]:
def export_to_power_bi(
    master_datasets,
    storage_account_name="tokyoolympicdatamegha",
    container_name="tokyo-olympic-data",
    access_key=None,
):
    """
    Export datasets in Power BI friendly format.

    Every DataFrame in ``master_datasets`` is coalesced to a single partition
    and written as CSV (with header, overwrite mode) under
    ``transformed/power_bi_exports/`` in the given blob container. A static
    ``dataset_guide.csv`` describing the standard datasets is written next to
    them.

    Args:
        master_datasets: Mapping of dataset name -> Spark DataFrame. The name
            becomes the ``<name>.csv`` export path component.
        storage_account_name: Azure storage account hosting the container.
        container_name: Blob container that receives the exports.
        access_key: Storage account key. When omitted (or empty) the Spark
            session's existing credential configuration is left untouched
            instead of being overwritten with an empty key. Do not hardcode
            the key in the notebook; fetch it at call time, e.g. with
            ``dbutils.secrets.get(scope=..., key=...)``.

    Returns:
        list[str]: One export path per entry of ``master_datasets``, in
        iteration order. The dataset guide path is not included.

    Raises:
        Exception: Any failure is logged through ``logger.error`` and
        re-raised unchanged.
    """
    try:
        logger.info("Starting Power BI export...")

        blob_host = f"{storage_account_name}.blob.core.windows.net"

        # Only touch the session credentials when a key was actually supplied;
        # writing an empty key would replace a working configuration with an
        # unusable one.
        if access_key:
            spark.conf.set(f"fs.azure.account.key.{blob_host}", access_key)

        # Derived from the parameters so the account name cannot drift between
        # the credential setting and the target URL.
        output_path = (
            f"wasbs://{container_name}@{blob_host}/transformed/power_bi_exports/"
        )

        dbutils.fs.mkdirs(output_path)
        logger.info(f"Created output directory: {output_path}")

        exported_files = []

        for dataset_name, df in master_datasets.items():
            export_path = f"{output_path}{dataset_name}.csv"

            # NOTE: Spark's CSV writer creates a *directory* at export_path
            # holding a single part file (because of coalesce(1)), not a bare
            # CSV file.
            df.coalesce(1).write.mode("overwrite").option("header", "true").csv(export_path)

            # count() is a separate Spark action, so the DataFrame is
            # evaluated a second time here purely for the log message.
            row_count = df.count()
            logger.info(f"Exported {dataset_name}: {row_count} rows to {export_path}")
            exported_files.append(export_path)

        # Static catalogue of the expected datasets; it is not derived from
        # the keys of master_datasets.
        summary_info = spark.createDataFrame([
            ("country_analytics", "Country-level performance metrics and strategic insights"),
            ("sport_analytics", "Sport-level gender balance and competition analysis"),
            ("executive_summary", "High-level KPIs and overall Olympic performance"),
            ("medal_efficiency", "Medal efficiency analysis by country"),
            ("sport_dominance", "Sport concentration and specialization strategies"),
            ("gender_analysis", "Gender participation and opportunity analysis")
        ], ["dataset_name", "description"])

        summary_path = f"{output_path}dataset_guide.csv"
        summary_info.coalesce(1).write.mode("overwrite").option("header", "true").csv(summary_path)

        logger.info("Power BI export completed successfully")
        return exported_files

    except Exception as e:
        logger.error(f"Power BI export failed: {str(e)}")
        raise

# Cell-level confirmation, printed once this cell has finished executing.
print("Power BI export functions defined")

In [0]:
# Driver cell: build the master analytics datasets, export them for Power BI,
# then print the exported paths followed by manual follow-up instructions.
# Any failure is logged and re-raised so the notebook run is marked as failed.
try:
    logger.info("EXECUTING POWER BI EXPORT PIPELINE")

    logger.info("Generating master analytics datasets...")
    master_data = create_master_analytics(
        athletes_df,
        medals_df,
        gender_df,
        config,
    )

    exported_files = export_to_power_bi(master_data)

    # Summary of what was written.
    print("POWER BI EXPORT COMPLETED")
    print(f"Exported {len(exported_files)} datasets")
    print("\nDatasets ready for Power BI:")

    for i, file_path in enumerate(exported_files, start=1):
        print(f"{i}. {file_path}")

    # Manual follow-up steps for the dashboard author.
    print("\n NEXT STEPS ")
    print(
        "1. Download the CSV files from storage or use link to storage",
        "2. Open Power BI Desktop",
        "3. Import the CSV files",
        "4. Create relationships between tables",
        "5. Build your Olympic analytics dashboard!",
        sep="\n",
    )

    logger.info("Power BI export pipeline completed successfully")

except Exception as exc:
    logger.error(f"Export pipeline failed: {exc}")
    raise