Pulling Everything Together

Let's create a main script to run the entire pipeline:

The pipeline meets all requirements of the challenge, including:

- Scraping 400+ reviews per bank
- Analyzing sentiment and themes
- Storing data in Oracle
- Providing insights and recommendations

In [None]:
# import necessary libraries
import logging
import time
from datetime import datetime

In [None]:
# Configure the root logger: records at INFO level and above are written to
# a log file, each line carrying a timestamp, the severity level, and the
# message text.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    filename='fintech_app_analysis.log',
)


In [None]:

def main():
    """Run the complete data engineering pipeline.

    Executes the four pipeline stages in order, passing each stage's
    output to the next:

    1. ``play_store_scraper.run_scraping()`` -- data collection and
       preprocessing; its return value is handed to stage 2.
    2. ``sentiment_analysis.run_analysis(...)`` -- sentiment and thematic
       analysis; its return value is handed to stages 3 and 4.
    3. ``oracle_database.run_database_setup(...)`` -- stores the analyzed
       data in Oracle.
    4. ``insights_generator.run_insights_generation(...)`` -- produces
       insights and recommendations.

    The values passed between stages are presumably file paths (they are
    logged as "Output") -- confirm against the stage modules.

    Progress, total elapsed time, and any failure are written through the
    ``logging`` module.

    Returns:
        None. Failures are not propagated: any ``Exception`` raised by a
        stage (including a failure to import the stage module) is logged
        together with its traceback and the run stops at that point.
    """
    start_time = time.time()

    logging.info("Starting Customer Experience Analytics pipeline")

    try:
        # Stage modules are imported lazily, inside the try block, so that a
        # missing or broken module is reported in the log as a pipeline
        # failure instead of crashing before anything has been logged.

        # Task 1: Data Collection and Preprocessing
        logging.info("Starting Task 1: Data Collection and Preprocessing")
        from play_store_scraper import run_scraping
        cleaned_data_file = run_scraping()
        logging.info(f"Task 1 complete. Output: {cleaned_data_file}")

        # Task 2: Sentiment and Thematic Analysis
        logging.info("Starting Task 2: Sentiment and Thematic Analysis")
        from sentiment_analysis import run_analysis
        analyzed_data_file = run_analysis(cleaned_data_file)
        logging.info(f"Task 2 complete. Output: {analyzed_data_file}")

        # Task 3: Store Cleaned Data in Oracle
        logging.info("Starting Task 3: Store Cleaned Data in Oracle")
        from oracle_database import run_database_setup
        run_database_setup(analyzed_data_file)
        logging.info("Task 3 complete")

        # Task 4: Insights and Recommendations
        logging.info("Starting Task 4: Insights and Recommendations")
        from insights_generator import run_insights_generation
        run_insights_generation(analyzed_data_file)
        logging.info("Task 4 complete")

        # Pipeline complete
        elapsed_time = time.time() - start_time
        logging.info(f"Customer Experience Analytics pipeline completed successfully in {elapsed_time:.2f} seconds")

    except Exception as e:
        # logging.exception records the message at ERROR level *and* attaches
        # the active traceback, so the log shows which stage failed and where.
        logging.exception("Pipeline failed: %s", e)
        elapsed_time = time.time() - start_time
        logging.info(f"Pipeline terminated after {elapsed_time:.2f} seconds")

# Run the pipeline only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()