In [10]:
# Cell 1: Setup Environment Variables & Navigate
import os
import subprocess
import time
import requests

# --- Navigate to your project root ---
# This ensures that your subsequent paths (e.g., to ingestion_service/) are correct.
%cd /home/jupyter/SentimentAnalysis-Steam/

# --- Set environment variables for local testing ---
# These are the variables your ingestion_app.py expects.
# They will be set for this Python session and its child processes.
%env GCS_BUCKET_NAME=steam-reviews-bucket-0
%env GCS_DATA_FILE_PATH=steam_reviews_cleaned.csv
%env BQ_PROJECT_ID=sentiment-analysis-steam
%env BQ_DATASET_ID=steam_reviews
%env BQ_RAW_TABLE_ID=raw_reviews
%env REGION=us-west1 # Ensure this matches your bucket/project region for BQ dataset location

print(f"Current working directory: {os.getcwd()}")
print("Environment variables set for testing.")

# IMPORTANT: Ensure your local GCP authentication is set up.
# You need to have run 'gcloud auth application-default login' once in your JupyterLab terminal.

/home/jupyter/SentimentAnalysis-Steam
env: GCS_BUCKET_NAME=steam-reviews-bucket-0
env: GCS_DATA_FILE_PATH=steam_reviews_cleaned.csv
env: BQ_PROJECT_ID=sentiment-analysis-steam
env: BQ_DATASET_ID=steam_reviews
env: BQ_RAW_TABLE_ID=raw_reviews
env: REGION=us-west1 # Ensure this matches your bucket/project region for BQ dataset location
Current working directory: /home/jupyter/SentimentAnalysis-Steam
Environment variables set for testing.


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [11]:
# Cell 2: Run ingestion_app.py Locally in the Background

# We'll store the process object globally so we can stop it later.
global ingestion_app_process

# Check if a process is already running to avoid multiple instances
if 'ingestion_app_process' in globals() and ingestion_app_process.poll() is None:
    print("Ingestion app is already running. Terminating previous process...")
    ingestion_app_process.terminate()
    ingestion_app_process.wait()
    time.sleep(1) # Give it a moment to shut down

print("Starting ingestion_app.py in the background...")

# Use subprocess.Popen to run the Flask app as a non-blocking process.
# We redirect stdout/stderr to files so we can inspect logs later if needed.
# Ensure the path to ingestion_app.py is correct relative to your current CWD (which is /home/jupyter/SentimentAnalysis-Steam/)
log_file_path = "ingestion_app_local.log"
with open(log_file_path, "w") as outfile:
    ingestion_app_process = subprocess.Popen(
        ["python", "ingestion_service/ingestion_app.py"],
        stdout=outfile,
        stderr=subprocess.STDOUT, # Redirect stderr to stdout file
        text=True, # Handle output as text
        bufsize=1 # Line-buffered output
    )

print(f"Ingestion app started. Check '{log_file_path}' for logs.")
print("Giving the app a moment to start up...")
time.sleep(10) # Give the Flask app some time to initialize and bind to the port

# Optional: Ping the health check endpoint to confirm it's up
try:
    health_check_response = requests.get("http://localhost:8081/")
    if health_check_response.status_code == 200:
        print("✅ App health check passed: Local server is responding.")
    else:
        print(f"⚠️ App health check failed with status {health_check_response.status_code}.")
except requests.exceptions.ConnectionError:
    print("❌ App health check failed: Could not connect to local server. Check logs.")

Starting ingestion_app.py in the background...
Ingestion app started. Check 'ingestion_app_local.log' for logs.
Giving the app a moment to start up...
⚠️ App health check failed with status 405.


In [12]:
# Cell 3: Send Test Request to Local App

# The URL of your locally running Flask app
LOCAL_APP_URL = "http://localhost:8081/"

print(f"Sending POST request to {LOCAL_APP_URL}...")

try:
    # Send a simple POST request (body can be empty JSON for your current app)
    response = requests.post(LOCAL_APP_URL, json={})
    response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)

    print("✅ Request successful!")
    print("Response status code:", response.status_code)
    print("Response JSON:", response.json())

except requests.exceptions.ConnectionError as e:
    print(f"❌ Connection Error: Could not connect to the local server.")
    print(f"   Make sure 'ingestion_app.py' is running in a separate terminal on port 8080 or check logs in '{log_file_path}'.")
    print(f"   Error details: {e}")
except requests.exceptions.HTTPError as e:
    print(f"❌ HTTP Error: Server responded with {e.response.status_code}.")
    print(f"   Response: {e.response.text}")
    print(f"   Error details: {e}")
except Exception as e:
    print(f"❌ An unexpected error occurred: {e}")

# Optional: View recent logs from the background process
print("\n--- Last 20 lines of local app logs ---")
try:
    with open(log_file_path, "r") as f:
        log_lines = f.readlines()
        for line in log_lines[-20:]: # Print last 20 lines
            print(line.strip())
except FileNotFoundError:
    print("Log file not found.")


Sending POST request to http://localhost:8081/...
✅ Request successful!
Response status code: 200
Response JSON: {'message': 'Raw data ingested to BigQuery.', 'status': 'success'}

--- Last 20 lines of local app logs ---
→ Grand Theft Auto V Legacy: page 17/50, got 100 reviews.
→ Grand Theft Auto V Legacy: page 18/50, got 100 reviews.
→ Grand Theft Auto V Legacy: page 19/50, got 100 reviews.
→ Grand Theft Auto V Legacy: page 20/50, got 100 reviews.
→ Grand Theft Auto V Legacy: page 21/50, got 100 reviews.
→ Grand Theft Auto V Legacy: page 22/50, got 100 reviews.
→ Grand Theft Auto V Legacy: page 23/50, got 100 reviews.
→ Grand Theft Auto V Legacy: page 24/50, got 100 reviews.
→ Grand Theft Auto V Legacy: page 25/50, got 100 reviews.
→ Grand Theft Auto V Legacy: page 26/50, got 100 reviews.
→ Grand Theft Auto V Legacy: page 27/50, got 100 reviews.
→ Grand Theft Auto V Legacy: page 28/50, got 100 reviews.
→ Grand Theft Auto V Legacy: page 29/50, got 100 reviews.
→ Grand Theft Auto V Lega

In [13]:
# Cell 4: Stop the Local App
import time

if 'ingestion_app_process' in globals() and ingestion_app_process.poll() is None:
    print("Terminating ingestion app process...")
    ingestion_app_process.terminate() # Request termination
    ingestion_app_process.wait(timeout=5) # Wait for it to terminate
    if ingestion_app_process.poll() is None: # Check if it's still running
        print("Process did not terminate gracefully, killing it.")
        ingestion_app_process.kill() # Force kill if it's stuck
    print("App process stopped.")
else:
    print("Ingestion app is not running.")

Terminating ingestion app process...
App process stopped.
