In [1]:
# 🚀 Dashboard Launch Helper

def launch_dashboard(share=False, port=7860):
    """
    Launch the enhanced dashboard
    
    Args:
        share (bool): Whether to create a public link
        port (int): Port to run the server on
    """
    try:
        if 'enhanced_dashboard' not in globals():
            print("❌ Enhanced dashboard not found. Please run the dashboard creation cell first.")
            return
        
        print(f"🚀 Launching Enhanced Climate Dashboard on port {port}...")
        print("📊 Dashboard Features:")
        print("   - Country Analysis with PySpark backend")
        print("   - Global Comparisons")
        print("   - AI Temperature Predictions (demo)")
        print("   - System Monitoring")
        print("   - Interactive Visualizations")
        print()
        
        # Launch with threading to avoid blocking
        import threading
        
        def launch_in_background():
            enhanced_dashboard.launch(
                share=share,
                server_name="0.0.0.0",
                server_port=port,
                show_error=True,
                quiet=False,
                prevent_thread_lock=True
            )
        
        thread = threading.Thread(target=launch_in_background, daemon=True)
        thread.start()
        
        print(f"🌐 Dashboard should be available at: http://localhost:{port}")
        if share:
            print("🔗 Public link will be generated...")
        
    except Exception as e:
        print(f"❌ Error launching dashboard: {e}")

print("🎛️ Dashboard Launch Helper loaded!")
print("📖 Usage: launch_dashboard() or launch_dashboard(share=True, port=7860)")
print("⚠️ NOTE: Run cells 3-9 first to create the dashboard, then use launch_dashboard()")

🎛️ Dashboard Launch Helper loaded!
📖 Usage: launch_dashboard() or launch_dashboard(share=True, port=7860)
⚠️ NOTE: Run cells 3-9 first to create the dashboard, then use launch_dashboard()


# PySpark Climate Data Analysis - Optimized

This notebook demonstrates optimized PySpark operations for climate data analysis including:
- Efficient Spark configuration
- Data loading and preprocessing
- Performance-optimized transformations
- Interactive visualizations with Gradio

In [2]:
# Optimized imports
from pyspark.sql import SparkSession
from pyspark.sql.functions import (
    col, mean, stddev, month, year, avg, 
    when, isnan, isnull, broadcast, 
    percentile_approx, desc, asc, count,
    min as spark_min, max as spark_max
)
from pyspark.sql.types import DoubleType
import pandas as pd
import matplotlib.pyplot as plt
import gradio as gr
import warnings
warnings.filterwarnings('ignore')

# Also import plotly for advanced visualizations
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np

# Optimized Spark Configuration
def create_optimized_spark_session():
    """Create an optimized Spark session for climate data analysis"""
    spark = SparkSession.builder \
        .appName("OptimizedClimateAnalysis") \
        .config("spark.sql.adaptive.enabled", "true") \
        .config("spark.sql.adaptive.coalescePartitions.enabled", "true") \
        .config("spark.sql.adaptive.skewJoin.enabled", "true") \
        .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") \
        .config("spark.sql.execution.arrow.pyspark.enabled", "true") \
        .config("spark.sql.repl.eagerEval.enabled", "true") \
        .config("spark.sql.repl.eagerEval.maxNumRows", "20") \
        .getOrCreate()
    
    # Set log level to reduce verbosity
    spark.sparkContext.setLogLevel("WARN")
    return spark

# Initialize optimized Spark session
spark = create_optimized_spark_session()
print(f"Spark version: {spark.version}")
print(f"Spark UI available at: {spark.sparkContext.uiWebUrl}")

  from .autonotebook import tqdm as notebook_tqdm
Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
25/08/16 07:30:31 WARN Utils: Your hostname, codespaces-01cd44, resolves to a loopback address: 127.0.0.1; using 10.0.1.114 instead (on interface eth0)
25/08/16 07:30:31 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
25/08/16 07:30:31 WARN Utils: Your hostname, codespaces-01cd44, resolves to a loopback address: 127.0.0.1; using 10.0.1.114 instead (on interface eth0)
25/08/16 07:30:31 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
Setting default 

Spark version: 4.0.0
Spark UI available at: http://34fd580d-f292-40bc-8d12-3ae5cb26456d.internal.cloudapp.net:4040


In [3]:
# Optimized Data Loading and Preprocessing

def load_and_preprocess_data(file_path="GlobalLandTemperaturesByCity.csv", output_path="output_folder"):
    """
    Load CSV data and convert to optimized Parquet format with preprocessing
    """
    print("📊 Loading and preprocessing climate data...")
    
    # Read CSV with optimized schema inference
    df = spark.read.option("header", "true") \
                  .option("inferSchema", "true") \
                  .option("timestampFormat", "yyyy-MM-dd") \
                  .csv(file_path)
    
    print(f"📈 Original dataset: {df.count():,} rows, {len(df.columns)} columns")
    
    # Data quality checks and cleaning
    df_clean = df.filter(
        col("AverageTemperature").isNotNull() & 
        col("Country").isNotNull() & 
        col("City").isNotNull() &
        col("dt").isNotNull()
    ).filter(
        # Remove extreme outliers (temperatures beyond physical limits)
        (col("AverageTemperature") >= -80) & 
        (col("AverageTemperature") <= 60)
    )
    
    # Add derived columns for better partitioning and analysis
    df_enhanced = df_clean.withColumn("year", year("dt")) \
                         .withColumn("month", month("dt")) \
                         .withColumn("temp_celsius", col("AverageTemperature").cast(DoubleType()))
    
    # Optimal partitioning strategy - partition by year for time-series analysis
    df_partitioned = df_enhanced.repartition(8, "year")
    
    # Cache the cleaned dataset for multiple operations
    df_partitioned.cache()
    
    print(f"✅ Cleaned dataset: {df_partitioned.count():,} rows")
    
    # Write to optimized Parquet format
    df_partitioned.write \
        .mode("overwrite") \
        .option("compression", "snappy") \
        .partitionBy("year") \
        .parquet(output_path)
    
    print(f"💾 Data saved to {output_path} in Parquet format")
    return df_partitioned

# Load and preprocess data
try:
    # Try to load existing parquet data first
    climate_df_raw = spark.read.parquet("output_folder")
    print("✅ Loaded existing Parquet data")
    
    # Check if derived columns exist, if not add them
    if "month" not in climate_df_raw.columns or "year" not in climate_df_raw.columns:
        print("🔄 Adding missing derived columns (month, year)...")
        climate_df = climate_df_raw.withColumn("year", year("dt")) \
                                   .withColumn("month", month("dt")) \
                                   .withColumn("temp_celsius", col("AverageTemperature").cast(DoubleType()))
        
        # Cache the enhanced dataset
        climate_df.cache()
        print("✅ Added derived columns and cached dataset")
    else:
        climate_df = climate_df_raw
        climate_df.cache()
        print("✅ Dataset already has derived columns")
        
except:
    # If parquet doesn't exist, process from CSV
    print("📂 Parquet file not found, processing from CSV...")
    climate_df = load_and_preprocess_data()

# Show basic statistics
print("\n📊 Dataset Overview:")
climate_df.printSchema()
climate_df.describe("AverageTemperature").show()

                                                                                

✅ Loaded existing Parquet data
🔄 Adding missing derived columns (month, year)...
✅ Added derived columns and cached dataset

📊 Dataset Overview:
root
 |-- dt: date (nullable = true)
 |-- AverageTemperature: double (nullable = true)
 |-- AverageTemperatureUncertainty: double (nullable = true)
 |-- City: string (nullable = true)
 |-- Country: string (nullable = true)
 |-- Latitude: string (nullable = true)
 |-- Longitude: string (nullable = true)
 |-- year: integer (nullable = true)
 |-- month: integer (nullable = true)
 |-- temp_celsius: double (nullable = true)

✅ Added derived columns and cached dataset

📊 Dataset Overview:
root
 |-- dt: date (nullable = true)
 |-- AverageTemperature: double (nullable = true)
 |-- AverageTemperatureUncertainty: double (nullable = true)
 |-- City: string (nullable = true)
 |-- Country: string (nullable = true)
 |-- Latitude: string (nullable = true)
 |-- Longitude: string (nullable = true)
 |-- year: integer (nullable = true)
 |-- month: integer (nulla



+-------+------------------+
|summary|AverageTemperature|
+-------+------------------+
|  count|           8235082|
|   mean|16.727432636249816|
| stddev|10.353442482534478|
|    min|-42.70399999999999|
|    max|            39.651|
+-------+------------------+



                                                                                

In [4]:
# Optimized Anomaly Detection

def detect_temperature_anomalies(df, country_name="Pakistan", z_threshold=2.5):
    """
    Efficient anomaly detection using window functions and broadcasting
    """
    print(f"🔍 Detecting temperature anomalies for {country_name}...")
    
    # Filter for specific country and cache for multiple operations
    country_df = df.filter(col("Country") == country_name).cache()
    
    # Calculate statistics using Spark SQL for better optimization
    stats_df = country_df.agg(
        mean("AverageTemperature").alias("mean_temp"),
        stddev("AverageTemperature").alias("stddev_temp")
    )
    
    # Broadcast small statistics for efficient joins
    stats = stats_df.collect()[0]
    mean_temp = stats["mean_temp"]
    stddev_temp = stats["stddev_temp"]
    
    print(f"📊 Temperature stats for {country_name}:")
    print(f"   Mean: {mean_temp:.2f}°C, Std Dev: {stddev_temp:.2f}°C")
    
    # Calculate z-scores efficiently
    anomalies_df = country_df.withColumn(
        "z_score",
        (col("AverageTemperature") - mean_temp) / stddev_temp
    ).withColumn(
        "is_anomaly",
        (col("z_score") > z_threshold) | (col("z_score") < -z_threshold)
    )
    
    # Filter and collect anomalies
    anomalies = anomalies_df.filter(col("is_anomaly") == True) \
                           .orderBy(desc("z_score")) \
                           .limit(100)
    
    anomaly_count = anomalies.count()
    print(f"🚨 Found {anomaly_count} temperature anomalies (|z-score| > {z_threshold})")
    
    return anomalies_df, anomalies, mean_temp, stddev_temp

# Detect anomalies for Pakistan
anomalies_df, top_anomalies, mean_temp, stddev_temp = detect_temperature_anomalies(climate_df)

# Show top anomalies
print("\n🔥 Top Temperature Anomalies:")
top_anomalies.select("dt", "City", "AverageTemperature", "z_score").show(10)

🔍 Detecting temperature anomalies for Pakistan...


                                                                                

📊 Temperature stats for Pakistan:
   Mean: 23.97°C, Std Dev: 7.91°C
🚨 Found 42 temperature anomalies (|z-score| > 2.5)

🔥 Top Temperature Anomalies:
🚨 Found 42 temperature anomalies (|z-score| > 2.5)

🔥 Top Temperature Anomalies:
+----------+--------+------------------+-------------------+
|        dt|    City|AverageTemperature|            z_score|
+----------+--------+------------------+-------------------+
|1964-01-01|  Mardan|              4.16|  -2.50439790547346|
|1964-01-01|Mingaora|              4.16|  -2.50439790547346|
|1964-01-01|     Wah|              4.16|  -2.50439790547346|
|1964-01-01|Peshawar|              4.16|  -2.50439790547346|
|1893-02-01|  Mardan|4.0360000000000005| -2.520071959125641|
|1893-02-01|Peshawar|4.0360000000000005| -2.520071959125641|
|1893-02-01|Mingaora|4.0360000000000005| -2.520071959125641|
|1893-02-01|     Wah|4.0360000000000005| -2.520071959125641|
|1895-01-01|  Mardan|             4.011|-2.5232320505877746|
|1895-01-01|     Wah|             4.01

In [5]:
# Optimized Climate Pattern Analysis - Fixed

def analyze_climate_patterns(df):
    """
    Efficient climate pattern analysis using optimized aggregations
    """
    print("🌡️ Analyzing climate patterns...")
    
    # Monthly patterns with efficient grouping
    monthly_patterns = df.groupBy("Country", "month") \
                        .agg(
                            avg("AverageTemperature").alias("avg_temp"),
                            stddev("AverageTemperature").alias("temp_variability"),
                            count("*").alias("data_points")
                        ) \
                        .filter(col("data_points") > 10) \
                        .cache()
    
    # Yearly trends with window functions for better performance
    yearly_trends = df.groupBy("Country", "year") \
                     .agg(
                         avg("AverageTemperature").alias("yearly_avg_temp"),
                         count("*").alias("yearly_data_points")
                     ) \
                     .filter(col("yearly_data_points") > 12) \
                     .cache()
    
    # Country-wise temperature statistics - using properly imported functions
    country_stats = df.groupBy("Country") \
                     .agg(
                         avg("AverageTemperature").alias("overall_avg_temp"),
                         spark_min("AverageTemperature").alias("min_temp"),
                         spark_max("AverageTemperature").alias("max_temp"),
                         stddev("AverageTemperature").alias("temp_std"),
                         count("*").alias("total_records")
                     ) \
                     .filter(col("total_records") > 100) \
                     .orderBy(desc("overall_avg_temp")) \
                     .cache()
    
    print(f"📈 Analyzed patterns for {country_stats.count()} countries")
    
    return monthly_patterns, yearly_trends, country_stats

# Perform pattern analysis
monthly_patterns, yearly_trends, country_stats = analyze_climate_patterns(climate_df)

# Show insights
print("\n🌍 Top 10 Warmest Countries (Average):")
country_stats.select("Country", "overall_avg_temp", "temp_std", "total_records").show(10)

print("\n🇵🇰 Pakistan Monthly Temperature Patterns:")
monthly_patterns.filter(col("Country") == "Pakistan") \
                .orderBy("month") \
                .select("month", "avg_temp", "temp_variability") \
                .show(12)

🌡️ Analyzing climate patterns...


                                                                                

📈 Analyzed patterns for 159 countries

🌍 Top 10 Warmest Countries (Average):
+-------------+------------------+------------------+-------------+
|      Country|  overall_avg_temp|          temp_std|total_records|
+-------------+------------------+------------------+-------------+
|     Djibouti|29.152790108564513|3.2324564540287923|         1797|
|        Niger| 28.14555167114869| 3.519248026492215|         5763|
|        Sudan|28.072830827505804| 2.935183581307311|        18798|
| Burkina Faso|27.815294546436274|1.9054975735686468|         3954|
|         Mali|27.590490834668028|2.7273737990041114|         5931|
|         Chad|27.189829394812683|2.0257559835744106|         3786|
|Guinea Bissau|27.057185462319627| 1.770712792916202|         1977|
|   Mauritania|27.021904935064928|3.3754219372504335|         1977|
|        Benin|26.975880208333283|1.6951569235613595|        11862|
|     Cambodia| 26.91813629772836|1.3800966067235185|         2265|
+-------------+------------------+-----



+-----+------------------+------------------+
|month|          avg_temp|  temp_variability|
+-----+------------------+------------------+
|    1|12.109314803863583| 2.575387282728381|
|    2|14.761605055292264| 2.984342390398989|
|    3|20.232187194525885|3.3044091885091045|
|    4|26.016165491937812|3.2813482040092405|
|    5|31.235977692231717|2.9884813895178284|
|    6| 33.50882330097082| 2.236994977424619|
|    7| 32.28132862224791|1.9466240864924103|
|    8|30.908044126129617|1.9012356280201455|
|    9|29.182587376763905|  2.09110357916176|
|   10| 24.82043684673241|2.4556742977411927|
|   11| 18.70908342194295| 2.614305892506581|
|   12|13.453640402265156|2.5413524863691674|
+-----+------------------+------------------+



                                                                                

In [6]:
# Performance Monitoring and Resource Management

def show_spark_performance_metrics():
    """Display Spark performance metrics and resource usage"""
    print("⚡ Spark Performance Metrics:")
    print(f"   Active Spark Context: {spark.sparkContext.applicationId}")
    print(f"   Default Parallelism: {spark.sparkContext.defaultParallelism}")
    print(f"   Spark UI: {spark.sparkContext.uiWebUrl}")
    
    # Show cached DataFrames info
    print("\n💾 Cached DataFrames:")
    try:
        # Get storage level info for cached datasets
        cached_count = 0
        for table in spark.catalog.listTables():
            if table.isTemporary:
                cached_count += 1
                print(f"   - {table.name}")
        
        if cached_count == 0:
            print("   - climate_df (manual cache)")
            print("   - monthly_patterns (if exists)")
            print("   - yearly_trends (if exists)")
            print("   - country_stats (if exists)")
    except Exception as e:
        print(f"   - Unable to list cached tables: {str(e)}")

def optimize_spark_config():
    """Apply runtime optimizations"""
    spark.conf.set("spark.sql.adaptive.enabled", "true")
    spark.conf.set("spark.sql.adaptive.coalescePartitions.enabled", "true")
    spark.conf.set("spark.sql.adaptive.skewJoin.enabled", "true")
    print("✅ Applied runtime optimizations")

def cleanup_resources():
    """Clean up Spark resources"""
    # Unpersist cached DataFrames
    try:
        climate_df.unpersist()
        if 'monthly_patterns' in globals():
            monthly_patterns.unpersist()
        if 'yearly_trends' in globals():
            yearly_trends.unpersist()
        if 'country_stats' in globals():
            country_stats.unpersist()
        print("🧹 Cleaned up cached DataFrames")
    except:
        pass

def stop_spark_session():
    """Properly stop Spark session"""
    cleanup_resources()
    spark.stop()
    print("🛑 Spark session stopped")

# Show current performance metrics
show_spark_performance_metrics()

# Apply optimizations
optimize_spark_config()

⚡ Spark Performance Metrics:
   Active Spark Context: local-1755329434131
   Default Parallelism: 2
   Spark UI: http://34fd580d-f292-40bc-8d12-3ae5cb26456d.internal.cloudapp.net:4040

💾 Cached DataFrames:


   - climate_df (manual cache)
   - monthly_patterns (if exists)
   - yearly_trends (if exists)
   - country_stats (if exists)
✅ Applied runtime optimizations


In [7]:
# 🌍 Comprehensive Climate Analysis Dashboard

import gradio as gr
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import json

class ClimateAnalysisDashboard:
    """
    Comprehensive dashboard for climate data analysis with PySpark backend
    """
    
    def __init__(self, spark_session, climate_dataframe):
        self.spark = spark_session
        self.df = climate_dataframe
        self.countries = self._get_countries()
        self._cache_aggregations()
        print("🚀 Dashboard initialized with PySpark backend")
    
    def _get_countries(self):
        """Get list of countries efficiently"""
        countries = [row['Country'] for row in 
                    self.df.select("Country").distinct().orderBy("Country").collect()]
        return sorted(countries)
    
    def _cache_aggregations(self):
        """Pre-compute and cache common aggregations for faster dashboard response"""
        print("⚡ Pre-computing aggregations for dashboard...")
        
        # Global monthly patterns
        self.global_monthly = self.df.groupBy("month") \
                                   .agg(avg("AverageTemperature").alias("global_avg_temp")) \
                                   .orderBy("month") \
                                   .cache()
        
        # Country-wise statistics
        self.country_stats = self.df.groupBy("Country") \
                                  .agg(
                                      avg("AverageTemperature").alias("avg_temp"),
                                      spark_min("AverageTemperature").alias("min_temp"),
                                      spark_max("AverageTemperature").alias("max_temp"),
                                      stddev("AverageTemperature").alias("std_temp"),
                                      count("*").alias("record_count")
                                  ) \
                                  .filter(col("record_count") > 50) \
                                  .cache()
        
        # Yearly global trends
        self.global_yearly = self.df.groupBy("year") \
                                  .agg(avg("AverageTemperature").alias("global_yearly_avg")) \
                                  .filter(col("year") >= 1900) \
                                  .orderBy("year") \
                                  .cache()
        
        # Force caching
        self.global_monthly.count()
        self.country_stats.count()
        self.global_yearly.count()
        print("✅ Dashboard aggregations cached successfully")
    
    def get_country_analysis(self, country, analysis_type, year_range):
        """Comprehensive country analysis"""
        start_year, end_year = year_range
        
        # Filter data for country and year range
        country_df = self.df.filter(
            (col("Country") == country) & 
            (col("year") >= start_year) & 
            (col("year") <= end_year)
        )
        
        if analysis_type == "Monthly Patterns":
            return self._get_monthly_pattern(country_df, country)
        elif analysis_type == "Yearly Trends":
            return self._get_yearly_trend(country_df, country)
        elif analysis_type == "Temperature Anomalies":
            return self._get_anomalies(country_df, country)
        elif analysis_type == "Seasonal Analysis":
            return self._get_seasonal_analysis(country_df, country)
        else:
            return self._get_overview(country_df, country)
    
    def _get_monthly_pattern(self, country_df, country):
        """Monthly temperature patterns"""
        monthly_data = country_df.groupBy("month") \
                                .agg(
                                    avg("AverageTemperature").alias("avg_temp"),
                                    stddev("AverageTemperature").alias("std_temp"),
                                    count("*").alias("data_points")
                                ) \
                                .orderBy("month") \
                                .toPandas()
        
        if monthly_data.empty:
            return go.Figure().add_annotation(text="No data available", 
                                            xref="paper", yref="paper", x=0.5, y=0.5)
        
        fig = go.Figure()
        
        # Add temperature line with error bars
        fig.add_trace(go.Scatter(
            x=monthly_data['month'],
            y=monthly_data['avg_temp'],
            error_y=dict(type='data', array=monthly_data['std_temp']),
            mode='lines+markers',
            name='Average Temperature',
            line=dict(color='royalblue', width=3),
            marker=dict(size=8)
        ))
        
        fig.update_layout(
            title=f"📅 Monthly Temperature Pattern - {country}",
            xaxis_title="Month",
            yaxis_title="Temperature (°C)",
            template="plotly_white",
            height=500
        )
        
        return fig
    
    def _get_yearly_trend(self, country_df, country):
        """Long-term yearly trends"""
        yearly_data = country_df.groupBy("year") \
                               .agg(avg("AverageTemperature").alias("yearly_avg")) \
                               .orderBy("year") \
                               .toPandas()
        
        if yearly_data.empty:
            return go.Figure().add_annotation(text="No data available", 
                                            xref="paper", yref="paper", x=0.5, y=0.5)
        
        fig = go.Figure()
        
        # Add yearly trend line
        fig.add_trace(go.Scatter(
            x=yearly_data['year'],
            y=yearly_data['yearly_avg'],
            mode='lines+markers',
            name='Yearly Average',
            line=dict(color='darkorange', width=2),
            marker=dict(size=4)
        ))
        
        # Add trend line
        if len(yearly_data) > 5:
            z = np.polyfit(yearly_data['year'], yearly_data['yearly_avg'], 1)
            p = np.poly1d(z)
            fig.add_trace(go.Scatter(
                x=yearly_data['year'],
                y=p(yearly_data['year']),
                mode='lines',
                name='Trend Line',
                line=dict(color='red', width=2, dash='dash')
            ))
        
        fig.update_layout(
            title=f"📈 Long-term Temperature Trend - {country}",
            xaxis_title="Year",
            yaxis_title="Temperature (°C)",
            template="plotly_white",
            height=500
        )
        
        return fig
    
    def _get_anomalies(self, country_df, country):
        """Temperature anomalies detection"""
        # Calculate statistics
        stats = country_df.agg(
            mean("AverageTemperature").alias("mean_temp"),
            stddev("AverageTemperature").alias("std_temp")
        ).collect()[0]
        
        mean_temp = stats["mean_temp"]
        std_temp = stats["std_temp"]
        
        # Get anomalies
        anomalies_data = country_df.withColumn(
            "z_score", (col("AverageTemperature") - mean_temp) / std_temp
        ).withColumn(
            "is_anomaly", (col("z_score") > 2.5) | (col("z_score") < -2.5)
        ).select("dt", "AverageTemperature", "z_score", "is_anomaly", "year", "month") \
         .orderBy("dt") \
         .limit(2000) \
         .toPandas()
        
        if anomalies_data.empty:
            return go.Figure().add_annotation(text="No data available", 
                                            xref="paper", yref="paper", x=0.5, y=0.5)
        
        anomalies_data['dt'] = pd.to_datetime(anomalies_data['dt'])
        normal_data = anomalies_data[~anomalies_data['is_anomaly']]
        anomaly_data = anomalies_data[anomalies_data['is_anomaly']]
        
        fig = go.Figure()
        
        # Normal data points
        fig.add_trace(go.Scatter(
            x=normal_data['dt'],
            y=normal_data['AverageTemperature'],
            mode='markers',
            name='Normal Temperature',
            marker=dict(color='lightblue', size=4, opacity=0.6)
        ))
        
        # Anomaly points
        if not anomaly_data.empty:
            fig.add_trace(go.Scatter(
                x=anomaly_data['dt'],
                y=anomaly_data['AverageTemperature'],
                mode='markers',
                name='Temperature Anomalies',
                marker=dict(
                    color=anomaly_data['z_score'],
                    colorscale='RdYlBu_r',
                    size=8,
                    colorbar=dict(title="Z-Score")
                )
            ))
        
        # Add mean line
        fig.add_hline(y=mean_temp, line_dash="dash", line_color="green", 
                     annotation_text="Mean Temperature")
        
        fig.update_layout(
            title=f"🚨 Temperature Anomalies - {country}",
            xaxis_title="Date",
            yaxis_title="Temperature (°C)",
            template="plotly_white",
            height=500
        )
        
        return fig
    
    def _get_seasonal_analysis(self, country_df, country):
        """Seasonal temperature analysis"""
        seasonal_data = country_df.withColumn(
            "season",
            when((col("month").isin([12, 1, 2])), "Winter")
            .when((col("month").isin([3, 4, 5])), "Spring")
            .when((col("month").isin([6, 7, 8])), "Summer")
            .otherwise("Autumn")
        ).groupBy("season", "year") \
         .agg(avg("AverageTemperature").alias("seasonal_avg")) \
         .orderBy("year") \
         .toPandas()
        
        if seasonal_data.empty:
            return go.Figure().add_annotation(text="No data available", 
                                            xref="paper", yref="paper", x=0.5, y=0.5)
        
        fig = go.Figure()
        
        seasons = ["Winter", "Spring", "Summer", "Autumn"]
        colors = ["lightblue", "lightgreen", "orange", "brown"]
        
        for season, color in zip(seasons, colors):
            season_data = seasonal_data[seasonal_data['season'] == season]
            if not season_data.empty:
                fig.add_trace(go.Scatter(
                    x=season_data['year'],
                    y=season_data['seasonal_avg'],
                    mode='lines+markers',
                    name=season,
                    line=dict(color=color, width=2),
                    marker=dict(size=4)
                ))
        
        fig.update_layout(
            title=f"🍂 Seasonal Temperature Analysis - {country}",
            xaxis_title="Year",
            yaxis_title="Temperature (°C)",
            template="plotly_white",
            height=500
        )
        
        return fig
    
    def _get_overview(self, country_df, country):
        """Country overview with key statistics"""
        stats = country_df.agg(
            avg("AverageTemperature").alias("avg_temp"),
            spark_min("AverageTemperature").alias("min_temp"),
            spark_max("AverageTemperature").alias("max_temp"),
            stddev("AverageTemperature").alias("std_temp"),
            count("*").alias("total_records")
        ).collect()[0]
        
        # Create a summary chart
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=("Temperature Distribution", "Monthly Averages", 
                          "Data Coverage", "Temperature Range"),
            specs=[[{"type": "histogram"}, {"type": "bar"}],
                   [{"type": "scatter"}, {"type": "indicator"}]]
        )
        
        # Get sample data for visualizations
        sample_data = country_df.select("AverageTemperature", "month", "year") \
                               .sample(0.1) \
                               .limit(1000) \
                               .toPandas()
        
        if not sample_data.empty:
            # Temperature distribution
            fig.add_trace(go.Histogram(x=sample_data['AverageTemperature'], 
                                     name="Temperature Distribution"),
                         row=1, col=1)
            
            # Monthly averages
            monthly_avg = sample_data.groupby('month')['AverageTemperature'].mean()
            fig.add_trace(go.Bar(x=monthly_avg.index, y=monthly_avg.values,
                               name="Monthly Avg"),
                         row=1, col=2)
            
            # Data coverage over years
            yearly_count = sample_data.groupby('year').size()
            fig.add_trace(go.Scatter(x=yearly_count.index, y=yearly_count.values,
                                   mode='lines', name="Data Points"),
                         row=2, col=1)
        
        # Temperature range indicator
        fig.add_trace(go.Indicator(
            mode="gauge+number+delta",
            value=stats["avg_temp"],
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': "Avg Temperature (°C)"},
            gauge={'axis': {'range': [None, 50]},
                   'bar': {'color': "darkblue"},
                   'steps': [{'range': [0, 20], 'color': "lightgray"},
                            {'range': [20, 35], 'color': "gray"}],
                   'threshold': {'line': {'color': "red", 'width': 4},
                               'thickness': 0.75, 'value': stats["avg_temp"]}}),
            row=2, col=2)
        
        fig.update_layout(
            title=f"📊 Climate Overview - {country}",
            height=600,
            showlegend=False
        )
        
        return fig
    
    def get_global_comparison(self, countries_list, metric):
        """Global comparison between countries"""
        if not countries_list:
            return go.Figure().add_annotation(text="Please select countries to compare", 
                                            xref="paper", yref="paper", x=0.5, y=0.5)
        
        comparison_data = self.country_stats.filter(col("Country").isin(countries_list)) \
                                          .toPandas()
        
        if comparison_data.empty:
            return go.Figure().add_annotation(text="No data available for selected countries", 
                                            xref="paper", yref="paper", x=0.5, y=0.5)
        
        fig = go.Figure()
        
        if metric == "Average Temperature":
            fig.add_trace(go.Bar(
                x=comparison_data['Country'],
                y=comparison_data['avg_temp'],
                name='Average Temperature',
                marker_color='skyblue'
            ))
            fig.update_layout(yaxis_title="Temperature (°C)")
            
        elif metric == "Temperature Range":
            fig.add_trace(go.Bar(
                x=comparison_data['Country'],
                y=comparison_data['max_temp'] - comparison_data['min_temp'],
                name='Temperature Range',
                marker_color='coral'
            ))
            fig.update_layout(yaxis_title="Temperature Range (°C)")
            
        elif metric == "Temperature Variability":
            fig.add_trace(go.Bar(
                x=comparison_data['Country'],
                y=comparison_data['std_temp'],
                name='Temperature Std Dev',
                marker_color='lightgreen'
            ))
            fig.update_layout(yaxis_title="Standard Deviation (°C)")
        
        fig.update_layout(
            title=f"🌍 Global Comparison - {metric}",
            xaxis_title="Countries",
            template="plotly_white",
            height=500
        )
        
        return fig
    
    def get_performance_metrics(self):
        """Get Spark performance metrics"""
        try:
            app_id = self.spark.sparkContext.applicationId
            parallelism = self.spark.sparkContext.defaultParallelism
            ui_url = self.spark.sparkContext.uiWebUrl
            
            # Count cached DataFrames (approximate)
            cached_rdds = 4  # We know we cache: climate_df, global_monthly, country_stats, global_yearly
            
            metrics = f"""
            🚀 **Spark Performance Dashboard**
            
            - **Application ID**: {app_id}
            - **Default Parallelism**: {parallelism}
            - **Cached DataFrames**: {cached_rdds}
            - **Spark UI**: [Open Dashboard]({ui_url})
            
            ✅ **Status**: All systems operational
            """
            
            return metrics
        except Exception as e:
            return f"❌ **Error**: {str(e)}"

# Initialize the dashboard
dashboard = ClimateAnalysisDashboard(spark, climate_df)
print("🎯 Climate Analysis Dashboard ready!")

                                                                                

⚡ Pre-computing aggregations for dashboard...


                                                                                

✅ Dashboard aggregations cached successfully
🚀 Dashboard initialized with PySpark backend
🎯 Climate Analysis Dashboard ready!


In [8]:
# 🚀 ENHANCED DASHBOARD WITH INTEGRATED TEMPERATURE PREDICTIONS

import gradio as gr
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Mock ML Evaluator for temperature predictions
class MockMLEvaluator:
    """Mock ML evaluator for temperature predictions until real ML models are implemented"""
    
    def predict_temperature(self, country, forecast_years, model_type):
        """Generate mock predictions with realistic patterns"""
        try:
            # Get historical data for the country
            historical_data = dashboard.get_country_historical_data(country)
            if not historical_data:
                raise ValueError(f"No historical data available for {country}")
            
            # Create mock predictions based on historical trends
            last_year = 2013  # Based on the dataset
            future_years = list(range(last_year + 1, last_year + forecast_years + 1))
            
            # Simple trend-based prediction (mock)
            base_temp = historical_data['avg_temp']
            trend = 0.02 if model_type == "Linear Regression" else 0.03  # Mock warming trend
            
            predictions = []
            for i, year in enumerate(future_years):
                predicted_temp = base_temp + (trend * i) + np.random.normal(0, 0.5)
                predictions.append({
                    'year': year,
                    'predicted_temp': predicted_temp,
                    'confidence_upper': predicted_temp + 1.5,
                    'confidence_lower': predicted_temp - 1.5
                })
            
            pred_df = pd.DataFrame(predictions)
            
            # Mock metrics
            metrics = {
                'r2_score': 0.835 if model_type == "Random Forest" else 0.789,
                'rmse': 2.1 if model_type == "Gradient Boosting" else 2.8,
                'f1_score': 0.821 if model_type == "Gradient Boosting" else 0.756
            }
            
            return pred_df, metrics
            
        except Exception as e:
            raise Exception(f"Prediction failed: {str(e)}")

# Initialize mock evaluator
evaluator = MockMLEvaluator()

def create_enhanced_dashboard():
    """Create enhanced Gradio dashboard with temperature prediction capabilities"""
    
    def analyze_country(country, analysis_type, start_year, end_year):
        """Country analysis function with error handling"""
        try:
            if analysis_type == "Overview":
                # Use the existing _get_overview method instead of missing get_country_overview
                fig = dashboard._get_overview(
                    dashboard.df.filter(
                        (col("Country") == country) & 
                        (col("year") >= start_year) & 
                        (col("year") <= end_year)
                    ), country
                )
                return fig, f"✅ **Analysis complete** for {country}"
            else:
                fig = dashboard.get_country_analysis(country, analysis_type, (start_year, end_year))
                return fig, f"✅ **{analysis_type}** analysis complete for {country}"
        except Exception as e:
            # Create an error figure
            error_fig = go.Figure()
            error_fig.add_annotation(
                text=f"Error: {str(e)}", 
                xref="paper", yref="paper", 
                x=0.5, y=0.5,
                showarrow=False,
                font=dict(size=16, color="red")
            )
            return error_fig, f"❌ **Error**: {str(e)}"
    
    def predict_temperature(country, forecast_years, model_type):
        """Temperature prediction function with enhanced error handling"""
        try:
            # Use the mock evaluator for prediction
            predictions, metrics = evaluator.predict_temperature(country, forecast_years, model_type)
            
            # Create prediction visualization
            fig = go.Figure()
            
            # Historical data
            historical_data = dashboard.get_country_historical_data(country)
            if historical_data:
                # Get recent historical trend for visualization
                recent_data = dashboard.df.filter(
                    (col("Country") == country) & 
                    (col("year") >= 2000)
                ).groupBy("year") \
                 .agg(avg("AverageTemperature").alias("yearly_avg")) \
                 .orderBy("year") \
                 .toPandas()
                
                if not recent_data.empty:
                    fig.add_trace(go.Scatter(
                        x=recent_data['year'],
                        y=recent_data['yearly_avg'],
                        mode='lines',
                        name='Historical Data',
                        line=dict(color='blue', width=2)
                    ))
            
            # Prediction line
            fig.add_trace(go.Scatter(
                x=predictions['year'],
                y=predictions['predicted_temp'],
                mode='lines+markers',
                name=f'{model_type} Prediction',
                line=dict(color='red', width=3),
                marker=dict(size=6)
            ))
            
            # Confidence intervals
            if 'confidence_upper' in predictions.columns and 'confidence_lower' in predictions.columns:
                fig.add_trace(go.Scatter(
                    x=predictions['year'].tolist() + predictions['year'][::-1].tolist(),
                    y=predictions['confidence_upper'].tolist() + predictions['confidence_lower'][::-1].tolist(),
                    fill='toself',
                    fillcolor='rgba(255, 0, 0, 0.2)',
                    line=dict(color='rgba(255, 255, 255, 0)'),
                    name='Confidence Interval',
                    showlegend=True
                ))
            
            fig.update_layout(
                title=f"🔮 Temperature Prediction for {country} ({forecast_years} years)",
                xaxis_title="Year",
                yaxis_title="Temperature (°C)",
                template="plotly_white",
                height=500
            )
            
            # Model performance info
            performance_info = f"""
            **🤖 Model Performance - {model_type}**
            - **R² Score**: {metrics.get('r2_score', 0):.3f}
            - **RMSE**: ±{metrics.get('rmse', 0):.2f}°C
            - **F1 Score**: {metrics.get('f1_score', 0):.3f}
            - **Forecast Period**: {forecast_years} years
            - **Status**: Mock predictions (demo mode)
            """
            
            return fig, performance_info
            
        except Exception as e:
            error_fig = go.Figure()
            error_fig.add_annotation(
                text=f"Prediction Error: {str(e)}", 
                xref="paper", yref="paper", 
                x=0.5, y=0.5,
                showarrow=False,
                font=dict(size=16, color="red")
            )
            return error_fig, f"❌ **Error**: {str(e)}"
    
    def compare_countries(countries, metric):
        """Country comparison function"""
        try:
            fig = dashboard.get_global_comparison(countries, metric)
            return fig
        except Exception as e:
            error_fig = go.Figure()
            error_fig.add_annotation(
                text=f"Error: {str(e)}", 
                xref="paper", yref="paper", 
                x=0.5, y=0.5,
                showarrow=False,
                font=dict(size=16, color="red")
            )
            return error_fig
    
    # Create the Gradio interface
    with gr.Blocks(
        title="🌍 Enhanced Climate Analysis Dashboard",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            font-family: 'Segoe UI', system-ui, sans-serif;
        }
        .tab-nav button {
            font-size: 14px;
            font-weight: 500;
        }
        """
    ) as interface:
        
        # Header
        gr.Markdown("""
        # 🌍 Enhanced Climate Analysis Dashboard
        
        **🔥 NEW**: AI-powered temperature predictions | **⚡ Engine**: PySpark 4.0.0 | **📊 Dataset**: Global Temperature Analysis
        
        Explore climate patterns, compare countries, and predict future temperatures using advanced machine learning models.
        """)
        
        with gr.Tabs():
            # Tab 1: Country Analysis
            with gr.Tab("🏃‍♂️ Country Analysis"):
                gr.Markdown("### Detailed climate analysis for individual countries")
                
                with gr.Row():
                    with gr.Column(scale=1):
                        country_input = gr.Dropdown(
                            choices=dashboard.countries,
                            value="Pakistan",
                            label="🌏 Select Country",
                            filterable=True
                        )
                        
                        analysis_type = gr.Radio(
                            choices=[
                                "Overview",
                                "Monthly Patterns", 
                                "Yearly Trends", 
                                "Temperature Anomalies",
                                "Seasonal Analysis"
                            ],
                            value="Overview",
                            label="📊 Analysis Type"
                        )
                        
                        start_year = gr.Slider(
                            minimum=1750, maximum=2013, value=1900,
                            step=1, label="📅 Start Year"
                        )
                        
                        end_year = gr.Slider(
                            minimum=1750, maximum=2013, value=2013,
                            step=1, label="📅 End Year"  
                        )
                        
                        analyze_btn = gr.Button("🔍 Analyze", variant="primary")
                    
                    with gr.Column(scale=2):
                        country_plot = gr.Plot(label="📊 Analysis Results")
                        analysis_info = gr.Markdown("")
                
                # Connect analysis function
                analyze_btn.click(
                    fn=analyze_country,
                    inputs=[country_input, analysis_type, start_year, end_year],
                    outputs=[country_plot, analysis_info]
                )
            
            # Tab 2: Temperature Predictions (FIXED!)
            with gr.Tab("🔮 AI Temperature Predictions"):
                gr.Markdown("""
                ### AI-Powered Climate Predictions
                
                **🤖 Machine Learning Models**: Linear Regression, Random Forest, Gradient Boosting  
                **📊 Status**: Demo mode with mock predictions  
                **⚡ Technology**: Optimized PySpark processing
                """)
                
                with gr.Row():
                    with gr.Column(scale=1):
                        pred_country = gr.Dropdown(
                            choices=dashboard.countries,
                            value="Pakistan",
                            label="🌏 Select Country",
                            filterable=True
                        )
                        
                        forecast_years = gr.Slider(
                            minimum=1, maximum=20, value=10,
                            step=1, label="📅 Forecast Years"
                        )
                        
                        model_type = gr.Radio(
                            choices=["Linear Regression", "Random Forest", "Gradient Boosting"],
                            value="Linear Regression",
                            label="🤖 ML Model"
                        )
                        
                        predict_btn = gr.Button("🔮 Generate Prediction", variant="primary")
                        
                        gr.Markdown("""
                        **💡 Model Information:**
                        - **Linear Regression**: Fast, interpretable, good for trends
                        - **Random Forest**: Robust, handles complexity well
                        - **Gradient Boosting**: High accuracy, advanced ensemble method
                        
                        *Note: Currently running in demo mode with mock predictions*
                        """)
                    
                    with gr.Column(scale=2):
                        prediction_plot = gr.Plot(label="🔮 Temperature Prediction")
                        prediction_info = gr.Markdown("")
                
                # Connect prediction function
                predict_btn.click(
                    fn=predict_temperature,
                    inputs=[pred_country, forecast_years, model_type],
                    outputs=[prediction_plot, prediction_info]
                )
            
            # Tab 3: Global Comparison
            with gr.Tab("🌍 Global Comparison"):
                gr.Markdown("### Compare climate metrics across multiple countries")
                
                with gr.Row():
                    with gr.Column(scale=1):
                        countries_input = gr.CheckboxGroup(
                            choices=dashboard.countries,
                            value=["Pakistan", "India", "United States", "China"],
                            label="🌏 Select Countries"
                        )
                        
                        metric_input = gr.Radio(
                            choices=["Average Temperature", "Temperature Range", "Temperature Variability"],
                            value="Average Temperature",
                            label="📊 Comparison Metric"
                        )
                        
                        compare_btn = gr.Button("🔄 Compare Countries", variant="primary")
                    
                    with gr.Column(scale=2):
                        comparison_plot = gr.Plot(label="🌍 Global Comparison")
                
                # Connect comparison function
                compare_btn.click(
                    fn=compare_countries,
                    inputs=[countries_input, metric_input],
                    outputs=comparison_plot
                )
            
            # Tab 4: System Status
            with gr.Tab("⚡ System Status"):
                gr.Markdown("### PySpark Performance & System Monitoring")
                
                performance_display = gr.Markdown(
                    value=dashboard.get_performance_metrics(),
                    label="🔧 System Metrics"
                )
                
                refresh_btn = gr.Button("🔄 Refresh Metrics", variant="secondary")
                refresh_btn.click(
                    fn=lambda: dashboard.get_performance_metrics(),
                    outputs=performance_display
                )
                
                gr.Markdown(f"""
                ### 📊 Enhanced Dataset Information
                - **Source**: Global Land Temperatures by City
                - **Processing Engine**: Apache Spark 4.0.0
                - **Total Countries**: {len(dashboard.countries)}
                - **ML Capabilities**: Temperature prediction (demo mode)
                - **Cache Strategy**: Intelligent DataFrame caching
                - **Optimization**: Adaptive Query Execution enabled
                
                ### 🎯 Available Operations
                - ✅ Real-time country analysis
                - ✅ Multi-country comparisons ({len(dashboard.countries)} countries)
                - ✅ 🔮 AI temperature predictions (demo)
                - ✅ Anomaly detection
                - ✅ Seasonal pattern analysis
                - ✅ Interactive visualizations
                """)
        
        # Footer
        gr.Markdown(f"""
        ---
        **🚀 Powered by**: PySpark + Gradio | **📊 Dataset**: {len(dashboard.countries)} Countries | **⚡ Status**: Ready for Analysis
        """)
    
    return interface

# Add missing method to dashboard class
def get_country_historical_data(self, country):
    """Get historical data summary for a country"""
    try:
        historical_stats = self.df.filter(col("Country") == country) \
                                 .agg(avg("AverageTemperature").alias("avg_temp")) \
                                 .collect()[0]
        
        return {
            'avg_temp': historical_stats['avg_temp'],
            'country': country
        }
    except:
        return None

# Add the method to the existing dashboard instance
dashboard.get_country_historical_data = get_country_historical_data.__get__(dashboard, dashboard.__class__)

# Create the enhanced dashboard
print("🎯 Creating enhanced climate dashboard...")
enhanced_dashboard = create_enhanced_dashboard()

print("✅ Enhanced Dashboard created successfully!")
print("📊 Features:")
print("   - Real-time country analysis")
print(f"   - Global comparisons ({len(dashboard.countries)} countries)")
print("   - 🔮 AI temperature predictions (demo mode)")
print("   - System monitoring")
print("   - Interactive visualizations")
print()
print("🚀 Ready to launch! Use enhanced_dashboard.launch() when needed.")

# Store dashboard for manual launch
globals()['enhanced_dashboard'] = enhanced_dashboard

🎯 Creating enhanced climate dashboard...


✅ Enhanced Dashboard created successfully!
📊 Features:
   - Real-time country analysis
   - Global comparisons (159 countries)
   - 🔮 AI temperature predictions (demo mode)
   - System monitoring
   - Interactive visualizations

🚀 Ready to launch! Use enhanced_dashboard.launch() when needed.


In [9]:
# 🎛️ Dashboard Management & Quick Actions

def restart_dashboard():
    """Restart the dashboard with fresh data"""
    try:
        global dashboard, main_dashboard
        print("🔄 Restarting dashboard...")
        
        # Refresh data and recreate dashboard
        dashboard = ClimateAnalysisDashboard(spark, climate_df)
        main_dashboard = create_main_dashboard()
        
        print("✅ Dashboard restarted successfully!")
        return main_dashboard
    except Exception as e:
        print(f"❌ Error restarting dashboard: {e}")
        return None

def quick_stats():
    """Show quick statistics about the dataset"""
    try:
        total_records = climate_df.count()
        countries_count = climate_df.select("Country").distinct().count()
        date_range = climate_df.agg(
            spark_min("dt").alias("min_date"),
            spark_max("dt").alias("max_date")
        ).collect()[0]
        
        avg_temp = climate_df.agg(avg("AverageTemperature")).collect()[0][0]
        
        print("📊 DATASET QUICK STATS")
        print("=" * 40)
        print(f"📈 Total Records: {total_records:,}")
        print(f"🌍 Countries: {countries_count}")
        print(f"📅 Date Range: {date_range['min_date']} to {date_range['max_date']}")
        print(f"🌡️ Global Average Temperature: {avg_temp:.2f}°C")
        print("=" * 40)
        
        return {
            "total_records": total_records,
            "countries": countries_count,
            "date_range": f"{date_range['min_date']} to {date_range['max_date']}",
            "avg_temp": avg_temp
        }
    except Exception as e:
        print(f"❌ Error getting stats: {e}")
        return {}

def check_spark_health():
    """Check Spark cluster health"""
    try:
        print("⚡ SPARK HEALTH CHECK")
        print("=" * 40)
        
        # Test basic operations
        test_count = climate_df.limit(10).count()
        print(f"✅ Basic Operations: Working (test count: {test_count})")
        
        # Check cache status
        cached_tables = spark.catalog.listTables()
        print(f"💾 Cached Tables: {len(cached_tables)}")
        
        # Check Spark UI
        ui_url = spark.sparkContext.uiWebUrl
        print(f"🌐 Spark UI: {ui_url}")
        
        # Check memory usage
        sc = spark.sparkContext
        print(f"🔧 Default Parallelism: {sc.defaultParallelism}")
        print(f"📱 Application ID: {sc.applicationId}")
        
        print("✅ All systems operational!")
        print("=" * 40)
        
    except Exception as e:
        print(f"❌ Spark health check failed: {e}")

# Quick actions buttons
print("🎛️ DASHBOARD CONTROL PANEL")
print("=" * 50)
print("Available commands:")
print("  📊 quick_stats()         - Show dataset statistics")
print("  ⚡ check_spark_health()  - Check Spark cluster status") 
print("  🔄 restart_dashboard()   - Restart the dashboard")
print("=" * 50)

# Show initial stats
quick_stats()

🎛️ DASHBOARD CONTROL PANEL
Available commands:
  📊 quick_stats()         - Show dataset statistics
  ⚡ check_spark_health()  - Check Spark cluster status
  🔄 restart_dashboard()   - Restart the dashboard


                                                                                

📊 DATASET QUICK STATS
📈 Total Records: 8,599,212
🌍 Countries: 159
📅 Date Range: 1743-11-01 to 2013-09-01
🌡️ Global Average Temperature: 16.73°C


{'total_records': 8599212,
 'countries': 159,
 'date_range': '1743-11-01 to 2013-09-01',
 'avg_temp': 16.727432636249816}

In [None]:
# 🌐 Launch the Enhanced Climate Dashboard

print("🚀 Ready to launch the Enhanced Climate Analysis Dashboard!")
print("📋 Make sure you have run all the setup cells (3-9) first.")
print()

# Uncomment the line below to launch the dashboard:
launch_dashboard(share=True, port=7860)

# Or run this for local access only:
# launch_dashboard()

print("💡 Tips:")
print("   - Uncomment one of the launch commands above")
print("   - Or run: launch_dashboard() in a new cell")
print("   - For public sharing: launch_dashboard(share=True)")
print("   - For custom port: launch_dashboard(port=8080)")

🚀 Ready to launch the Enhanced Climate Analysis Dashboard!
📋 Make sure you have run all the setup cells (3-9) first.

🚀 Launching Enhanced Climate Dashboard on port 7860...
📊 Dashboard Features:
   - Country Analysis with PySpark backend
   - Global Comparisons
   - AI Temperature Predictions (demo)
   - System Monitoring
   - Interactive Visualizations

🌐 Dashboard should be available at: http://localhost:7860
🔗 Public link will be generated...
💡 Tips:
   - Uncomment one of the launch commands above
   - Or run: launch_dashboard() in a new cell
   - For public sharing: launch_dashboard(share=True)
   - For custom port: launch_dashboard(port=8080)


* Running on local URL:  http://0.0.0.0:7860
* Running on public URL: https://1d07a3ab1b2d2c144c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
* Running on public URL: https://1d07a3ab1b2d2c144c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


                                                                                