In [None]:
import datetime
import json
import os
import re

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from elasticsearch import Elasticsearch

In [None]:
# Elasticsearch connection settings.
# Values come from the environment so real credentials need not be committed with
# the notebook; the fallbacks reproduce the previous local test configuration.
ES_HOST = os.environ.get("ES_HOST", "https://127.0.0.1:9200")
ES_USERNAME = os.environ.get("ES_USERNAME", "elastic")
ES_PASSWORD = os.environ.get("ES_PASSWORD", "elastic")

es = Elasticsearch(
    ES_HOST,
    verify_certs=False,  # Test environment only: certificate verification is temporarily disabled
    ssl_show_warn=False,
    basic_auth=(ES_USERNAME, ES_PASSWORD),  # credentials are passed once, here, rather than also in the URL
)

# Australian Politics



In [None]:
# Lower-case words and phrases used to recognise election-related posts.
# Every entry must be unique: the processing cell records one match per list
# entry, so a repeated entry would count the same term twice for a single post.
election_keywords = [
    # General election terms
    "election", "vote", "ballot", "polling", "campaign", "candidate", "political party",
    "preference", "electorate", "constituency", "seat", "margin", "swing", "democracy",

    # Australian political parties
    "liberal party", "labor party", "alp", "lnp", "nationals", "greens", "one nation",
    "united australia party", "uap", "independents", "coalition", "teals",

    # Political figures
    "prime minister", "anthony albanese", "albo", "scott morrison", "peter dutton",
    "adam bandt", "pauline hanson", "clive palmer", "bob katter", "jacqui lambie",

    # Policy issues
    "climate change", "housing", "cost of living", "economy", "taxation", "healthcare",
    "medicare", "education", "immigration", "refugees", "indigenous", "first nations",
    "aboriginal", "defence", "aukus", "infrastructure", "corruption", "integrity",

    # Electoral system and institutions
    "aec", "australian electoral commission", "preferential voting", "compulsory voting",
    "electoral roll", "senate", "house of representatives", "parliament", "hung parliament",

    # Election events
    "campaign launch", "policy announcement", "debate", "press conference", "town hall",
    "doorstop", "scandal", "gaffe", "marginal seat", "safe seat",

    # Media and social media terms ("polling" is already listed under general terms)
    "newspoll", "opinion poll", "media coverage", "social media", "hashtag",
    "viral", "fact check", "misinformation", "political advertising",

    # Election result terms ("swing" is already listed under general terms)
    "counting", "vote count", "scrutineer", "results", "win", "lose", "victory", "defeat",
    "landslide", "tight race", "mandate", "primary vote", "two-party preferred",
    "2pp", "preferences", "minority government", "majority government",

    # Election time and place
    "election day", "pre-poll", "early voting", "postal vote", "voting booth", "polling place",
    "ballot box", "electoral commission",

    # Voter behavior and sentiment
    "voter", "turnout", "apathy", "engagement", "frustration", "hope", "change", "status quo",
    "protest vote", "tactical voting", "donkey vote", "informal vote"
]

# SA3 region names in Australia.
# This list is concatenated with `major_locations` into `all_locations`, which the
# query cell scans to attach one or more locations to each post.
# Several names here (Newcastle, Wollongong, Geelong, Townsville) also appear in
# `major_locations`.
# Entries are compared as written, so spelling and the " - " separators matter.
sa3_names = [
    "Adelaide City", "Adelaide Hills", "Albany", "Albury", "Alice Springs", "Armadale", 
    "Armidale", "Auburn", "Augusta - Margaret River - Busselton", "Bald Hills - Everton Park", 
    "Ballarat", "Bankstown", "Banyule", "Barkly", "Barossa", "Barwon - West", "Bathurst", 
    "Baulkham Hills", "Baw Baw", "Bayside", "Bayswater - Bassendean", "Beaudesert", "Beenleigh", 
    "Belconnen", "Belmont - Victoria Park", "Bendigo", "Biloela", "Blacktown", "Blacktown - North", 
    "Blue Mountains", "Blue Mountains - South", "Boroondara", "Botany", "Bourke - Cobar - Coonamble", 
    "Bowen Basin - North", "Bribie - Beachmere", "Brighton", "Brimbank", "Bringelly - Green Valley", 
    "Brisbane Inner", "Brisbane Inner - East", "Brisbane Inner - North", "Brisbane Inner - West", 
    "Broadbeach - Burleigh", "Broken Hill and Far West", "Browns Plains", "Brunswick - Coburg", 
    "Buderim", "Bunbury", "Bundaberg", "Burnett", "Burnie - Ulverstone", "Burnside", "Caboolture", 
    "Caboolture Hinterland", "Cairns - North", "Cairns - South", "Caloundra", "Camden", "Campaspe", 
    "Campbelltown (NSW)", "Campbelltown (SA)", "Canada Bay", "Canberra East", "Canning", "Canterbury", 
    "Capalaba", "Cardinia", "Carindale", "Carlingford", "Casey - North", "Casey - South", "Centenary", 
    "Central Highlands (Qld)", "Central Highlands (Tas.)", "Charles Sturt", "Charters Towers - Ayr - Ingham", 
    "Chatswood - Lane Cove", "Chermside", "Christmas Island", "Clarence Valley", "Cleveland - Stradbroke", 
    "Cockburn", "Cocos (Keeling) Islands", "Coffs Harbour", "Colac - Corangamite", "Coolangatta", 
    "Cottesloe - Claremont", "Creswick - Daylesford - Ballan", "Cronulla - Miranda - Caringbah", 
    "Daly - Tiwi - West Arnhem", "Dandenong", "Dapto - Port Kembla", "Darebin - North", "Darebin - South", 
    "Darling Downs (West) - Maranoa", "Darling Downs - East", "Darwin City", "Darwin Suburbs", "Devonport", 
    "Dubbo", "Dural - Wisemans Ferry", "East Arnhem", "East Pilbara", "Eastern Suburbs - North", 
    "Eastern Suburbs - South", "Esperance", "Essendon", "Eyre Peninsula and South West", "Fairfield", 
    "Far North", "Fleurieu - Kangaroo Island", "Forest Lake - Oxley", "Frankston", "Fremantle", "Gascoyne", 
    "Gawler - Two Wells", "Geelong", "Gippsland - East", "Gippsland - South West", "Gladstone", "Glen Eira", 
    "Glenelg - Southern Grampians", "Gold Coast - North", "Gold Coast Hinterland", "Goldfields", "Gosford", 
    "Gosnells", "Goulburn - Mulwaree", "Grampians", "Granite Belt", "Great Lakes", "Griffith - Murrumbidgee (West)", 
    "Gungahlin", "Gympie - Cooloola", "Hawkesbury", "Heathcote - Castlemaine - Kyneton", "Hervey Bay", 
    "Hobart - North East", "Hobart - North West", "Hobart - South and West", "Hobart Inner", "Hobsons Bay", 
    "Holdfast Bay", "Holland Park - Yeronga", "Hornsby", "Huon - Bruny Island", "Hurstville", 
    "Illawarra Catchment Reserve", "Innisfail - Cassowary Coast", "Inverell - Tenterfield", "Ipswich Hinterland", 
    "Ipswich Inner", "Jervis Bay", "Jimboomba", "Joondalup", "Kalamunda", "Katherine", "Keilor", 
    "Kempsey - Nambucca", "Kenmore - Brookfield - Moggill", "Kiama - Shellharbour", "Kimberley", "Kingston", 
    "Knox", "Kogarah - Rockdale", "Ku-ring-gai", "Kwinana", "Lachlan Valley", "Lake Macquarie - East", 
    "Lake Macquarie - West", "Latrobe Valley", "Launceston", "Leichhardt", "Limestone Coast", "Litchfield", 
    "Lithgow - Mudgee", "Liverpool", "Loddon - Elmore", "Loganlea - Carbrook", "Lord Howe Island", 
    "Lower Hunter", "Lower Murray", "Lower North", "Macedon Ranges", "Mackay", "Maitland", "Mandurah", 
    "Manjimup", "Manly", "Manningham - East", "Manningham - West", "Maribyrnong", "Marion", "Maroochy", 
    "Maroondah", "Marrickville - Sydenham - Petersham", "Maryborough", "Maryborough - Pyrenees", 
    "Meander Valley - West Tamar", "Melbourne City", "Melton - Bacchus Marsh", "Melville", 
    "Merrylands - Guildford", "Mid North", "Mid West", "Mildura", "Mitcham", "Moira", "Molonglo", 
    "Monash", "Moree - Narrabri", "Moreland - North", "Mornington Peninsula", "Mount Druitt", "Mt Gravatt", 
    "Mudgeeraba - Tallebudgera", "Mundaring", "Murray River - Swan Hill", "Murray and Mallee", "Nambour", 
    "Narangba - Burpengary", "Nathan", "Nerang", "Newcastle", "Nillumbik - Kinglake", "Noosa", 
    "Noosa Hinterland", "Norfolk Island", "North Canberra", "North East", "North Lakes", 
    "North Sydney - Mosman", "Norwood - Payneham - St Peters", "Nundah", "Onkaparinga", "Orange", 
    "Ormeau - Oxenford", "Outback - North", "Outback - North and East", "Outback - South", "Palmerston", 
    "Parramatta", "Pennant Hills - Epping", "Penrith", "Perth City", "Pittwater", "Playford", 
    "Port Adelaide - East", "Port Adelaide - West", "Port Douglas - Daintree", "Port Macquarie", 
    "Port Phillip", "Port Stephens", "Prospect - Walkerville", "Queanbeyan", "Redcliffe", 
    "Richmond - Windsor", "Richmond Valley - Coastal", "Richmond Valley - Hinterland", "Robina", 
    "Rockhampton", "Rockingham", "Rocklea - Acacia Ridge", "Rouse Hill - McGraths Hill", 
    "Ryde - Hunters Hill", "Salisbury", "Sandgate", "Serpentine - Jarrahdale", "Shepparton", 
    "Sherwood - Indooroopilly", "Shoalhaven", "Snowy Mountains", "Sorell - Dodges Ferry", 
    "South Canberra", "South Coast", "South East Coast", "South Perth", "Southern Highlands", 
    "Southport", "Springfield - Redbank", "Springwood - Kingston", "St Marys", "Stirling", 
    "Stonnington - East", "Stonnington - West", "Strathfield - Burwood - Ashfield", "Strathpine", 
    "Sunbury", "Sunnybank", "Sunshine Coast Hinterland", "Surf Coast - Bellarine Peninsula", 
    "Surfers Paradise", "Sutherland - Menai - Heathcote", "Swan", "Sydney Inner City", 
    "Tablelands (East) - Kuranda", "Tamworth - Gunnedah", "Taree - Gloucester", "Tea Tree Gully", 
    "The Gap - Enoggera", "The Hills District", "Toowoomba", "Townsville", "Tuggeranong", 
    "Tullamarine - Broadmeadows", "Tumut - Tumbarumba", "Tweed Valley", "Unley", "Upper Goulburn Valley", 
    "Upper Hunter", "Upper Murray exc. Albury", "Uriarra - Namadgi", "Wagga Wagga", "Wangaratta - Benalla", 
    "Wanneroo", "Warringah", "Warrnambool", "Wellington", "West Coast", "West Pilbara", "West Torrens", 
    "Weston Creek", "Wheat Belt - North", "Wheat Belt - South", "Whitehorse - East", "Whitehorse - West", 
    "Whitsunday", "Whittlesea - Wallan", "Woden Valley", "Wodonga - Alpine", "Wollondilly", "Wollongong", 
    "Wyndham", "Wynnum - Manly", "Wyong", "Yarra", "Yarra Ranges", "Yorke Peninsula", "Young - Yass"
]

# Extra place names that broaden location matching beyond the SA3 list:
# large cities first, then full state/territory names, then their abbreviations.
major_locations = (
    # Capital cities
    ["Sydney", "Melbourne", "Brisbane", "Perth", "Adelaide", "Canberra", "Hobart", "Darwin"]
    # Other large cities
    + ["Gold Coast", "Newcastle", "Wollongong", "Geelong", "Cairns", "Townsville"]
    # States and territories, spelled out
    + [
        "New South Wales",
        "Victoria",
        "Queensland",
        "Western Australia",
        "South Australia",
        "Tasmania",
        "Northern Territory",
        "Australian Capital Territory",
    ]
    # States and territories, abbreviated
    + ["NSW", "VIC", "QLD", "WA", "SA", "TAS", "NT", "ACT"]
)

# Combine all locations into one ordered candidate list.
# Some names (e.g. "Newcastle", "Geelong") occur in both source lists; a repeated
# entry would make the processing cell emit the same record twice for one post, so
# duplicates are dropped while keeping first-seen order.
all_locations = list(dict.fromkeys(sa3_names + major_locations))

# Hashtag-style labels treated as election-related; a post's tags are kept only
# when they appear in this list exactly as written.
election_tags = [
    # General Australian politics / election tags
    "auspol",
    "ausvotes",
    "australiavotes",
    "fedpol",
    "democracy",
    "auspol2025",
    "auselection",
    "auselection2025",
    "election",
    "elections",
    "electionday",
    # Party tags
    "alp",
    "greens",
    "liberals",
    # State politics tags
    "nswpol",
    "vicpol",
    "qldpol",
]


### Query Elasticsearch, analyse the results, and draw figures

In [None]:
# Execute query, build the analysis DataFrame, draw the charts, and export results.
#
# Reads `es`, `election_keywords`, `all_locations` and `election_tags` from earlier
# cells, plus `query_body`.
# NOTE(review): `query_body` is not defined in any cell visible here; it must be
# created before this cell runs, otherwise the NameError branch at the bottom fires.
# Leaves the processed records in `df` and writes PNG charts, a CSV file and a JSON
# summary into the current working directory.


def _whole_phrase_pattern(phrase, ignore_case=True, allow_plural=False):
    """Compile a regex that finds `phrase` only as a whole word or phrase.

    A plain substring test treats "win" as present in "window" and "sa" as present
    in "said"; the look-arounds used here require a non-word character (or the
    edge of the text) on both sides of the phrase instead.

    Args:
        phrase: Literal text to look for; it is escaped before compiling.
        ignore_case: When True the match ignores letter case.
        allow_plural: When True a single trailing "s" is also accepted
            (so "votes" counts for "vote").

    Returns:
        The compiled regular-expression pattern.
    """
    suffix = r"s?" if allow_plural else ""
    flags = re.IGNORECASE if ignore_case else 0
    return re.compile(r"(?<!\w)" + re.escape(phrase) + suffix + r"(?!\w)", flags)


def _flatten_lists(series):
    """Concatenate every non-empty list found in `series` into one flat list."""
    flat = []
    for value in series:
        if isinstance(value, list) and value:
            flat.extend(value)
    return flat


try:
    # First validate connection (a failed ping only warns; the calls below will
    # raise and be reported by the handler at the bottom of this cell)
    if not es.ping():
        print("Warning: Unable to connect to Elasticsearch. Please check connection settings.")
    else:
        print("Elasticsearch connection successful!")

    # Check if index exists
    if not es.indices.exists(index="socialplatform"):
        print("Error: Index 'socialplatform' does not exist. Please check index name.")
    else:
        print("Index 'socialplatform' found")

        # Execute query
        all_resp = es.search(
            index="socialplatform",
            body=query_body
        )

        total_hits = all_resp["hits"]["total"]["value"]
        print(f"Total hits: {total_hits}")

        if total_hits == 0:
            print("No matching records found. Please try the following:")
            print("1. Reduce the number of SA3 regions used")
            print("2. Use more general search terms")
            print("3. Verify if relevant data exists in the index")

            # Create empty DataFrame to avoid subsequent processing errors
            df = pd.DataFrame(columns=["location", "sentiment", "date", "tags", "election_terms", "content_preview"])
        else:
            # Extract results. Only the hits contained in this response are
            # processed, which can be fewer than `total_hits`.
            hits = all_resp["hits"]["hits"]
            print(f"Actual number of records retrieved: {len(hits)}")

            # Compile the matchers once, outside the per-hit loop. `dict.fromkeys`
            # removes repeated entries so no location/term can be matched twice.
            # All-upper-case locations ("SA", "ACT", ...) are matched case-sensitively
            # so that ordinary words such as "act" do not count as a territory.
            location_patterns = [
                (loc, _whole_phrase_pattern(loc, ignore_case=not loc.isupper()))
                for loc in dict.fromkeys(all_locations)
            ]
            term_patterns = [
                (term, _whole_phrase_pattern(term, allow_plural=True))
                for term in dict.fromkeys(election_keywords)
            ]

            # Data processing
            records = []
            for hit in hits:
                source = hit["_source"]

                # Get key fields; `or` fallbacks also cover fields stored as null,
                # which `.get(key, default)` alone would pass through as None
                sentiment = source.get("sentiment", 0)
                sentiment_label = source.get("sentimentLabel", "")
                keywords_list = source.get("keywords") or []

                # Extract content from data object
                data_obj = source.get("data") or {}
                content = data_obj.get("content") or ""
                created_at = data_obj.get("createdAt", source.get("fetchedAt", ""))
                tags = data_obj.get("tags") or []

                # Process location information
                # 1. First look for matching locations in keywords
                matched_locations = [loc for loc, _ in location_patterns if loc in keywords_list]

                # 2. If no locations found in keywords, try finding them in the content
                if not matched_locations:
                    matched_locations = [
                        loc for loc, pattern in location_patterns if pattern.search(content)
                    ]

                # 3. If still no location, use default value
                if not matched_locations:
                    matched_locations = ["unknown_location"]

                # Extract matching election keywords: either listed in the keywords
                # array or present in the content as a whole word/phrase
                matched_election_terms = [
                    term for term, pattern in term_patterns
                    if term in keywords_list or pattern.search(content)
                ]

                # Add matching tags
                matched_election_tags = [tag for tag in tags if tag in election_tags]

                # Get source platform information
                source_platform = source.get("platform", "unknown")

                # Process date. `datetime` is the module here (see the import cell),
                # so the parser lives on `datetime.datetime`.
                try:
                    date = datetime.datetime.fromisoformat(created_at.replace('Z', '+00:00'))
                    date_str = date.strftime('%Y-%m-%d')
                except (AttributeError, TypeError, ValueError):
                    # Missing, non-string or unparseable timestamp
                    date_str = "unknown"

                # Add matching records (one row per matched location)
                for location in matched_locations:
                    records.append({
                        "location": location,
                        "sentiment": sentiment,
                        "sentiment_label": sentiment_label,
                        "date": date_str,
                        "source": source_platform,
                        "tags": matched_election_tags,
                        "election_terms": matched_election_terms,
                        "content_preview": content[:150] + "..." if len(content) > 150 else content,
                        "score": hit.get("_score", 0)  # Add relevance score
                    })

            # Create DataFrame
            df = pd.DataFrame(records)

        # Basic data exploration
        print(f"\nNumber of processed records: {len(df)}")

        if len(df) > 0:
            # Get most common locations
            print("\nMost common locations (Top 10):")
            location_counts = df['location'].value_counts().head(10)
            for loc, count in location_counts.items():
                print(f"  {loc}: {count}")

            print("\nSentiment analysis overview:")
            print(df['sentiment'].describe())

            if 'sentiment_label' in df.columns and not df['sentiment_label'].isna().all():
                print("\nSentiment label distribution:")
                print(df['sentiment_label'].value_counts())

            if 'tags' in df.columns:
                all_tags = _flatten_lists(df['tags'])
                if all_tags:
                    tag_counts = pd.Series(all_tags).value_counts()
                    print("\nMost common election tags:")
                    print(tag_counts.head(10))

            if 'election_terms' in df.columns:
                all_terms = _flatten_lists(df['election_terms'])
                if all_terms:
                    term_counts = pd.Series(all_terms).value_counts()
                    print("\nMost common election keywords:")
                    print(term_counts.head(10))

            print("\nRecord date range:")
            if 'date' in df.columns:
                # Always report something under the header, including the case
                # where every record falls on the same day
                df_with_dates = df[df['date'] != 'unknown']
                if len(df_with_dates) > 0:
                    print(f"Earliest: {df_with_dates['date'].min()}")
                    print(f"Latest: {df_with_dates['date'].max()}")
                else:
                    print("No valid date records")

            print("\nContent preview examples:")
            if len(df) > 0 and 'content_preview' in df.columns:
                for i, preview in enumerate(df['content_preview'].head(3)):
                    print(f"\nRecord {i+1}:")
                    print(preview)

            # Visualization section
            try:
                if len(df) >= 10:
                    # 1. Comparison of record counts by region
                    plt.figure(figsize=(12, 8))
                    location_counts = df['location'].value_counts().head(15)
                    location_counts.plot(kind='barh', color='steelblue')
                    plt.title('Election-Related Record Counts by Region', fontsize=14)
                    plt.xlabel('Record Count', fontsize=12)
                    plt.ylabel('Region', fontsize=12)
                    plt.grid(True, linestyle='--', alpha=0.7)
                    plt.tight_layout()
                    plt.savefig('election_location_counts.png')
                    print("\nCreated region distribution chart and saved as 'election_location_counts.png'")

                    # 2. Sentiment analysis grouped by state/territory
                    # Exact-name mapping for the major cities / states / abbreviations
                    state_mapping = {
                        'NSW': ['Sydney', 'Newcastle', 'Wollongong', 'New South Wales', 'NSW'],
                        'VIC': ['Melbourne', 'Geelong', 'Victoria', 'VIC'],
                        'QLD': ['Brisbane', 'Gold Coast', 'Cairns', 'Townsville', 'Queensland', 'QLD'],
                        'WA': ['Perth', 'Western Australia', 'WA'],
                        'SA': ['Adelaide', 'South Australia', 'SA'],
                        'TAS': ['Hobart', 'Tasmania', 'TAS'],
                        'NT': ['Darwin', 'Northern Territory', 'NT'],
                        'ACT': ['Canberra', 'Australian Capital Territory', 'ACT']
                    }

                    # Name fragments used to guess a state for SA3 regions.
                    # This needs more detailed mapping, this is a simple example:
                    # a region is assigned to the first state (in the order below)
                    # that has a fragment contained in the region name.
                    sa3_state_prefixes = {
                        'NSW': ['Sydney', 'Newcastle', 'Wollongong', 'Blue Mountains', 'Central Coast', 'Illawarra',
                                'Hunter', 'New England', 'North Coast', 'South Coast', 'Albury', 'Wagga Wagga'],
                        'VIC': ['Melbourne', 'Geelong', 'Bendigo', 'Ballarat', 'Gippsland', 'Mornington'],
                        'QLD': ['Brisbane', 'Gold Coast', 'Sunshine Coast', 'Cairns', 'Townsville', 'Mackay',
                                'Rockhampton', 'Toowoomba', 'Bundaberg', 'Hervey Bay', 'Darling Downs'],
                        # 'Wheat Belt' is spelled as in the region list ("Wheat Belt - North");
                        # the single-word spelling never matched any region name.
                        'WA': ['Perth', 'Bunbury', 'Mandurah', 'Pilbara', 'Kimberley', 'Goldfields', 'Wheat Belt'],
                        # NOTE(review): narrowed from 'Murray', which also captured
                        # "Lower Murray" and "Murray River - Swan Hill"; confirm against
                        # the official SA3-to-state allocation.
                        'SA': ['Adelaide', 'Barossa', 'Yorke', 'Eyre', 'Murray and Mallee'],
                        'TAS': ['Hobart', 'Launceston', 'Burnie', 'Devonport', 'East Coast'],
                        'NT': ['Darwin', 'Alice Springs', 'Katherine', 'Arnhem', 'Barkly'],
                        'ACT': ['Canberra', 'Belconnen', 'Gungahlin', 'Tuggeranong', 'Woden']
                    }

                    def _state_for(location):
                        """Return the state code for `location`, or 'Other' if none fits."""
                        for state, names in state_mapping.items():
                            if location in names:
                                return state
                        for state, fragments in sa3_state_prefixes.items():
                            if any(fragment in location for fragment in fragments):
                                return state
                        return 'Other'

                    # Add state/territory column
                    df['state'] = df['location'].map(_state_for)

                    # Sentiment analysis by state/territory
                    if len(df['state'].unique()) > 1:
                        plt.figure(figsize=(10, 6))
                        sns.boxplot(x='state', y='sentiment', data=df)
                        plt.title('Sentiment Distribution for Election Topics by State/Territory', fontsize=14)
                        plt.xlabel('State/Territory', fontsize=12)
                        plt.ylabel('Sentiment Score', fontsize=12)
                        plt.grid(True, linestyle='--', alpha=0.7)
                        plt.tight_layout()
                        plt.savefig('election_sentiment_by_state.png')
                        print("Created state/territory sentiment analysis chart and saved as 'election_sentiment_by_state.png'")

                    # 3. Election keyword word cloud (bar chart when wordcloud is missing)
                    if 'election_terms' in df.columns and sum(df['election_terms'].apply(len)) > 0:
                        cloud_terms = _flatten_lists(df['election_terms'])
                        try:
                            # Optional dependency, imported lazily so the rest of
                            # the analysis still runs without it
                            from wordcloud import WordCloud
                        except ImportError:
                            WordCloud = None
                            print("Tip: Install wordcloud package to generate word clouds: pip install wordcloud")

                        if WordCloud is not None:
                            if cloud_terms:
                                term_frequencies = pd.Series(cloud_terms).value_counts().to_dict()

                                plt.figure(figsize=(12, 8))
                                wordcloud = WordCloud(width=800, height=500, background_color='white',
                                                      max_words=100, contour_width=3, contour_color='steelblue')
                                wordcloud.generate_from_frequencies(term_frequencies)
                                plt.imshow(wordcloud, interpolation='bilinear')
                                plt.axis('off')
                                plt.tight_layout()
                                plt.savefig('election_terms_wordcloud.png')
                                print("Created election keyword word cloud and saved as 'election_terms_wordcloud.png'")
                        else:
                            # Fall back to bar chart
                            top_term_counts = pd.Series(cloud_terms).value_counts().head(15)
                            plt.figure(figsize=(12, 8))
                            top_term_counts.plot(kind='barh', color='steelblue')
                            plt.title('Election Keyword Distribution', fontsize=14)
                            plt.xlabel('Frequency', fontsize=12)
                            plt.ylabel('Keyword', fontsize=12)
                            plt.grid(True, linestyle='--', alpha=0.7)
                            plt.tight_layout()
                            plt.savefig('election_terms_counts.png')
                            print("Created election keyword distribution chart and saved as 'election_terms_counts.png'")

                    # 4. Time trend analysis
                    if 'date' in df.columns and df['date'].nunique() > 3:
                        time_df = df[df['date'] != 'unknown'].copy()
                        if len(time_df) > 10:
                            time_df['date'] = pd.to_datetime(time_df['date'])
                            daily_counts = time_df.groupby(time_df['date']).size().reset_index(name='count')
                            daily_sentiment = time_df.groupby(time_df['date'])['sentiment'].mean().reset_index()

                            # Post volume time trend
                            plt.figure(figsize=(12, 6))
                            plt.plot(daily_counts['date'], daily_counts['count'],
                                     marker='o', linestyle='-', color='royalblue')
                            plt.title('Election-Related Post Count Over Time', fontsize=14)
                            plt.xlabel('Date', fontsize=12)
                            plt.ylabel('Post Count', fontsize=12)
                            plt.grid(True, linestyle='--', alpha=0.7)
                            plt.xticks(rotation=45)
                            plt.tight_layout()
                            plt.savefig('election_posts_trend.png')
                            print("Created post count trend chart and saved as 'election_posts_trend.png'")

                            # Sentiment time trend
                            plt.figure(figsize=(12, 6))
                            plt.plot(daily_sentiment['date'], daily_sentiment['sentiment'],
                                     marker='o', linestyle='-', color='teal')
                            plt.axhline(y=0, color='red', linestyle='--', alpha=0.7)
                            plt.title('Election Topic Sentiment Trend Over Time', fontsize=14)
                            plt.xlabel('Date', fontsize=12)
                            plt.ylabel('Average Sentiment Score', fontsize=12)
                            plt.grid(True, linestyle='--', alpha=0.7)
                            plt.xticks(rotation=45)
                            plt.tight_layout()
                            plt.savefig('election_sentiment_trend.png')
                            print("Created sentiment trend chart and saved as 'election_sentiment_trend.png'")

                    # 5. Popular regions sentiment heatmap
                    if len(df) >= 20:
                        top_locations = df['location'].value_counts().head(10).index.tolist()

                        sentiment_bins = [-1, -0.5, -0.25, 0, 0.25, 0.5, 1]
                        # NOTE(review): the labels are asymmetric ((0, 0.25] is called
                        # 'Neutral' and there is no 'Very Positive') — confirm intended.
                        sentiment_bin_labels = ['Very Negative', 'Negative', 'Slightly Negative',
                                                'Neutral', 'Slightly Positive', 'Positive']

                        # Get sentiment distribution for these regions
                        distributions = {}
                        for loc in top_locations:
                            loc_sentiments = df.loc[df['location'] == loc, 'sentiment']
                            if len(loc_sentiments) >= 5:  # Ensure enough data
                                shares = pd.cut(
                                    loc_sentiments,
                                    bins=sentiment_bins,
                                    labels=sentiment_bin_labels,
                                    include_lowest=True,  # keep scores of exactly -1 in the first bin
                                ).value_counts(normalize=True, sort=False)
                                shares.index = shares.index.astype(str)
                                distributions[loc] = shares

                        # Rows follow the category order above rather than the
                        # frequency order of whichever region was processed first
                        sentiment_pivot = pd.DataFrame(distributions).reindex(sentiment_bin_labels)

                        if not sentiment_pivot.empty and sentiment_pivot.shape[1] >= 3:
                            plt.figure(figsize=(14, 8))
                            sns.heatmap(sentiment_pivot.transpose(), annot=True, cmap='YlGnBu', fmt='.2f',
                                        cbar_kws={'label': 'Proportion'})
                            plt.title('Sentiment Distribution Heatmap for Popular Regions', fontsize=14)
                            plt.ylabel('Region', fontsize=12)
                            plt.xlabel('Sentiment Category', fontsize=12)
                            plt.tight_layout()
                            plt.savefig('election_sentiment_heatmap.png')
                            print("Created sentiment distribution heatmap and saved as 'election_sentiment_heatmap.png'")
            except Exception as e:
                # Charts are best-effort: report the problem and carry on to the export
                print(f"Error creating visualizations: {e}")
        else:
            print("No records found, cannot perform data exploration.")

        # Save processed data
        if len(df) > 0:
            try:
                export_filename = 'australian_election_data.csv'
                df.to_csv(export_filename, index=False)
                print(f"\nData exported to {export_filename}")

                # Export results summary
                summary = {
                    "Query Time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    "Total Hits": total_hits,
                    "Processed Records": len(df),
                    "Location Distribution": df['location'].value_counts().to_dict(),
                    "Sentiment Statistics": {
                        "Mean": float(df['sentiment'].mean()),
                        "Median": float(df['sentiment'].median()),
                        "Standard Deviation": float(df['sentiment'].std()),
                        "Minimum": float(df['sentiment'].min()),
                        "Maximum": float(df['sentiment'].max())
                    }
                }

                if 'election_terms' in df.columns:
                    summary_terms = _flatten_lists(df['election_terms'])
                    if summary_terms:
                        summary["Popular Election Keywords"] = pd.Series(summary_terms).value_counts().head(20).to_dict()

                with open('election_analysis_summary.json', 'w') as f:
                    json.dump(summary, f, indent=2)
                print("Analysis summary exported to 'election_analysis_summary.json'")
            except Exception as e:
                print(f"Error exporting data: {e}")

except Exception as e:
    print(f"Query execution error: {str(e)}")

    # Diagnostic information. The exception's class name is included in the text
    # being searched because the message alone usually does not contain it.
    error_text = f"{type(e).__name__}: {e}"
    if isinstance(e, NameError):
        print("Setup error: a required variable is not defined. Run the earlier cells (including the one that builds 'query_body') first.")
    elif "ConnectionError" in error_text:
        print("Connection error: Cannot connect to Elasticsearch. Please check if the service is running and the network connection.")
    elif "AuthenticationException" in error_text:
        print("Authentication error: Incorrect username or password.")
    elif "index_not_found_exception" in error_text:
        print("Index error: Index 'socialplatform' does not exist.")
    elif "SearchPhaseExecutionException" in error_text:
        print("Query error: Query syntax or parameters may be incorrect.")
    else:
        print("Other error. Please check Elasticsearch logs for more information.")

print("\nScript execution completed")