In [None]:
%pip install transformers torch

In [None]:
from ddgs import DDGS
from transformers import pipeline
import random

# Summarization pipelines keyed by model name. Loading a model is slow, so each one
# is created on first use and then reused by every later call in this kernel session.
_SUMMARIZER_CACHE = {}


def _get_summarizer(model_name="sshleifer/distilbart-cnn-12-6"):
    """Return the summarization pipeline for `model_name`, creating it only once."""
    if model_name not in _SUMMARIZER_CACHE:
        _SUMMARIZER_CACHE[model_name] = pipeline("summarization", model=model_name)
    return _SUMMARIZER_CACHE[model_name]


def _combine_snippets(results):
    """Join the non-empty 'body' snippets of `results`, each terminated by '. '."""
    pieces = []
    for r in results or []:
        # 'body' may be absent or explicitly None; both are treated as empty text.
        body = (r.get('body') or '').strip().rstrip('.')
        if body:
            pieces.append(body + '. ')
    return ''.join(pieces)


def search_and_summarize(query, max_results=5):
    """Search the web for `query` and return a short summary of the result snippets.

    Args:
        query: Search string passed to ``DDGS().text``.
        max_results: Maximum number of search results to request.

    Returns:
        The ``summary_text`` produced by the summarization pipeline, or an empty
        string when the search yields no snippet text to summarize.
    """
    # Random search text
    random_texts = [
        "Preparing search engine...",
        "Revving up the data miners...",
        "Consulting the digital oracle...",
        "Summoning the web crawlers...",
        "Mining the internet for nuggets of wisdom...",
        "Engaging search protocols...",
        "Navigating the information superhighway...",
        "Decoding the web's secrets...",
        "Harnessing the power of search algorithms...",
        "Exploring the vast digital landscape..."
    ]
    print(random.choice(random_texts))

    # Search first: the model is only needed once there is text to summarize.
    print(f'Searching for: {query}...')
    results = DDGS().text(query, max_results=max_results)

    combined_text = _combine_snippets(results)
    print(f'\nRaw combined text length: {len(combined_text)} characters.')

    if not combined_text:
        print('No text found to summarize.')
        return ''

    summarizer = _get_summarizer()
    # truncation=True clips input that exceeds the model's maximum length instead of
    # letting the model fail on over-long text.
    outputs = summarizer(
        combined_text,
        max_length=150,
        min_length=40,
        do_sample=False,
        truncation=True,
    )
    return outputs[0]['summary_text']

In [64]:
import pandas as pd 

# 1. Load the POI table, keeping only the two columns used to build search queries.
df = pd.read_csv('data/poi.csv').loc[:, ['name', 'municipality']]
# One {'name': ..., 'municipality': ...} dict per CSV row, in file order.
processed = df.to_dict('records')

# 2. Define the Streamlined Function
def process_and_save_poi(index, poi_list, filename='POI_description.txt'):
    """Generate a description for one POI and append it to `filename`.

    Args:
        index: Position of the POI inside `poi_list`.
        poi_list: Sequence of dicts with 'name' and 'municipality' keys.
        filename: Text file the entry is appended to (UTF-8, append mode).

    Returns:
        None. Progress and errors are reported with ``print``; failures are
        reported rather than raised so a batch loop can keep going.
    """
    # Look the POI up on its own so that only a bad `index` is reported as
    # "out of range"; an IndexError raised later (e.g. inside the search or
    # summarization step) must not be mistaken for one.
    try:
        poi = poi_list[index]
    except IndexError:
        print("Error: Index out of range.")
        return
    except Exception as e:
        print(f"An error occurred: {e}")
        return

    try:
        # Strip stray whitespace so it does not leak into the query or the file.
        name = str(poi['name']).strip()
        municipality = str(poi['municipality']).strip()

        # Generate Query
        query = f"{name} in {municipality} Bulacan Philippines"
        print(f"--- Processing: {name} [{index}] ---")

        # Search and Summarize (search_and_summarize must be defined in scope)
        result_summary = search_and_summarize(query, max_results=10)

        # Clean Text: remove the space before punctuation and surrounding whitespace
        clean_text = result_summary.replace(" .", ".").replace(" ,", ",").strip()

        if not clean_text:
            # Nothing useful was generated; do not append an empty entry.
            print(f"No description generated for {name}; nothing saved.")
            return

        # Print to console for manual check
        print(f"\nGenerated Description:\n{clean_text}\n")

        # Save to file (Append Mode 'a')
        with open(filename, 'a', encoding='utf-8') as f:
            f.write(f"{index}. {name}\n\n")
            f.write(f"{clean_text}\n\n")  # Adds a gap between entries

        print(f"Successfully saved to {filename}")

    except Exception as e:
        print(f"An error occurred: {e}")

# ---------------------------------------------------------
# 3. Usage: process a single POI. Edit `current_index` and
#    re-run this cell to handle another row of `processed`.
# ---------------------------------------------------------

current_index = 0

process_and_save_poi(index=current_index, poi_list=processed)



--- Processing: bulakan mangrove nursery  [0] ---
Preparing search engine...


Device set to use cpu


Searching for: bulakan mangrove nursery  in bulakan Bulacan Philippines...

Raw combined text length: 2381 characters.

Generated Description:
 Bulakan is a coastal municipality in the province of Bulacan, Central Luzon region (Region III), Philippines. It has a population of 81,232 (2020 Census figures) distributed over 14 barangays. Bulacan has a total of 585.14 ha of mangroves.

Successfully saved to POI_description.txt


In [None]:
# Batch run over the first BATCH_SIZE POIs. The range is clamped to the list length
# so a table with fewer rows does not produce "Index out of range" messages.
# NOTE(review): entries are appended to the output file, so any index that was
# already processed (e.g. by the single-POI call in the previous cell) is written
# again when this loop runs — confirm whether that duplication is intended.
BATCH_SIZE = 10

for i in range(min(BATCH_SIZE, len(processed))):
    process_and_save_poi(i, processed)