In [1]:
#BOOK 6 : Buddha

import csv
import pandas as pd
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# Load pre-trained tokenizer and model for NER
tokenizer = AutoTokenizer.from_pretrained("Jean-Baptiste/roberta-large-ner-english")
model = AutoModelForTokenClassification.from_pretrained("Jean-Baptiste/roberta-large-ner-english")

# Define your data
mydata = "/Users/shikha/Desktop/GJ/txt/walk_to_the_part_images.txt"

# Initialize NER pipeline with modified aggregation_strategy
nlp = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy="average")

# Chunk size for iterating over the text
chunk_size = 1000

# Initialize an empty list to store unique locations
unique_locations = []

# Write the locations to a CSV file
with open("locations.csv", "w", newline="", encoding="utf-8") as csvfile:
    fieldnames = ['word', 'start', 'end']  # Define the field names for the CSV file
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    # Read text from file and process in chunks
    with open(mydata, "r", encoding="utf-8") as file:
        text = file.read()

        # Preprocess the text
        text = text.replace('\n', ' ').replace('.', ' ').replace(',', ' ')

        # Process text in chunks
        for i in range(0, len(text), chunk_size):
            chunk = text[i:i+chunk_size]
            # Perform NER on the chunk
            output = nlp(chunk)
            # Write the locations to the CSV file
            for entity in output:
                if entity['entity_group'] == 'LOC':  # Check if the entity is a location
                    location = entity['word'].strip()
                    if location.isalpha() and location.istitle() and len(location) > 2:  # Filter by capitalization, length, and alphabetic characters
                        writer.writerow({'word': location, 'start': entity['start'], 'end': entity['end']})
                        unique_locations.append(location)  # Add location to the list

# Convert the list of unique locations to a DataFrame
df = pd.DataFrame({'A': unique_locations})

# Get unique values of 'A' column (locations)
unique_values = df['A'].unique()

print(unique_values)



['Pillar' 'Kosambi' 'Yamuna' 'Allahbad' 'Vamsa' 'Savatthi' 'Ghositarama'
 'Parileyya' 'Asokan' 'Blakalonakra' 'Prileyya' 'Sankasya' 'Basantpur'
 'Farrukhabad' 'India' 'Agra' 'Delhi' 'Trayastrimsas' 'Heap' 'Rajagaha'
 'Giribhaja' 'Hill' 'Magadha' 'Patna' 'Gaya' 'Vebhara' 'Vepulla' 'Pandava'
 'Gijjhakuta' 'Isigili' 'Vaibhara' 'Vipula' 'Ratna' 'Sona' 'Udaya'
 'Svatth' 'Bimbisara' 'Veluvana' 'Jivakambavana' 'Rajhagaha' 'Rahagaha'
 'Pataliputra' 'Rhagaha' 'Nalanda' 'Baragaon' 'Bihar' 'Amballatthika'
 'Burma' 'Nepal' 'Tibet' 'Ambalatthik' 'Suppiya' 'Budtelbha' 'Antichak'
 'Bhagalpur' 'Bengal' 'Temple' 'Monastery' 'Java' 'Sumatra' 'Dhammika'
 'Sanchi' 'Bhopal' 'Bombay' 'Ujayni' 'Awanti' 'Ujani' 'Vidisa' 'Calcutta'
 'Lumbini' 'Sarnath' 'Mahaparinibbana' 'Kusinara' 'Veruwanaram' 'Magadh'
 'Gandhakuti' 'Jetavana' 'Ajanta' 'Aurangabad' 'Maharashtra' 'Asia'
 'Ellora' 'Washington' 'Bodhgaya' 'Vajrasana' 'Kailash' 'Kalash'
 'Kapilavastu' 'Uruvela' 'Vesali' 'Besrah' 'China' 'Japan' 'Korea'
 'Vietnam'

In [17]:
import csv

# Corrected list of places and locations
places_and_locations = [
    'Kosambi', 'Yamuna', 'Prayagraj', 'Kushinagar',
    'Agra', 'Delhi', 'Rajgir', 'Patna', 'Gaya', 'Pataliputra',
    'Nalanda', 'Baragaon', 'Bihar', 'Burma', 'Nepal', 'Tibet',
    'Bhagalpur', 'Bengal', 'Java', 'Sumatra', 'Sanchi', 'Bhopal', 'Bombay',
    'Ujani', 'Calcutta', 'Lumbini', 'Sarnath', 'Jetavana', 'Ajanta', 
    'Aurangabad', 'Maharashtra', 'Ellora', 'Bodh Gaya', 'Kailash', 
    'Kapilavastu', 'China', 'Japan', 'Korea', 'Vietnam', 'Thailand', 
    'Mahavihara', 'Anuradhapura', 'Afghanistan', 'Uzbekistan', 'Mathura', 
    'Gandhara', 'Alexandria', 'Madhya Pradesh', 'Borobudur', 'Polonnaruwa', 
    'Angkor', 'Cambodia', 'Bundelkhand', 'Myanmar', 'Laos', 'Lanna', 
    'Ayutthaya', 'Durgabari', 'Arrah', 'Manpur', 'Mangarhi', 'Bangkok', 
    'Amarapura', 'Mandalay', 'Chaiya', 'Yangon', 'Dharamasala', 
    'Phra Khanong', 'Dambulla', 'Jabalpur', 'Barua', 'Kandy', 'Bidari', 
    'Sakon Nakhon', 'Isan', 'Shravasti', 'Rupandehi', 'Piprahwa', 'Terai', 
    'Sivalik Hills', 'Siddharthanagar', 'Sonauli', 'Kathmandu', 'Gorakhpur', 
    'Devadaha', 'Pukarani', 'Sakya', 'Karnali', 'Ural', 'Varanasi', 
    'Sambodhi Chaithya', 'Bahrhut', 'Bodh Gaya', 'Chunar', 'Gelug', 
    'Sarnath', 'Varana', 'Mecca', 'Bhandara', 'Balrampur', 'Rapti', 
    'Muzaffarpur', 'Nashik'
]

# Writing to a CSV file
with open('places_and_locations6.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    for place in places_and_locations:
        writer.writerow([place])



In [18]:
import csv
import requests

# Replace this with the path to your input CSV file
input_csv_path = 'places_and_locations6.csv'
# Output CSV file path
output_csv_path = 'wikidata_locations_with_geo6.csv'

def get_geolocation(place_name):
    # Wikidata endpoint for SPARQL queries
    url = 'https://query.wikidata.org/sparql'
    # SPARQL query to get geolocation for a place name
    query = f"""
    SELECT ?place ?placeLabel ?location WHERE {{
      ?place ?label "{place_name}"@en.
      ?place wdt:P625 ?location.
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
    }}
    LIMIT 1
    """
    headers = {'User-Agent': 'MyApp/1.0 (myemail@example.com)', 'Accept': 'application/json'}
    try:
        response = requests.get(url, headers=headers, params={'query': query, 'format': 'json'})
        response.raise_for_status()  # Raises HTTPError, if one occurred
        data = response.json()
        results = data['results']['bindings']
        if results:
            location = results[0]['location']['value']
            # Extracting latitude and longitude from the point string
            lon, lat = location.replace('Point(', '').replace(')', '').split(' ')
            return lat, lon
    except Exception as e:
        print(f"Error fetching geolocation for {place_name}: {e}")
    return None, None

# Read place names from CSV
places = []
with open(input_csv_path, mode='r', encoding='utf-8') as infile:
    reader = csv.reader(infile)
    next(reader)  # Skip header
    for row in reader:
        places.append(row[0])  # Assuming place name is in the first column

# Check if places are read correctly
print(f"Places read from CSV: {places}")

# Fetch geolocations and write to a new CSV
with open(output_csv_path, mode='w', encoding='utf-8', newline='') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['Place Name', 'Latitude', 'Longitude'])
    for place in places:
        lat, lon = get_geolocation(place)
        if lat and lon:
            writer.writerow([place, lat, lon])
            print(f"Geolocation for {place}: {lat}, {lon}")
        else:
            print(f"No geolocation found for {place}")

print("\nDone writing geolocations to CSV.")


Places read from CSV: ['Yamuna', 'Prayagraj', 'Kushinagar', 'Agra', 'Delhi', 'Rajgir', 'Patna', 'Gaya', 'Pataliputra', 'Nalanda', 'Baragaon', 'Bihar', 'Burma', 'Nepal', 'Tibet', 'Bhagalpur', 'Bengal', 'Java', 'Sumatra', 'Sanchi', 'Bhopal', 'Bombay', 'Ujani', 'Calcutta', 'Lumbini', 'Sarnath', 'Jetavana', 'Ajanta', 'Aurangabad', 'Maharashtra', 'Ellora', 'Bodh Gaya', 'Kailash', 'Kapilavastu', 'China', 'Japan', 'Korea', 'Vietnam', 'Thailand', 'Mahavihara', 'Anuradhapura', 'Afghanistan', 'Uzbekistan', 'Mathura', 'Gandhara', 'Alexandria', 'Madhya Pradesh', 'Borobudur', 'Polonnaruwa', 'Angkor', 'Cambodia', 'Bundelkhand', 'Myanmar', 'Laos', 'Lanna', 'Ayutthaya', 'Durgabari', 'Arrah', 'Manpur', 'Mangarhi', 'Bangkok', 'Amarapura', 'Mandalay', 'Chaiya', 'Yangon', 'Dharamasala', 'Phra Khanong', 'Dambulla', 'Jabalpur', 'Barua', 'Kandy', 'Bidari', 'Sakon Nakhon', 'Isan', 'Shravasti', 'Rupandehi', 'Piprahwa', 'Terai', 'Sivalik Hills', 'Siddharthanagar', 'Sonauli', 'Kathmandu', 'Gorakhpur', 'Devadaha'

In [19]:
import folium
from folium.plugins import MarkerCluster, HeatMap
import branca.colormap as cm

# Define the locations
locations = {
    'Yamuna': (25.423611111, 81.8825),
    'Prayagraj': (25.45, 81.85),
    'Kushinagar': (26.740277777, 83.888888888),
    'Agra': (27.1767, 78.0081),
    'Delhi': (28.666666666, 77.216666666),
    'Rajgir': (25.03, 85.42),
    'Patna': (25.61, 85.141388888),
    'Gaya': (24.75, 85.016666666),
    'Pataliputra': (25.61, 85.141388888),
    'Nalanda': (25.2, 85.52),
    'Baragaon': (23.733055555, 76.306944444),
    'Bihar': (25.37, 85.13),
    'Burma': (22.0, 96.0),
    'Nepal': (28.0, 84.0),
    'Tibet': (29.65, 91.1),
    'Bhagalpur': (25.25, 87.016666666),
    'Bengal': (23.9888, 88.7311),
    'Java': (-7.491666666, 110.004444444),
    'Sumatra': (0.0, 101.997),
    'Sanchi': (23.480655555, 77.7363),
    'Bhopal': (23.25, 77.416666666),
    'Bombay': (19.075833333, 72.8775),
    'Ujani': (18.08, 75.37),
    'Calcutta': (22.5726723, 88.3638815),
    'Lumbini': (27.4814, 83.275829),
    'Sarnath': (25.381111111, 83.021388888),
    'Jetavana': (27.50722222, 82.04388889),
    'Ajanta': (20.5333, 75.75),
    'Aurangabad': (19.88, 75.32),
    'Maharashtra': (18.97, 72.82),
    'Bodh Gaya': (24.695, 84.9925),
    'Kailash': (31.066944444, 81.312777777),
    'Kapilavastu': (27.543888888, 83.052777777),
    'China': (35.86166, 104.195397),
    'Japan': (35.196055555, 139.627833333),
    'Korea': (37.5665, 126.9780),
    'Vietnam': (16.0, 108.0),
    'Thailand': (15.8700, 100.9925),
    'Anuradhapura': (8.335, 80.410833333),
    'Afghanistan': (33.0, 66.0),
    'Uzbekistan': (41.0, 64.0),
    'Mathura': (27.4924134, 77.673673),
    'Gandhara': (34.75, 72.38),
    'Alexandria': (31.2001, 29.9187),
    'Madhya Pradesh': (23.25, 77.417),
    'Borobudur': (-7.60793, 110.20384),
    'Polonnaruwa': (7.933333333, 81.0),
    'Angkor': (13.433333333, 103.833333333),
    'Cambodia': (12.5657, 104.9910),
    'Bundelkhand': (24.854422, 79.921427),
    'Myanmar': (21.9162, 95.955974),
    'Laos': (19.8563, 102.4955),
    'Lanna': (18.7964, 98.9866),
    'Ayutthaya': (14.359359083, 100.5761452),
    'Arrah': (25.563055555, 84.671111111),
    'Manpur': (23.4483, 85.0198),
    'Mangarhi': (27.841267, 77.907035),
    'Bangkok': (13.75, 100.516666666),
    'Amarapura': (21.85302, 96.09545),
    'Mandalay': (21.983055555, 96.084444444),
    'Chaiya': (9.386666666, 99.2),
    'Yangon': (16.8660694, 96.195132),
    'Phra Khanong': (13.702222222, 100.601666666),
    'Dambulla': (7.851389, 80.654167),
    'Jabalpur': (23.166666666, 79.933333333),
    'Barua': (24.0861, 91.5333),
    'Kandy': (7.296961111, 80.638452777),
    'Bidari': (16.5264, 75.4586),
    'Sakon Nakhon': (17.154166666, 104.136111111),
    'Isan': (15.116667, 105.783333),
    'Shravasti': (27.517073, 82.050619),
    'Rupandehi': (27.5, 83.45),
    'Piprahwa': (27.291667, 83.022222),
    'Terai': (26.95, 85.233333),
    'Sivalik Hills': (30.4500, 77.2900),
    'Siddharthanagar': (27.5, 83.45),
    'Sonauli': (27.473806, 83.46913),
    'Kathmandu': (27.7172, 85.3240),
    'Gorakhpur': (26.7606, 83.3732),
    'Devadaha': (27.65, 83.5333),
    'Sakya': (43.0891472, 41.9013546),
    'Karnali': (29.85, 81.95),
    'Ural': (52.0231, 76.9275),
    'Varanasi': (25.318888888, 83.012777777),
    'Mecca': (21.4225, 39.826111111),
    'Bhandara': (21.166666666, 79.65),
    'Balrampur': (27.4295126, 82.1854602),
    'Muzaffarpur': (26.12, 85.383333333),
    'Nashik': (20.0, 73.783333333)
}

# Create a map with marker clusters
cluster_map = folium.Map(location=[20, 78], zoom_start=2)
marker_cluster = MarkerCluster().add_to(cluster_map)

for city, coords in locations.items():
    folium.Marker(
        location=coords,
        popup=f"{city}",
        icon=folium.Icon(icon='cloud')
    ).add_to(marker_cluster)

# Generate heatmap data
heatmap_data = [[coord[0], coord[1], 1] for coord in locations.values()]

# Add heatmap layer
HeatMap(heatmap_data).add_to(cluster_map)

# Define a color map
color_scale = cm.LinearColormap(['blue', 'green', 'yellow', 'orange', 'red'],
                                vmin=0, vmax=100)

# Add color index legend to the map
color_scale.add_to(cluster_map)

# Save the map to an HTML file
cluster_map.save('travel_heatmap_book6.html')
