# This is a sample Jupyter Notebook

Below is an example of a code cell. 
Put your cursor into the cell and press Shift+Enter to execute it and select the next one, or click !here goes the icon of the corresponding button in the gutter! button.
To debug a cell, press Alt+Shift+Enter, or click !here goes the icon of the corresponding button in the gutter! button.

Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

To learn more about Jupyter Notebooks in PyCharm, see [help](https://www.jetbrains.com/help/pycharm/jupyter-notebook-support.html).
For an overview of PyCharm, go to Help -> Learn IDE features or refer to [our documentation](https://www.jetbrains.com/help/pycharm/getting-started.html).

In [3]:

!pip install pandas faker folium geopy

# Import libraries and define helper functions
import pandas as pd
from faker import Faker
import random
import folium
from geopy.distance import geodesic

# Initialize Faker
fake = Faker()

# Helper function to generate European coordinates
def generate_european_latitude():
    return random.uniform(36.0, 71.0)  # Approximate latitudes for Europe

def generate_european_longitude():
    return random.uniform(-10.0, 40.0)  # Approximate longitudes for Europe

# Generate first DataFrame with tourist information
tourists = {
    'name': [fake.name() for _ in range(100)],
    'address': [fake.address() for _ in range(100)],
    'latitude': [generate_european_latitude() for _ in range(100)],
    'longitude': [generate_european_longitude() for _ in range(100)],
    'age': [random.randint(18, 70) for _ in range(100)]
}

df_tourists = pd.DataFrame(tourists)

# Generate second DataFrame with visit records
visit_records = {
    'name': [random.choice(tourists['name']) for _ in range(1300)],
    'visit_date': [fake.date_this_decade() for _ in range(1300)],
    'latitude': [generate_european_latitude() for _ in range(1300)],
    'longitude': [generate_european_longitude() for _ in range(1300)]
}

df_visits = pd.DataFrame(visit_records)

# Display first few rows of each DataFrame
df_tourists.head(), df_visits.head()

# Merge the DataFrames on the 'name' column
df_merged = pd.merge(df_visits, df_tourists, on='name')
df_merged.head()

# Calculate the average age of the tourists
average_age = df_merged['age'].mean()
print(f"Average age of tourists: {average_age:.2f}")

# Helper function to check if two points are within 50 km radius
def is_within_radius(lat1, lon1, lat2, lon2, radius_km=50):
    return geodesic((lat1, lon1), (lat2, lon2)).km <= radius_km

# Group locations within a 50 km radius
location_counts = []
visited_locations = []

for idx, row in df_merged.iterrows():
    lat, lon = row['latitude_x'], row['longitude_x']
    found = False
    for loc in visited_locations:
        if is_within_radius(lat, lon, loc['latitude'], loc['longitude']):
            loc['count'] += 1
            found = True
            break
    if not found:
        visited_locations.append({'latitude': lat, 'longitude': lon, 'count': 1})

most_common_locations = sorted(visited_locations, key=lambda x: x['count'], reverse=True)[:5]
print("Top 5 most visited locations (grouped within 50 km radius):")
for loc in most_common_locations:
    print(f"Latitude: {loc['latitude']}, Longitude: {loc['longitude']}, Visits: {loc['count']}")

# Create a map centered around the average coordinates
m = folium.Map(location=[df_merged['latitude_x'].mean(), df_merged['longitude_x'].mean()], zoom_start=4)

# Add markers for the top 5 most visited locations
for loc in most_common_locations:
    folium.Marker(
        location=[loc['latitude'], loc['longitude']],
        popup=f"Visits: {loc['count']}"
    ).add_to(m)

# Save map to an HTML file
m.save('most_visited_locations.html')
m


Average age of tourists: 46.52
Top 5 most visited locations (grouped within 50 km radius):
Latitude: 69.86936003378803, Longitude: 25.01623585144322, Visits: 6
Latitude: 46.54303039036648, Longitude: -8.983145686618862, Visits: 6
Latitude: 57.8359589104419, Longitude: 4.29807707247579, Visits: 5
Latitude: 68.93546928140901, Longitude: 24.252685414401952, Visits: 5
Latitude: 50.60599036755616, Longitude: 33.949058082236306, Visits: 5
