# This is a sample Jupyter Notebook

Below is an example of a code cell. 
Put your cursor into the cell and press Shift+Enter to execute it and select the next one, or click !here goes the icon of the corresponding button in the gutter! button.
To debug a cell, press Alt+Shift+Enter, or click !here goes the icon of the corresponding button in the gutter! button.

Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

To learn more about Jupyter Notebooks in PyCharm, see [help](https://www.jetbrains.com/help/pycharm/jupyter-notebook-support.html).
For an overview of PyCharm, go to Help -> Learn IDE features or refer to [our documentation](https://www.jetbrains.com/help/pycharm/getting-started.html).

In [1]:

!pip install pandas faker folium geopy

# Import libraries and define helper functions
import pandas as pd
from faker import Faker
import random
import folium
from geopy.distance import geodesic



In [2]:

# Initialize Faker
fake = Faker()

In [3]:

# Helper function to generate European coordinates
def generate_european_latitude():
    return random.uniform(36.0, 71.0)  # Approximate latitudes for Europe

In [4]:

def generate_european_longitude():
    return random.uniform(-10.0, 40.0)  # Approximate longitudes for Europe

In [5]:

# Generate first DataFrame with tourist information
tourists = {
    'name': [fake.name() for _ in range(100)],
    'address': [fake.address() for _ in range(100)],
    'latitude': [generate_european_latitude() for _ in range(100)],
    'longitude': [generate_european_longitude() for _ in range(100)],
    'age': [random.randint(18, 70) for _ in range(100)]
}

df_tourists = pd.DataFrame(tourists)

In [6]:

# Generate second DataFrame with visit records
visit_records = {
    'name': [random.choice(tourists['name']) for _ in range(1300)],
    'visit_date': [fake.date_this_decade() for _ in range(1300)],
    'latitude': [generate_european_latitude() for _ in range(1300)],
    'longitude': [generate_european_longitude() for _ in range(1300)]
}

df_visits = pd.DataFrame(visit_records)

In [7]:

# Display first few rows of each DataFrame
df_tourists.head(), df_visits.head()

(              name                                            address  \
 0   Monica Houston              256 Pacheco Ridge\nPaulside, KY 32968   
 1  Carla Gutierrez                        USNS Peterson\nFPO AE 01514   
 2     Oscar Nelson           4312 Noah Isle\nNew Ashleyview, VA 98756   
 3  Brandon Johnson            9701 Robert Neck\nWilsonmouth, GA 20461   
 4   Kristine Gomez  302 Robert Cliff Suite 447\nMichaeltown, KY 66342   
 
     latitude  longitude  age  
 0  53.196548  18.981428   36  
 1  41.383834  10.212498   20  
 2  51.672838  -5.150170   33  
 3  58.659810  33.275866   64  
 4  61.400891   3.190652   66  ,
                name  visit_date   latitude  longitude
 0  Veronica Ramirez  2023-09-16  52.923703   2.607813
 1    Kathryn Wilson  2023-04-11  50.532361   9.447432
 2      Oscar Nelson  2023-07-01  64.289417  18.748657
 3     Tracy Sanchez  2024-01-13  44.814785  39.423465
 4       Wendy White  2021-03-28  46.177807   9.781858)

In [8]:

# Merge the DataFrames on the 'name' column
df_merged = pd.merge(df_visits, df_tourists, on='name')
df_merged.head()

Unnamed: 0,name,visit_date,latitude_x,longitude_x,address,latitude_y,longitude_y,age
0,Veronica Ramirez,2023-09-16,52.923703,2.607813,"0901 Chris Freeway Suite 059\nDennishaven, SC ...",54.875612,17.149825,47
1,Kathryn Wilson,2023-04-11,50.532361,9.447432,"PSC 1860, Box 1708\nAPO AA 68481",41.691345,21.618747,33
2,Oscar Nelson,2023-07-01,64.289417,18.748657,"4312 Noah Isle\nNew Ashleyview, VA 98756",51.672838,-5.15017,33
3,Tracy Sanchez,2024-01-13,44.814785,39.423465,"0550 Timothy Alley\nHayleyside, MN 01584",54.356961,-2.801758,70
4,Wendy White,2021-03-28,46.177807,9.781858,"5960 Sharon Vista Apt. 788\nLake Laurenburgh, ...",59.701611,2.815184,54


In [9]:

# Calculate the average age of the tourists
average_age = df_merged['age'].mean()
print(f"Average age of tourists: {average_age:.2f}")

Average age of tourists: 44.01


In [10]:

# Helper function to check if two points are within 50 km radius
def is_within_radius(lat1, lon1, lat2, lon2, radius_km=50):
    return geodesic((lat1, lon1), (lat2, lon2)).km <= radius_km

In [11]:

# Group locations within a 50 km radius
location_counts = []
visited_locations = []

for idx, row in df_merged.iterrows():
    lat, lon = row['latitude_x'], row['longitude_x']
    found = False
    for loc in visited_locations:
        if is_within_radius(lat, lon, loc['latitude'], loc['longitude']):
            loc['count'] += 1
            found = True
            break
    if not found:
        visited_locations.append({'latitude': lat, 'longitude': lon, 'count': 1})

most_common_locations = sorted(visited_locations, key=lambda x: x['count'], reverse=True)[:5]
print("Top 5 most visited locations (grouped within 50 km radius):")
for loc in most_common_locations:
    print(f"Latitude: {loc['latitude']}, Longitude: {loc['longitude']}, Visits: {loc['count']}")

Top 5 most visited locations (grouped within 50 km radius):
Latitude: 52.92370325797059, Longitude: 2.6078132280692365, Visits: 5
Latitude: 59.06349164434478, Longitude: 34.89659435908291, Visits: 5
Latitude: 69.25465365316785, Longitude: 8.82505609283583, Visits: 5
Latitude: 69.93416402816317, Longitude: -5.086174956774455, Visits: 5
Latitude: 66.89431946733748, Longitude: 36.0435045574698, Visits: 4


In [12]:

# Create a map centered around the average coordinates
m = folium.Map(location=[df_merged['latitude_x'].mean(), df_merged['longitude_x'].mean()], zoom_start=4)

In [13]:

# Add markers for the top 5 most visited locations
for loc in most_common_locations:
    folium.Marker(
        location=[loc['latitude'], loc['longitude']],
        popup=f"Visits: {loc['count']}"
    ).add_to(m)

In [14]:

# Save map to an HTML file
m.save('most_visited_locations.html')
m
