In [34]:
import pandas as pd
import numpy as np
from geopy.distance import geodesic

# Half-width of the search area around the centre point, in kilometres
radius_km = 5

# Centre of the study area: Westminster Abbey as (latitude, longitude) in decimal degrees
westminster_coords = (51.4995, -0.1276)

# Function to calculate bounding box
def get_bounding_box(center, radius_km):
    """Return a latitude/longitude box extending `radius_km` from `center`.

    Uses the flat approximation of 111 km per degree of latitude, with the
    longitude span widened by 1 / cos(latitude) because meridians converge
    away from the equator.

    Parameters
    ----------
    center : sequence
        (latitude, longitude) in decimal degrees.
    radius_km : number
        Distance from the centre to each edge of the box, in kilometres.

    Returns
    -------
    tuple
        (min_lat, max_lat, min_lon, max_lon) in decimal degrees.
    """
    km_per_degree = 111.0
    centre_lat = center[0]

    # Half-extent of the box in degrees along each axis
    half_height = radius_km / km_per_degree
    half_width = radius_km / (km_per_degree * np.cos(np.radians(centre_lat)))

    centre_lon = center[1]
    south, north = centre_lat - half_height, centre_lat + half_height
    west, east = centre_lon - half_width, centre_lon + half_width

    return south, north, west, east

# Compute the bounding box around Westminster Abbey for the configured radius
min_lat, max_lat, min_lon, max_lon = get_bounding_box(westminster_coords, radius_km)

# Report the box. The radius in the heading is read from radius_km so the
# label always matches the value that was actually used (it previously said
# "10 km" while the box was built with a 5 km radius).
print(f"Bounding box coordinates for {radius_km} km radius around Westminster Abbey:")
print(f"Min Latitude: {min_lat}")
print(f"Max Latitude: {max_lat}")
print(f"Min Longitude: {min_lon}")
print(f"Max Longitude: {max_lon}")


Bounding box coordinates for 10 km radius around Westminster Abbey:
Min Latitude: 51.454454954954954
Max Latitude: 51.54454504504504
Min Longitude: -0.19995902293139392
Max Longitude: -0.05524097706860606


In [35]:
# How many synthetic cell towers to place
n_towers = 200

# Number the towers 1..n_towers, then draw each position uniformly at random
# inside the bounding box (latitudes are sampled before longitudes)
towers = pd.DataFrame({'tower_id': range(1, n_towers + 1)}).assign(
    latitude=np.random.uniform(min_lat, max_lat, n_towers),
    longitude=np.random.uniform(min_lon, max_lon, n_towers),
)

# Show the row count followed by the first ten towers
print(towers.shape[0])
print(towers.head(10))

200
   tower_id   latitude  longitude
0         1  51.459688  -0.113895
1         2  51.511132  -0.198482
2         3  51.472416  -0.055562
3         4  51.484569  -0.174958
4         5  51.501397  -0.179432
5         6  51.487729  -0.100003
6         7  51.469522  -0.185517
7         8  51.454561  -0.115278
8         9  51.509538  -0.177085
9        10  51.540247  -0.150276


In [36]:
import plotly.express as px

# Plot tower locations on an interactive OpenStreetMap background.
# The distance in the title is read from radius_km so it matches the bounding
# box the towers were sampled from (the title previously hard-coded "10 km"
# although the box was built with radius_km = 5).
fig = px.scatter_mapbox(
    towers,
    lat='latitude',
    lon='longitude',
    size_max=50,
    zoom=10,  # initial zoom level of the map
    mapbox_style="open-street-map",
    title=f"Tower Locations within {radius_km} km of Westminster Abbey"
)

# Render the interactive figure
fig.show()

In [37]:
# Generate a synthetic population of users with random demographic attributes
n_users = 5000

interests = ['Sports', 'Shopping', 'Food', 'Travel']
brands = ['Nike', 'Apple', 'Samsung', 'Adidas', 'Sony']
eco_profiles = ['Low', 'Middle', 'High']

# Every attribute is drawn independently and uniformly at random.
# np.random.randint excludes its upper bound, so ages fall in 18..64.
# 'brand_affinity' is intentionally not created here: it is filled in below
# with a list per user, so a scalar placeholder would only be overwritten.
users = pd.DataFrame({
    'user_id': range(1, n_users + 1),
    'age': np.random.randint(18, 65, n_users),
    'gender': np.random.choice(['Male', 'Female', 'Other'], n_users),
    'interest': np.random.choice(interests, n_users),
    'socioeconomic_profile': np.random.choice(eco_profiles, n_users)
})

# Give each user a list of 1 to 5 distinct brands: randint(1, 6) yields 1..5
# (never 0), and replace=False prevents the same brand appearing twice.
users['brand_affinity'] = [
    np.random.choice(brands, np.random.randint(1, 6), replace=False).tolist()
    for _ in range(n_users)
]

# Show the row count followed by the first ten users
print(len(users))
print(users.head(10))

5000
   tower_id   latitude  longitude
0         1  51.459688  -0.113895
1         2  51.511132  -0.198482
2         3  51.472416  -0.055562
3         4  51.484569  -0.174958
4         5  51.501397  -0.179432
5         6  51.487729  -0.100003
6         7  51.469522  -0.185517
7         8  51.454561  -0.115278
8         9  51.509538  -0.177085
9        10  51.540247  -0.150276


In [38]:
# Number of synthetic connection events to simulate
n_records = 10000

# Each event pairs a randomly chosen user with a randomly chosen tower
# (both sampled with replacement); events are spaced one minute apart
# starting at 2024-01-01 00:00.
logs = pd.DataFrame(dict(
    user_id=np.random.choice(users['user_id'], n_records),
    tower_id=np.random.choice(towers['tower_id'], n_records),
    timestamp=pd.date_range(start='2024-01-01', periods=n_records, freq='min'),
))

# Show the row count followed by the first ten events
print(logs.shape[0])
print(logs.head(10))

10000
   user_id  tower_id           timestamp
0      981        84 2024-01-01 00:00:00
1     1682        56 2024-01-01 00:01:00
2     3925        14 2024-01-01 00:02:00
3     3066        20 2024-01-01 00:03:00
4     4352       122 2024-01-01 00:04:00
5     4109        46 2024-01-01 00:05:00
6     3802        78 2024-01-01 00:06:00
7     2873       111 2024-01-01 00:07:00
8     4525       197 2024-01-01 00:08:00
9     1187       199 2024-01-01 00:09:00


In [39]:
# Step 1: attach the user profile to every log record (inner join on 'user_id')
logs_users = pd.merge(logs, users, how='inner', on='user_id')

# Step 2: attach the tower coordinates to the result (inner join on 'tower_id')
final_data = pd.merge(logs_users, towers, how='inner', on='tower_id')

# Show the row count followed by the first ten rows of the joined dataset
print(final_data.shape[0])
print(final_data.head(10))

10000
   user_id  tower_id           timestamp  age  gender  interest  \
0      981        84 2024-01-01 00:00:00   19    Male    Sports   
1     1682        56 2024-01-01 00:01:00   63   Other    Travel   
2     3925        14 2024-01-01 00:02:00   26    Male      Food   
3     3066        20 2024-01-01 00:03:00   56   Other      Food   
4     4352       122 2024-01-01 00:04:00   53    Male  Shopping   
5     4109        46 2024-01-01 00:05:00   28    Male    Travel   
6     3802        78 2024-01-01 00:06:00   45  Female  Shopping   
7     2873       111 2024-01-01 00:07:00   54  Female    Sports   
8     4525       197 2024-01-01 00:08:00   18   Other  Shopping   
9     1187       199 2024-01-01 00:09:00   48   Other    Sports   

  socioeconomic_profile                        brand_affinity   latitude  \
0                   Low                                [Sony]  51.465869   
1                Middle  [Samsung, Sony, Nike, Apple, Adidas]  51.522767   
2                Middle  [So

In [40]:
# Step 1: Define real points of interest (POIs) in the area around Westminster Abbey.
# Each row is (name, category, latitude, longitude) in decimal degrees; the rows
# are expanded into one dict per POI with those four keys.
westminster_pois = [
    {"name": poi_name, "category": poi_category, "latitude": poi_lat, "longitude": poi_lon}
    for poi_name, poi_category, poi_lat, poi_lon in (
        ("Westminster Abbey", "Tourist Attraction", 51.4993, -0.1273),
        ("Big Ben", "Tourist Attraction", 51.5007, -0.1246),
        ("Buckingham Palace", "Tourist Attraction", 51.5014, -0.1419),
        ("Trafalgar Square", "Tourist Attraction", 51.5080, -0.1281),
        ("St. James's Park", "Park", 51.5023, -0.1346),
        ("Hyde Park", "Park", 51.5073, -0.1657),
        ("Victoria Station", "Transport", 51.4964, -0.1431),
        ("Piccadilly Circus", "Tourist Attraction", 51.5098, -0.1342),
        ("London Eye", "Tourist Attraction", 51.5033, -0.1195),
        ("The National Gallery", "Tourist Attraction", 51.5090, -0.1283),
        ("Houses of Parliament", "Tourist Attraction", 51.4995, -0.1248),
        ("The Mall", "Tourist Attraction", 51.5049, -0.1357),
        ("Green Park", "Park", 51.5069, -0.1427),
        ("Regent's Park", "Park", 51.5313, -0.1569),
        ("Oxford Circus", "Shopping", 51.5154, -0.1419),
        ("Covent Garden", "Shopping", 51.5115, -0.1234),
        ("Leicester Square", "Tourist Attraction", 51.5111, -0.1280),
        ("Soho", "Shopping", 51.5136, -0.1312),
        ("West End Theatre District", "Tourist Attraction", 51.5120, -0.1300),
        ("Charing Cross Station", "Transport", 51.5080, -0.1247),
        ("Paddington Station", "Transport", 51.5154, -0.1757),
        ("Southbank Centre", "Tourist Attraction", 51.5053, -0.1168),
        ("Waterloo Station", "Transport", 51.5036, -0.1137),
        ("King's Cross Station", "Transport", 51.5312, -0.1241),
        ("British Museum", "Tourist Attraction", 51.5194, -0.1270),
        ("Shakespeare's Globe", "Tourist Attraction", 51.5081, -0.0972),
        ("Millennium Bridge", "Tourist Attraction", 51.5081, -0.0983),
        ("Tower Bridge", "Tourist Attraction", 51.5055, -0.0754),
        ("Tower of London", "Tourist Attraction", 51.5081, -0.0761),
        ("Shard London", "Tourist Attraction", 51.5045, -0.0865),
        ("Camden Market", "Shopping", 51.5414, -0.1455),
        ("Harrods", "Shopping", 51.4995, -0.1633),
        ("Natural History Museum", "Tourist Attraction", 51.4967, -0.1764),
        ("Science Museum", "Tourist Attraction", 51.4978, -0.1745),
        ("Victoria and Albert Museum", "Tourist Attraction", 51.4966, -0.1722),
        ("Imperial War Museum", "Tourist Attraction", 51.4957, -0.1082),
        ("Kensington Gardens", "Park", 51.5058, -0.1878),
        ("Hampstead Heath", "Park", 51.5605, -0.1657),
        ("Serpentine Galleries", "Tourist Attraction", 51.5048, -0.1744),
        ("Carnaby Street", "Shopping", 51.5134, -0.1407),
        ("Portobello Road Market", "Shopping", 51.5178, -0.2059),
        ("Holborn Station", "Transport", 51.5175, -0.1192),
        ("Russell Square", "Park", 51.5238, -0.1249),
        ("Fitzrovia", "Shopping", 51.5185, -0.1358),
        ("London Transport Museum", "Tourist Attraction", 51.5122, -0.1204),
        ("The Gherkin", "Tourist Attraction", 51.5145, -0.0804),
        ("The Walkie Talkie", "Tourist Attraction", 51.5106, -0.0837),
    )
]

# Tabular view of the POIs: one row per POI, one column per dict key
poi_data = pd.DataFrame(data=westminster_pois)

In [41]:
import math
import pandas as pd
from geopy.distance import geodesic

# Haversine formula to calculate distance between two latitude/longitude points
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance between two points, in meters.

    Applies the haversine formula on a sphere of radius 6371 km.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.

    Returns
    -------
    float
        Distance along the sphere's surface in meters. NaN inputs yield NaN.
    """
    R = 6371  # Radius of Earth in kilometers
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    delta_phi = math.radians(lat2 - lat1)
    delta_lambda = math.radians(lon2 - lon1)

    a = math.sin(delta_phi / 2)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(delta_lambda / 2)**2

    # Floating-point rounding can leave `a` marginally outside [0, 1] (for
    # example with antipodal points), which would make sqrt(1 - a) raise
    # ValueError. Clamp with explicit comparisons rather than min/max so that
    # a NaN still propagates to the result instead of being replaced.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return R * c * 1000  # Convert kilometers to meters

# Function to calculate visit counts for POIs based on tower proximity
def calculate_visit_count(tower_df, poi_df, user_df, radius=500):
    """Count, for every POI, the rows of `user_df` served by a nearby tower.

    A tower is "nearby" when its haversine distance to the POI is at most
    `radius` meters. Each row of `user_df` whose 'tower_id' belongs to a
    nearby tower counts as one visit.

    Parameters
    ----------
    tower_df : pandas.DataFrame
        Must provide 'tower_id', 'latitude' and 'longitude' columns.
    poi_df : pandas.DataFrame
        Must provide 'name', 'category', 'latitude' and 'longitude' columns.
    user_df : pandas.DataFrame
        Must provide a 'tower_id' column.
    radius : number, default 500
        Maximum tower-to-POI distance in meters (inclusive).

    Returns
    -------
    pandas.DataFrame
        One row per POI with columns 'name', 'latitude', 'longitude',
        'category' and 'visit_count'.
    """
    def _towers_in_range(lat, lon):
        # Keep only the towers no farther than `radius` meters from (lat, lon)
        in_range = tower_df.apply(
            lambda tower: haversine(lat, lon, tower['latitude'], tower['longitude']) <= radius,
            axis=1,
        )
        return tower_df[in_range]

    rows = []
    for _, poi in poi_df.iterrows():
        nearby_ids = _towers_in_range(poi['latitude'], poi['longitude'])['tower_id']

        # Rows of user_df that were recorded on one of the nearby towers
        connected = user_df['tower_id'].isin(nearby_ids)

        rows.append({
            'name': poi['name'],
            'latitude': poi['latitude'],
            'longitude': poi['longitude'],
            'category': poi['category'],
            'visit_count': len(user_df[connected]),
        })

    return pd.DataFrame(rows)


# Count visits per POI: every log record on a tower within 500 meters of the
# POI counts as one visit
visit_counts_df = calculate_visit_count(
    tower_df=towers,
    poi_df=poi_data,
    user_df=logs,
    radius=500,
)


In [42]:
# Step 2: Visualize POIs on a Map Using Plotly
# Bubble layer: one marker per POI, coloured by category and sized by visit count
fig = px.scatter_mapbox(
    data_frame=visit_counts_df,
    lat="latitude",
    lon="longitude",
    hover_name="name",
    hover_data={"latitude": True, "longitude": True, "visit_count": True},
    color="category",
    size="visit_count",
    title="Popular POIs Around Westminster Abbey",
    mapbox_style="open-street-map",
    zoom=12,
)

# Overlay layer: a small fixed-size red marker on every POI, labelled with its name
fig.add_scattermapbox(
    name='POI Markers',
    mode='markers',
    lat=poi_data['latitude'],
    lon=poi_data['longitude'],
    text=poi_data['name'],
    marker={'size': 6, 'color': 'red'},
)

# Render the interactive figure
fig.show()
