In [134]:
import pandas as pd
import numpy as np
from geopy.distance import geodesic

# Search radius around the centre point, in kilometres
radius_km = 5

# Centre of the study area: Westminster Abbey as (latitude, longitude) in decimal degrees
westminster_coords = (51.4995, -0.1276)

# Axis-aligned latitude/longitude box enclosing a circle of `radius_km` around `center`
def get_bounding_box(center, radius_km):
    """Return ``(min_lat, max_lat, min_lon, max_lon)`` around ``center``.

    ``center`` is indexed as ``(latitude, longitude)`` in degrees. The box
    extends ``radius_km`` north/south and east/west of the centre, using the
    approximation that one degree of latitude is about 111 km and that a degree
    of longitude shrinks with the cosine of the latitude.
    """
    km_per_degree = 111.0
    center_lat, center_lon = center[0], center[1]

    # Half-extent of the box along each axis, expressed in degrees
    half_height = radius_km / km_per_degree
    half_width = radius_km / (km_per_degree * np.cos(np.radians(center_lat)))

    south, north = center_lat - half_height, center_lat + half_height
    west, east = center_lon - half_width, center_lon + half_width

    return south, north, west, east

# Get the bounding box for Westminster Abbey
min_lat, max_lat, min_lon, max_lon = get_bounding_box(westminster_coords, radius_km)

# Output the bounding box coordinates. The radius in the label is taken from
# radius_km so it always matches the radius that was actually used (the label
# was previously hard-coded to "10 km" while radius_km is 5).
print(f"Bounding box coordinates for {radius_km} km radius around Westminster Abbey:")
print(f"Min Latitude: {min_lat}")
print(f"Max Latitude: {max_lat}")
print(f"Min Longitude: {min_lon}")
print(f"Max Longitude: {max_lon}")


Bounding box coordinates for 10 km radius around Westminster Abbey:
Min Latitude: 51.454454954954954
Max Latitude: 51.54454504504504
Min Longitude: -0.19995902293139392
Max Longitude: -0.05524097706860606


In [135]:
# How many synthetic towers to place
n_towers = 200

# Sample each coordinate independently and uniformly inside the bounding box
# (latitudes are drawn first, then longitudes)
tower_lats = np.random.uniform(min_lat, max_lat, n_towers)
tower_lons = np.random.uniform(min_lon, max_lon, n_towers)

# tower_id is a 1-based sequential identifier
towers = pd.DataFrame({
    'tower_id': range(1, n_towers + 1),
    'latitude': tower_lats,
    'longitude': tower_lons,
})

# Display the generated tower coordinates
print(towers)

     tower_id   latitude  longitude
0           1  51.479101  -0.065903
1           2  51.497765  -0.068240
2           3  51.475451  -0.130505
3           4  51.479788  -0.170903
4           5  51.477301  -0.080987
..        ...        ...        ...
195       196  51.506282  -0.066702
196       197  51.493634  -0.119591
197       198  51.478605  -0.093618
198       199  51.504726  -0.075284
199       200  51.472150  -0.184331

[200 rows x 3 columns]


In [136]:
import plotly.express as px

# Plot tower locations on an interactive map
fig = px.scatter_mapbox(
    towers,
    lat='latitude',
    lon='longitude',
    size_max=50,  # NOTE(review): no `size` column is passed, so this presumably has no visible effect - confirm
    zoom=10,  # Set the zoom level
    mapbox_style="open-street-map",  # Map style (alternatives include "carto-positron")
    # The radius in the title comes from radius_km so it matches the box the
    # towers were sampled from (it was previously hard-coded to "10 km").
    title=f"Tower Locations within {radius_km} km of Westminster Abbey"
)

# Show the map
fig.show()

In [137]:
# Build a synthetic user population with random demographic attributes
n_users = 5000

interests = ['Sports', 'Shopping', 'Food', 'Travel']
brands = ['Nike', 'Apple', 'Samsung', 'Adidas', 'Sony']
eco_profiles = ['Low', 'Middle', 'High']
users = pd.DataFrame({
    'user_id': range(1, n_users + 1),
    'age': np.random.randint(18, 65, n_users),  # upper bound is exclusive: ages 18-64
    'gender': np.random.choice(['Male', 'Female', 'Other'], n_users),
    'interest': np.random.choice(interests, n_users),
    'socioeconomic_profile': np.random.choice(eco_profiles, n_users),
    # Each user gets a list of 1 to 5 distinct brands: randint(1, 6) never
    # returns 0, and replace=False prevents duplicates within a list.
    # The list is built directly here; a single-brand column used to be
    # generated first and then immediately overwritten.
    'brand_affinity': [
        np.random.choice(brands, np.random.randint(1, 6), replace=False).tolist()
        for _ in range(n_users)
    ],
})

In [138]:
n_records = 10000

# Each log row pairs a randomly chosen user with a randomly chosen tower
# (user ids are drawn first, then tower ids); rows are stamped one minute apart.
sampled_user_ids = np.random.choice(users['user_id'], n_records)
sampled_tower_ids = np.random.choice(towers['tower_id'], n_records)
event_times = pd.date_range(start='2024-01-01', periods=n_records, freq='min')

logs = pd.DataFrame({
    'user_id': sampled_user_ids,
    'tower_id': sampled_tower_ids,
    'timestamp': event_times,
})

print(logs.head(10))

   user_id  tower_id           timestamp
0     4472        30 2024-01-01 00:00:00
1     3737       122 2024-01-01 00:01:00
2      507       189 2024-01-01 00:02:00
3     2401       178 2024-01-01 00:03:00
4     2231       186 2024-01-01 00:04:00
5      601       139 2024-01-01 00:05:00
6     1149       143 2024-01-01 00:06:00
7     4290       153 2024-01-01 00:07:00
8     4054       157 2024-01-01 00:08:00
9     4002        93 2024-01-01 00:09:00


In [139]:
# Step 1: attach each log row's user attributes (inner join on 'user_id')
logs_users = pd.merge(logs, users, how='inner', on='user_id')

# Step 2: attach the coordinates of the tower each log row refers to (inner join on 'tower_id')
final_data = pd.merge(logs_users, towers, how='inner', on='tower_id')

# Show the first ten rows of the joined dataset
print(final_data.head(10))

   user_id  tower_id           timestamp  age  gender  interest  \
0     4472        30 2024-01-01 00:00:00   26   Other    Travel   
1     3737       122 2024-01-01 00:01:00   32   Other    Sports   
2      507       189 2024-01-01 00:02:00   53  Female    Travel   
3     2401       178 2024-01-01 00:03:00   63   Other    Travel   
4     2231       186 2024-01-01 00:04:00   37   Other      Food   
5      601       139 2024-01-01 00:05:00   62   Other    Sports   
6     1149       143 2024-01-01 00:06:00   48   Other  Shopping   
7     4290       153 2024-01-01 00:07:00   43    Male    Travel   
8     4054       157 2024-01-01 00:08:00   32    Male  Shopping   
9     4002        93 2024-01-01 00:09:00   35   Other  Shopping   

  socioeconomic_profile                        brand_affinity   latitude  \
0                Middle  [Apple, Samsung, Adidas, Sony, Nike]  51.475874   
1                  High                                [Sony]  51.516315   
2                   Low  [Sony, Ap

In [140]:
# Step 1: Define points of interest (POIs) for a 10 km radius around Westminster Abbey.
# Each entry is a dict with the keys "name", "category", "latitude" and "longitude";
# the categories used are "Tourist Attraction", "Park", "Transport" and "Shopping".
# NOTE(review): the tower bounding box built earlier in this notebook uses
# radius_km = 5 (printed max latitude ~51.5445, min longitude ~-0.2000), so
# entries such as Hampstead Heath (latitude 51.5605) and Portobello Road Market
# (longitude -0.2059) lie outside the area where towers are generated - confirm
# whether the 10 km list or the 5 km box is the intended extent.
westminster_pois = [
    {"name": "Westminster Abbey", "category": "Tourist Attraction", "latitude": 51.4993, "longitude": -0.1273},
    {"name": "Big Ben", "category": "Tourist Attraction", "latitude": 51.5007, "longitude": -0.1246},
    {"name": "Buckingham Palace", "category": "Tourist Attraction", "latitude": 51.5014, "longitude": -0.1419},
    {"name": "Trafalgar Square", "category": "Tourist Attraction", "latitude": 51.5080, "longitude": -0.1281},
    {"name": "St. James's Park", "category": "Park", "latitude": 51.5023, "longitude": -0.1346},
    {"name": "Hyde Park", "category": "Park", "latitude": 51.5073, "longitude": -0.1657},
    {"name": "Victoria Station", "category": "Transport", "latitude": 51.4964, "longitude": -0.1431},
    {"name": "Piccadilly Circus", "category": "Tourist Attraction", "latitude": 51.5098, "longitude": -0.1342},
    {"name": "London Eye", "category": "Tourist Attraction", "latitude": 51.5033, "longitude": -0.1195},
    {"name": "The National Gallery", "category": "Tourist Attraction", "latitude": 51.5090, "longitude": -0.1283},
    {"name": "Houses of Parliament", "category": "Tourist Attraction", "latitude": 51.4995, "longitude": -0.1248},
    {"name": "The Mall", "category": "Tourist Attraction", "latitude": 51.5049, "longitude": -0.1357},
    {"name": "Green Park", "category": "Park", "latitude": 51.5069, "longitude": -0.1427},
    {"name": "Regent's Park", "category": "Park", "latitude": 51.5313, "longitude": -0.1569},
    {"name": "Oxford Circus", "category": "Shopping", "latitude": 51.5154, "longitude": -0.1419},
    {"name": "Covent Garden", "category": "Shopping", "latitude": 51.5115, "longitude": -0.1234},
    {"name": "Leicester Square", "category": "Tourist Attraction", "latitude": 51.5111, "longitude": -0.1280},
    {"name": "Soho", "category": "Shopping", "latitude": 51.5136, "longitude": -0.1312},
    {"name": "West End Theatre District", "category": "Tourist Attraction", "latitude": 51.5120, "longitude": -0.1300},
    {"name": "Charing Cross Station", "category": "Transport", "latitude": 51.5080, "longitude": -0.1247},
    {"name": "Paddington Station", "category": "Transport", "latitude": 51.5154, "longitude": -0.1757},
    {"name": "Southbank Centre", "category": "Tourist Attraction", "latitude": 51.5053, "longitude": -0.1168},
    {"name": "Waterloo Station", "category": "Transport", "latitude": 51.5036, "longitude": -0.1137},
    {"name": "King's Cross Station", "category": "Transport", "latitude": 51.5312, "longitude": -0.1241},
    {"name": "British Museum", "category": "Tourist Attraction", "latitude": 51.5194, "longitude": -0.1270},
    {"name": "Shakespeare's Globe", "category": "Tourist Attraction", "latitude": 51.5081, "longitude": -0.0972},
    {"name": "Millennium Bridge", "category": "Tourist Attraction", "latitude": 51.5081, "longitude": -0.0983},
    {"name": "Tower Bridge", "category": "Tourist Attraction", "latitude": 51.5055, "longitude": -0.0754},
    {"name": "Tower of London", "category": "Tourist Attraction", "latitude": 51.5081, "longitude": -0.0761},
    {"name": "Shard London", "category": "Tourist Attraction", "latitude": 51.5045, "longitude": -0.0865},
    {"name": "Camden Market", "category": "Shopping", "latitude": 51.5414, "longitude": -0.1455},
    {"name": "Harrods", "category": "Shopping", "latitude": 51.4995, "longitude": -0.1633},
    {"name": "Natural History Museum", "category": "Tourist Attraction", "latitude": 51.4967, "longitude": -0.1764},
    {"name": "Science Museum", "category": "Tourist Attraction", "latitude": 51.4978, "longitude": -0.1745},
    {"name": "Victoria and Albert Museum", "category": "Tourist Attraction", "latitude": 51.4966, "longitude": -0.1722},
    {"name": "Imperial War Museum", "category": "Tourist Attraction", "latitude": 51.4957, "longitude": -0.1082},
    {"name": "Kensington Gardens", "category": "Park", "latitude": 51.5058, "longitude": -0.1878},
    {"name": "Hampstead Heath", "category": "Park", "latitude": 51.5605, "longitude": -0.1657},
    {"name": "Serpentine Galleries", "category": "Tourist Attraction", "latitude": 51.5048, "longitude": -0.1744},
    {"name": "Carnaby Street", "category": "Shopping", "latitude": 51.5134, "longitude": -0.1407},
    {"name": "Portobello Road Market", "category": "Shopping", "latitude": 51.5178, "longitude": -0.2059},
    {"name": "Holborn Station", "category": "Transport", "latitude": 51.5175, "longitude": -0.1192},
    {"name": "Russell Square", "category": "Park", "latitude": 51.5238, "longitude": -0.1249},
    {"name": "Fitzrovia", "category": "Shopping", "latitude": 51.5185, "longitude": -0.1358},
    {"name": "London Transport Museum", "category": "Tourist Attraction", "latitude": 51.5122, "longitude": -0.1204},
    {"name": "The Gherkin", "category": "Tourist Attraction", "latitude": 51.5145, "longitude": -0.0804},
    {"name": "The Walkie Talkie", "category": "Tourist Attraction", "latitude": 51.5106, "longitude": -0.0837}
]

# Convert POIs to a DataFrame: one row per POI, one column per dict key
poi_data = pd.DataFrame(westminster_pois)

In [141]:
import math
import pandas as pd
from geopy.distance import geodesic

# Great-circle distance between two latitude/longitude points (haversine formula)
def haversine(lat1, lon1, lat2, lon2):
    """Return the distance in metres between ``(lat1, lon1)`` and ``(lat2, lon2)``.

    All four arguments are scalar angles in degrees. The Earth is modelled as
    a sphere with a radius of 6371 km.
    """
    earth_radius_km = 6371

    lat1_rad = math.radians(lat1)
    lat2_rad = math.radians(lat2)
    half_dlat = math.radians(lat2 - lat1) / 2
    half_dlon = math.radians(lon2 - lon1) / 2

    # Haversine of the central angle between the two points
    hav = (
        math.sin(half_dlat)**2
        + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(half_dlon)**2
    )
    central_angle = 2 * math.atan2(math.sqrt(hav), math.sqrt(1 - hav))

    # Kilometres -> metres
    return earth_radius_km * central_angle * 1000

# Function to calculate visit counts for POIs based on tower proximity
def calculate_visit_count(tower_df, poi_df, user_df, radius=500):
    """Count the log rows attached to towers near each POI.

    For every row of ``poi_df`` the towers whose haversine distance to the POI
    is at most ``radius`` are selected, and the rows of ``user_df`` whose
    ``tower_id`` belongs to one of those towers are counted.

    Parameters
    ----------
    tower_df : pandas.DataFrame
        Must provide ``tower_id``, ``latitude`` and ``longitude`` columns.
    poi_df : pandas.DataFrame
        Must provide ``name``, ``latitude`` and ``longitude`` columns.
    user_df : pandas.DataFrame
        Must provide a ``tower_id`` column. Every matching row counts as one
        visit, so several rows for the same user are counted separately.
    radius : float, default 500
        Matching distance in metres (the unit ``haversine`` returns).

    Returns
    -------
    pandas.DataFrame
        Columns ``name`` and ``visit_count``, one row per POI in ``poi_df``
        order. The columns are present even when ``poi_df`` is empty, so
        callers can always filter on ``visit_count``.
    """
    # Extract the tower coordinates once instead of re-walking the frame
    # row by row (DataFrame.apply with axis=1) for every POI.
    tower_coords = list(zip(tower_df['latitude'], tower_df['longitude']))

    visit_counts = []
    for _, poi in poi_df.iterrows():
        poi_lat, poi_lon = poi['latitude'], poi['longitude']

        # Positional boolean mask: True for towers within `radius` metres of the POI
        within_radius = pd.Series(
            [haversine(poi_lat, poi_lon, lat, lon) <= radius for lat, lon in tower_coords],
            dtype=bool,
        ).to_numpy()
        nearby_tower_ids = tower_df.loc[within_radius, 'tower_id']

        # Number of log rows whose tower is one of the nearby towers
        visit_count = int(user_df['tower_id'].isin(nearby_tower_ids).sum())
        visit_counts.append({'name': poi['name'], 'visit_count': visit_count})

    # Explicit columns keep the result schema stable for an empty POI table
    return pd.DataFrame(visit_counts, columns=['name', 'visit_count'])


# Count visits per POI using a 500 m tower-matching radius
visit_counts_df = calculate_visit_count(towers, poi_data, logs, radius=500)

# Show only the POIs that received at least one visit
has_visits = visit_counts_df['visit_count'] > 0
print(visit_counts_df.loc[has_visits])


                          name  visit_count
0            Westminster Abbey           49
1                      Big Ben          110
2            Buckingham Palace          109
5                    Hyde Park          105
6             Victoria Station           57
8                   London Eye          147
9         The National Gallery           60
10        Houses of Parliament           99
12                  Green Park           52
14               Oxford Circus          201
15               Covent Garden           60
16            Leicester Square           60
17                        Soho           56
19       Charing Cross Station          121
21            Southbank Centre          202
22            Waterloo Station          141
23        King's Cross Station          107
24              British Museum           93
25         Shakespeare's Globe          134
26           Millennium Bridge          134
27                Tower Bridge          101
28             Tower of London  

In [142]:
# Draw three values from 1..5, sampling with replacement (the default)
result = np.random.choice(np.arange(1, 6), size=3)
print(result)


[3 4 5]


In [143]:
# Draw one hundred values from 1..5, sampling with replacement (the default)
result = np.random.choice(np.arange(1, 6), size=100)
print(result)


[4 4 5 5 3 3 2 4 1 1 5 5 5 2 4 5 5 5 4 5 2 2 5 5 5 3 3 5 2 5 4 4 3 3 4 2 4
 2 4 3 3 2 3 4 5 3 5 4 5 1 3 1 4 4 1 3 2 4 3 1 5 3 2 3 2 1 2 5 2 3 4 1 5 1
 2 1 5 5 5 5 5 1 5 2 2 5 2 2 5 3 3 4 2 1 4 1 3 5 1 1]
