#### Generate synthetic Data for Telecom Location

In [8]:
import pandas as pd
import numpy as np
import random
from faker import Faker

fake = Faker()

# Parameters
n_users = 2000
n_towers = 50
n_pois = 100
n_records = 5000


In [9]:
# Generate synthetic mobile tower logs
import pandas as pd
import numpy as np

# Number of towers
n_towers = 50

# Define the bounding box for London streets
latitude_range = (51.5098, 51.5155)
longitude_range = (-0.1515, -0.1357)

# Generate random tower coordinates within the bounding box
towers = pd.DataFrame({
    'tower_id': range(1, n_towers + 1),
    'latitude': np.random.uniform(latitude_range[0], latitude_range[1], n_towers),
    'longitude': np.random.uniform(longitude_range[0], longitude_range[1], n_towers)
})

# Display the updated towers DataFrame
#print(towers)
brands = ['Nike', 'Apple', 'Samsung', 'Adidas', 'Sony']
eco_profiles = ['Low', 'Middle', 'High']
users = pd.DataFrame({
    'user_id': range(1, n_users + 1),
    'age': np.random.randint(18, 65, n_users),
    'gender': np.random.choice(['Male', 'Female', 'Other'], n_users),
    'interest': np.random.choice(['Sports', 'Shopping', 'Food', 'Travel'], n_users),
    'socioeconomic_profile': np.random.choice(eco_profiles, n_users),
    'brand_affinity': np.random.choice(brands, n_users)
})

# Adding 'brand_affinity' with random selection of 0 to 5 brands
users['brand_affinity'] = [
    np.random.choice(brands, np.random.randint(1, 6), replace=False).tolist()  # 0 to 5 brands per user
    for _ in range(n_users)
]

logs = pd.DataFrame({
    'user_id': np.random.choice(users['user_id'], n_records),
    'tower_id': np.random.choice(towers['tower_id'], n_records),
    'timestamp': pd.date_range(start='2024-01-01', periods=n_records, freq='min'),
})

#print(users.head(10))
#print(towers.head(10))

# Step 1: Join logs with users on 'user_id'
logs_users = logs.merge(users, on='user_id', how='inner')

# Step 2: Join the result with towers on 'tower_id'
final_data = logs_users.merge(towers, on='tower_id', how='inner')

# Display the final joined dataset
print(final_data)

      user_id  tower_id           timestamp  age  gender  interest  \
0         425        48 2024-01-01 00:00:00   27    Male  Shopping   
1        1810        46 2024-01-01 00:01:00   30   Other      Food   
2        1311        23 2024-01-01 00:02:00   28    Male    Travel   
3        1201        35 2024-01-01 00:03:00   39    Male    Sports   
4        1481        33 2024-01-01 00:04:00   31  Female      Food   
...       ...       ...                 ...  ...     ...       ...   
4995      983        16 2024-01-04 11:15:00   18   Other      Food   
4996      595        35 2024-01-04 11:16:00   58    Male  Shopping   
4997      537         6 2024-01-04 11:17:00   62   Other    Sports   
4998     1929        47 2024-01-04 11:18:00   52    Male      Food   
4999     1197        23 2024-01-04 11:19:00   51    Male      Food   

     socioeconomic_profile                        brand_affinity   latitude  \
0                     High                         [Apple, Nike]  51.513284   
1

In [10]:
# Generate synthetic POI data
# Synthetic Points of Interest for Oxford Street
oxford_street_pois = [
    {"name": "Oxford Circus Station", "category": "Transport", "latitude": 51.5154, "longitude": -0.1419},
    {"name": "Selfridges", "category": "Mall", "latitude": 51.5145, "longitude": -0.1515},
    {"name": "Primark Oxford Street", "category": "Shopping", "latitude": 51.5142, "longitude": -0.1467},
    {"name": "John Lewis & Partners", "category": "Shopping", "latitude": 51.5152, "longitude": -0.1439},
    {"name": "Oxford Street Starbucks", "category": "Restaurant", "latitude": 51.5147, "longitude": -0.1475}
]

# Synthetic Points of Interest for Regent Street
regent_street_pois = [
    {"name": "Hamleys", "category": "Shopping", "latitude": 51.5121, "longitude": -0.1409},
    {"name": "Regent Street Apple Store", "category": "Shopping", "latitude": 51.5133, "longitude": -0.1418},
    {"name": "Piccadilly Circus", "category": "Tourist Attraction", "latitude": 51.5098, "longitude": -0.1357},
    {"name": "The Argyll Arms", "category": "Restaurant", "latitude": 51.5146, "longitude": -0.1412},
    {"name": "Regent Street Cinema", "category": "Entertainment", "latitude": 51.5131, "longitude": -0.1454}
]

# Combine the data into a single DataFrame
pois = pd.DataFrame(oxford_street_pois + regent_street_pois)

print(pois)

                        name            category  latitude  longitude
0      Oxford Circus Station           Transport   51.5154    -0.1419
1                 Selfridges                Mall   51.5145    -0.1515
2      Primark Oxford Street            Shopping   51.5142    -0.1467
3      John Lewis & Partners            Shopping   51.5152    -0.1439
4    Oxford Street Starbucks          Restaurant   51.5147    -0.1475
5                    Hamleys            Shopping   51.5121    -0.1409
6  Regent Street Apple Store            Shopping   51.5133    -0.1418
7          Piccadilly Circus  Tourist Attraction   51.5098    -0.1357
8            The Argyll Arms          Restaurant   51.5146    -0.1412
9       Regent Street Cinema       Entertainment   51.5131    -0.1454


#### Audience Insights
- Using clustering to segment users based on demographics and location patterns:

In [11]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Prepare user demographic features
user_features = users[['age']].join(pd.get_dummies(users['gender']))
scaler = StandardScaler()
scaled_features = scaler.fit_transform(user_features)

# Apply clustering
kmeans = KMeans(n_clusters=5, random_state=42)
users['segment'] = kmeans.fit_predict(scaled_features)


#### Movement Insights
- Using trajectory analysis to study origin-destination patterns:

In [12]:
# Merge logs with towers for location data
logs = logs.merge(towers, on='tower_id', how='left')

# Calculate user movement patterns
logs['next_latitude'] = logs.groupby('user_id')['latitude'].shift(-1)
logs['next_longitude'] = logs.groupby('user_id')['longitude'].shift(-1)

# Filter meaningful movements
movements = logs.dropna(subset=['next_latitude', 'next_longitude'])
movements['distance'] = np.sqrt(
    (movements['latitude'] - movements['next_latitude'])**2 + 
    (movements['longitude'] - movements['next_longitude'])**2
)
    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  movements['distance'] = np.sqrt(


#### POI Analysis
- Using a recommendation-like system to associate users with nearby POIs:

In [13]:
from scipy.spatial import cKDTree

# Group by user and apply clustering
def cluster_user_locations(user_data):
    coords = user_data[['latitude', 'longitude']].values
    clustering = DBSCAN(eps=0.0015, min_samples=2, metric=lambda x, y: geodesic(x, y).meters).fit(coords)
    user_data['cluster'] = clustering.labels_
    return user_data

data = final_data.groupby('user_id').apply(cluster_user_locations)
data = data[data['cluster'] != -1]  # Filter noise

# Aggregate clusters into potential POIs
clustered_pois = data.groupby(['cluster']).agg({
    'latitude': 'mean',
    'longitude': 'mean',
    'user_id': 'count'
}).rename(columns={'user_id': 'visit_count'}).reset_index()

print(clustered_pois)

# Build a KDTree for POIs
poi_tree = cKDTree(pois[['latitude', 'longitude']])

# Find nearest POIs for each tower
distances, indices = poi_tree.query(towers[['latitude', 'longitude']], k=3)
towers['nearby_pois'] = [pois.iloc[i]['name'].tolist() for i in indices]


NameError: name 'DBSCAN' is not defined

#### Visualization

##### Audience Segments

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

sns.countplot(data=users, x='segment', hue='gender')
plt.title('Audience Segmentation by Gender')
plt.show()


##### Movement Flows

In [None]:
import plotly.express as px

# Visualize movement as flow
fig = px.line_geo(movements, lat='latitude', lon='longitude', color='user_id',
                  title='User Movement Patterns',)
fig.show()


##### POI Proximity

In [None]:
import plotly.express as px

# Plot the POIs
fig = px.scatter_mapbox(
    pois,
    lat="latitude",
    lon="longitude",
    text="name",
    color="category",
    zoom=15,
    mapbox_style="open-street-map",
    title="Points of Interest on Oxford and Regent Streets"
)

fig.add_scattergeo(lat=towers['latitude'], lon=towers['longitude'], 
                   mode='markers', name='Towers', marker=dict(color='blue'))
fig.show()


#### Identify Frequently Visited POIs

In [None]:
from sklearn.cluster import DBSCAN
from geopy.distance import geodesic

# Group by user and apply clustering
def cluster_user_locations(user_data):
    coords = user_data[['latitude', 'longitude']].values
    clustering = DBSCAN(eps=0.0010, min_samples=2, metric=lambda x, y: geodesic(x, y).meters).fit(coords)
    user_data['cluster'] = clustering.labels_
    return user_data

data = final_data.groupby('user_id').apply(cluster_user_locations)
data = data[data['cluster'] != -1]  # Filter noise

# Aggregate clusters into potential POIs
clustered_pois = data.groupby(['cluster']).agg({
    'latitude': 'mean',
    'longitude': 'mean',
    'user_id': 'count'
}).rename(columns={'user_id': 'visit_count'}).reset_index()

print(clustered_pois)


In [None]:
#Matching POIs with Known Locations
from scipy.spatial import cKDTree

# Build KDTree for POIs
poi_tree = cKDTree(pois[['latitude', 'longitude']])

# Match clustered POIs to known POIs
distances, indices = poi_tree.query(clustered_pois[['latitude', 'longitude']], k=1)
clustered_pois['nearest_poi'] = [pois.iloc[i]['name'] for i in indices]
clustered_pois['poi_category'] = [pois.iloc[i]['category'] for i in indices]
clustered_pois['distance_to_poi'] = distances

print(clustered_pois)


##### Visualize the identified POIs and clusters using Plotly:

In [None]:
import plotly.express as px

fig = px.scatter_mapbox(
    clustered_pois,
    lat="latitude",
    lon="longitude",
    color="poi_category",
    size="visit_count",
    hover_name="nearest_poi",
    mapbox_style="open-street-map",
    zoom=15,
    title="Identified Points of Interest"
)

fig.add_scattermapbox(
    lat=pois['latitude'],
    lon=pois['longitude'],
    mode='markers',
    marker=dict(size=10, color='red'),
    name='Known POIs',
    text=pois['name']
)

fig.show()
