In [2]:
import pandas as pd
df=pd.read_csv('filtered_maids_pingsink.csv')

# Create bins for pingsink scores
bins = [0, 0.2, 0.5, 0.75, 0.9, 1.0]
labels = ['0-20%', '20-50%', '50-75%', '75-90%', '90-100%']

# Add a new column with binned pingsink scores
df['pingsink_bin'] = pd.cut(df['pingsink'], bins=bins, labels=labels, right=True)

# Filter to show only home, work, leisure categories
categories_of_interest = ['home', 'work', 'leisure']
df_filtered = df[df['category'].isin(categories_of_interest)]

# Create a pivot table to count geohashes for each category and pingsink bin
pingsink_stats = pd.pivot_table(
    data=df_filtered,
    index='category',
    columns='pingsink_bin',
    values='geohash',
    aggfunc='nunique',
    fill_value=0
)

# Reindex to ensure all categories are present
pingsink_stats = pingsink_stats.reindex(categories_of_interest)

# Calculate percentage distribution across pingsink bins for each category
pingsink_pct = pingsink_stats.div(pingsink_stats.sum(axis=1), axis=0) * 100

# Display the statistics tables
print("Count of unique geohashes by category and pingsink:")
display(pingsink_stats)

print("\nPercentage distribution by category and pingsink:")
display(pingsink_pct.round(2))

Count of unique geohashes by category and pingsink:


  pingsink_stats = pd.pivot_table(


pingsink_bin,0-20%,20-50%,50-75%,75-90%,90-100%
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
home,21,53,69,9,49
work,8,34,53,6,46
leisure,2,49,127,20,76



Percentage distribution by category and pingsink:


pingsink_bin,0-20%,20-50%,50-75%,75-90%,90-100%
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
home,10.45,26.37,34.33,4.48,24.38
work,5.44,23.13,36.05,4.08,31.29
leisure,0.73,17.88,46.35,7.3,27.74


In [3]:
import glob
import pandas as pd
files= glob.glob("./res/*.pkl")

In [None]:
from envidence_new import EvidenceStore
d=EvidenceStore()
d.load_from_pickle(files[8])
for i in d.store.keys():
    print(i)
    print(d.score_pingsink(d.derive(i)))

evfx4ek
1.0


In [26]:
d.derive(i
)

{'meta': {'first_seen': '2025-04-15T12:52:12+00:00',
  'last_seen': '2025-05-29T18:04:17+00:00',
  'span_days': 45,
  'mean_coordinate': [33.592200027778745, -7.618400137871504],
  'mean_geohash': 'evfx4ek2wrvn',
  'std_geohash_m': 0.0,
  'mean_time_diff_seconds': 293871.1538461539},
 'level_1_primary': {'pings': 14,
  'unique_days': 9,
  'active_day_ratio': 0.15000000000000002,
  'gap_bins': {'0d': 0, '1-3d': 4, '4-7d': 2, '8-30d': 2, '>30d': 0}},
 'level_2_secondary': {'hourly_hist': [0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   1,
   3,
   0,
   2,
   2,
   0,
   1,
   1,
   1,
   1,
   0,
   0,
   0,
   0,
   2],
  'weekday_hist': [1, 8, 3, 2, 0, 0, 0],
  'monthly_hist': {'2025-04': 3, '2025-05': 11},
  'night_ratio': 0.14285714285714285,
  'weekday_day_ratio': 0.7857142857142857,
  'weekend_ratio': 0.0,
  'midday_weekday_ratio': 0.2857142857142857,
  'evening_ratio': 0.07142857142857142,
  'early_late_overlap_day_ratio': 0.2222222222222222,
  'night_days_ratio': 0.2222222

In [6]:
pd.set_option('display.max_columns', None)
s=0.6
e=0.9
sugges_pingsink=df[(df['pingsink']>s) & (df['pingsink']<e)].sort_values(by=['pingsink','confidence'],ascending=False)

In [8]:
import plotly.graph_objects as go
sample_pings=sugges_pingsink.sample(min(10,len(sugges_pingsink)))

for i in range(len(sample_pings)):
    p=sample_pings.reset_index()[i:i+1]
    fig = go.Figure()

    # Add scatter points for pingsink locations
    fig.add_trace(go.Scattermapbox(
        lat=p['lat'],
        lon=p['lon'],
        mode='markers',
        marker=dict(
            size=10,
            color=p['pingsink'],
            colorscale='Reds',
            showscale=False,
            colorbar=dict(title="Pingsink Score")
        ),
        text=[f"Category: {cat}<br>Confidence: {conf:.3f}<br>Pingsink: {ps:.3f}<br>Spread: {spread:.3f}<br>Pings: {pings}" 
              for cat, conf, ps, spread, pings in zip(p['category'], 
                                         p['confidence'], 
                                         p['pingsink'],
                                         p['spread'],
                                         p['pings'])],
        hoverinfo='text'
    ))

    fig.update_layout(
        mapbox=dict(
            style="open-street-map",
            center=dict(
                lat=p['lat'].mean(),
                lon=p['lon'].mean()
            ),
            zoom=17
        ),
        title=f"Suggested Pingsink Location {i+1}",
        height=600
    )

    fig.show()



*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/




*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/




*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/




*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/




*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/




*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/




*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/




*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/




*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/




*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



In [23]:
import pandas as pd
maid_mapping=pd.read_parquet('data/months/all_maid.parquet')

In [35]:
maid=maid_mapping[maid_mapping.maid.str.startswith('ILNl06aHOk')].to_dict(orient='records')

In [36]:
maid[0]

{'maid': 'ILNl06aHOk/z+cGzKuHUBL3qVuaJoz2aNt9FwjS6wdFFbqAK0gF41e8L4ggG8JKv',
 'flux': 'B',
 'maid_flux': 12.0}

In [37]:
import duckdb
df=duckdb.query(f"""
select * from read_parquet('data/raw/2025-06-10/*.parquet')
where maid = '{maid[0]['maid']}'
""").df()

In [38]:
df

Unnamed: 0,maid,timestamp,date,country,latitude,longitude,horizontal_accuracy,ipv4,ipv6,altitude,altitude_accuracy,flux
0,ILNl06aHOk/z+cGzKuHUBL3qVuaJoz2aNt9FwjS6wdFFbq...,2025-06-11 17:33:58+07:00,2025-06-11,MAR,30.4189,-9.5929,0.0,160.177.190.193,,0.0,0.0,E
1,ILNl06aHOk/z+cGzKuHUBL3qVuaJoz2aNt9FwjS6wdFFbq...,2025-06-11 17:33:54+07:00,2025-06-11,MAR,30.4189,-9.5929,0.0,160.177.190.193,,0.0,0.0,E
2,ILNl06aHOk/z+cGzKuHUBL3qVuaJoz2aNt9FwjS6wdFFbq...,2025-06-11 17:38:49+07:00,2025-06-11,MAR,30.4189,-9.5929,0.0,160.177.190.193,,0.0,0.0,E
3,ILNl06aHOk/z+cGzKuHUBL3qVuaJoz2aNt9FwjS6wdFFbq...,2025-06-11 17:38:53+07:00,2025-06-11,MAR,30.4189,-9.5929,0.0,160.177.190.193,,0.0,0.0,E
4,ILNl06aHOk/z+cGzKuHUBL3qVuaJoz2aNt9FwjS6wdFFbq...,2025-06-11 17:41:06+07:00,2025-06-11,MAR,30.4189,-9.5929,0.0,160.177.190.193,,0.0,0.0,E
5,ILNl06aHOk/z+cGzKuHUBL3qVuaJoz2aNt9FwjS6wdFFbq...,2025-06-11 17:42:20+07:00,2025-06-11,MAR,30.4189,-9.5929,0.0,160.177.190.193,,0.0,0.0,E
6,ILNl06aHOk/z+cGzKuHUBL3qVuaJoz2aNt9FwjS6wdFFbq...,2025-06-11 17:43:51+07:00,2025-06-11,MAR,30.4189,-9.5929,0.0,160.177.190.193,,0.0,0.0,E
7,ILNl06aHOk/z+cGzKuHUBL3qVuaJoz2aNt9FwjS6wdFFbq...,2025-06-11 17:44:46+07:00,2025-06-11,MAR,30.4189,-9.5929,0.0,160.177.190.193,,0.0,0.0,E
