In [None]:
from IPython.display import display, HTML
# Inject CSS so the notebook's `.container` element is 90% of the page width.
display(HTML("<style>.container { width:90% !important; }</style>"))
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
import math
import pandas as pd
import numpy as np
import re
from dateutil import parser
from datetime import datetime, timedelta

# Preparing location data

In [None]:
# Load the raw location log; every element keeps its trailing newline.
with open("location.txt") as file_in:
    l = list(file_in)

In [None]:
# Each usable log line looks like "<timestamp> - Lat: <number>, Lon: <number>".
pattern = r"^(.*?) - Lat:\s*([-\d.]+),\s*Lon:\s*([-\d.]+)"

lines = []   # parsed fixes, in file order
time = ""    # timestamp text of the most recently parsed fix
count = 0    # 0-based index of a fix among consecutive fixes sharing a timestamp

for line in l:
    match = re.match(pattern, line)
    if match is None:
        # Report the unparseable line and move on to the next one.
        print("Line didn't match expected format:", line)
        continue

    line_time, lat, lon = match.groups()

    if line_time == time:
        # Same timestamp as the previous fix: advance the running index.
        count += 1
    else:
        # New timestamp: remember it and restart the index at zero.
        time = line_time
        count = 0

    lines.append({
        'time': datetime.strptime(line_time, "%Y-%m-%d %H:%M:%S"),
        'count': count,
        'lat': float(lat),
        'lon': float(lon)
    })

In [None]:
# Timestamps of the parsed fixes as a datetime64 array, index-aligned with `lines`.
fix_times = [fix['time'] for fix in lines]
np_times = np.array(fix_times, dtype=np.datetime64)

# Prepare Wardriving data

In [None]:
# Load the scan log with surrounding whitespace removed from every line.
with open("readings") as file_in:
    l = [raw.strip() for raw in file_in]

In [None]:
# A line is treated as a scan boundary when it carries a weekday/month/day
# stamp such as "Mon May  5" or "Tue May  6". Matching the general shape
# instead of two literal dates lets the same cell process logs recorded on
# any day. The pattern is unanchored, mirroring the original substring search.
timestamp_pattern = re.compile(
    r"(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+"
    r"(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+"
    r"\d{1,2}"
)

start_time = ""
end_time = ""
ssid_groups = []        # one dict per scan: 'start', 'end', 'lines'
sub_group_lines = []    # non-timestamp lines collected for the scan in progress
in_group = False
for line in l:
    is_timestamp = timestamp_pattern.search(line) is not None

    if is_timestamp and not in_group:
        # First stamp seen while idle: it opens a new scan.
        start_time = line
        in_group = True
    elif is_timestamp and len(sub_group_lines) > 0:
        # A stamp after collected content closes the scan.
        end_time = line
        ssid_groups.append({
            'start': start_time,
            'end': end_time,
            'lines': sub_group_lines
        })
        start_time = ''
        end_time = ''
        sub_group_lines = []
        in_group = False
    elif is_timestamp:
        # Back-to-back stamps with nothing in between: the later one becomes
        # the start of the still-open scan.
        start_time = line
    else:
        # Any other line is collected as scan content, including lines seen
        # while no scan is open (same as before).
        sub_group_lines.append(line)

In [None]:
# Split each scan's raw lines into per-cell records stored in group['cells'].
# A record starts at a line containing "Cell" and collects the address, ESSID,
# encryption flag, frequency and signal level from the lines that follow.
# NOTE(review): the field labels look like `iwlist scan` output - confirm.
for group in ssid_groups:
    group['cells'] = []
    cell_dict = dict()
    count = -1          # 0-based index of the current cell within this scan
    in_cell = False
    for line in group['lines']:
        if (not in_cell) and (line.find("Cell") != -1):
            count = count + 1
            cell_dict['address'] = line.split("Address:")[-1].strip()
            in_cell = True
        elif (in_cell) and (line.find("ESSID") != -1):
            cell_dict['essid'] = line.split("ESSID:")[-1].strip()

        elif (in_cell) and (line.find("Encryption") != -1):
            # Compare the value after the last ':' instead of searching the
            # whole line: the word "Encryption" itself contains "on", so a
            # substring test flagged every cell as encrypted.
            key_state = line.split(":")[-1].strip().lower()
            if key_state == "on":
                cell_dict['encrypted'] = True
            elif key_state == "off":
                cell_dict['encrypted'] = False

        elif (in_cell) and (line.find("Frequency") != -1):
            # NOTE(review): the trailing [0] keeps only the first character of
            # the value (e.g. "2" from "2.437 ..."); confirm that is intended.
            cell_dict['frequency'] = line.split("Frequency:")[-1].strip().strip("GHz")[0].strip()

        elif (in_cell) and (line.find("Signal level") != -1):
            cell_dict['power'] = line.split("Signal level=")[-1].strip()

        elif (in_cell) and (line.find("Cell") != -1):
            # The next cell header closes the record in progress.
            cell_dict['position'] = count
            group['cells'].append(cell_dict)
            cell_dict = dict()
            count = count + 1
            cell_dict['address'] = line.split("Address:")[-1].strip()
            in_cell = True

    # Flush the record still open when the scan's lines run out; without this
    # the last cell of every scan was silently dropped.
    if in_cell:
        cell_dict['position'] = count
        group['cells'].append(cell_dict)

In [None]:
# Give every cell a timestamp spread evenly between its scan's start and end
# stamps, indexed by the cell's position within the scan.
for group in ssid_groups:
    scan_cells = group['cells']
    start_timestamp = parser.parse(group['start']).timestamp()
    end_timestamp = parser.parse(group['end']).timestamp()
    # One evenly spaced epoch value per cell (endpoints included).
    times = np.linspace(start_timestamp, end_timestamp, len(scan_cells))
    for cell in scan_cells:
        slot = cell['position']
        cell['time'] = datetime.fromtimestamp(times[slot])

In [None]:
# Flatten the per-scan cell lists into one list, preserving scan order.
cells = [cell for group in ssid_groups for cell in group['cells']]

# Convert to dataframe

In [None]:
def get_lat_lon(r, lat_lon, times=None):
    """Attach the location fix closest in time to a reading.

    Parameters
    ----------
    r : dict or pandas.Series
        The reading; its 'time' entry is compared against the fix times.
    lat_lon : sequence of dict
        Location fixes, each with 'lat', 'lon' and 'time' entries.
    times : numpy.ndarray of datetime64, optional
        Fix timestamps aligned index-for-index with ``lat_lon``. When omitted,
        the notebook-level ``np_times`` array is used, as before.

    Returns
    -------
    The same ``r`` object, updated in place with 'lat', 'lon' and 'loc_time'.
    All three are set to None when there are no fixes to match against.
    """
    if times is None:
        times = np_times

    # With nothing to match against, argmin would raise; report "no location".
    if len(times) == 0 or len(lat_lon) == 0:
        r['lat'] = None
        r['lon'] = None
        r['loc_time'] = None
        return r

    # Index of the fix with the smallest absolute time gap to the reading.
    gaps = np.abs(times - np.datetime64(r['time']))
    nearest = lat_lon[int(gaps.argmin())]

    r['lat'] = nearest['lat']
    r['lon'] = nearest['lon']
    r['loc_time'] = nearest['time']
    return r

In [None]:
# One row per parsed cell.
df = pd.DataFrame.from_records(cells)


def _rescale_power(raw):
    """Take the leading number of the raw signal text and add it to 100."""
    level = float(raw.strip().split(" ")[0])
    return 100 - level * -1


# NOTE(review): presumably the raw value is a negative dBm reading, which this
# turns into a positive score - confirm against the scan output.
df['power'] = df['power'].apply(_rescale_power)

In [None]:
# Preview the first rows of the readings table.
df.head()

In [None]:
# Network name to look for; matching is case-insensitive and by substring.
target_ssid = "McDonaldsWifi"


def ole_mac(x):
    """Return True when `x` is a string containing the target SSID.

    The comparison ignores case and surrounding whitespace on the target.
    Non-string values (for example a missing ESSID) yield False; this replaces
    a bare `except:` that hid every error, not just the expected one.
    """
    if not isinstance(x, str):
        return False
    needle = target_ssid.lower().strip()
    return needle in x.lower()
# Keep only the readings whose ESSID matches the target network.
is_target_row = df['essid'].apply(ole_mac)
mc = df[is_target_row]

In [None]:
# Attach the nearest location fix to each target-network reading, row by row.
mc = mc.apply(get_lat_lon, axis=1, args=(lines,))

In [None]:
import plotly.express as px
import pandas as pd

In [None]:
# Map of the target-network readings; marker colour and size both follow power.
fig = px.scatter_mapbox(
    mc,
    lat="lat",
    lon="lon",
    color="power",
    size="power",
    opacity=0.3,
    text="essid",
    color_continuous_scale="magma",
    zoom=3,
    height=600,
)

# OpenStreetMap tiles, no margins; the final height replaces the 600 set above.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    height=1200,
)
fig.show()

In [None]:
# Attach the nearest location fix to every reading, row by row.
df = df.apply(get_lat_lon, axis=1, args=(lines,))

In [None]:
# Map of every reading; the very low opacity keeps overlapping markers legible.
fig = px.scatter_mapbox(
    df,
    lat="lat",
    lon="lon",
    color="power",
    size="power",
    opacity=0.05,
    text="essid",
    color_continuous_scale="viridis",
    zoom=3,
    height=600,
)

# OpenStreetMap tiles, no margins; the final height replaces the 600 set above.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    height=1200,
)
fig.show()

In [None]:
# Flag each reading with whether its ESSID matches the target network.
df['target'] = df['essid'].apply(ole_mac)

In [None]:
# Count how many readings were flagged as the target network and how many were not.
df['target'].value_counts()

In [None]:
# Map of every reading, coloured by the target flag: red when flagged, grey otherwise.
target_colors = {
    True: "red",
    False: "grey"
}
fig = px.scatter_mapbox(
    df,
    lat="lat",
    lon="lon",
    color="target",
    size="power",
    opacity=0.2,
    text="essid",
    # NOTE(review): a continuous scale is passed alongside a discrete colour
    # map; it probably has no effect for a True/False column - confirm.
    color_continuous_scale="solar",
    color_discrete_map=target_colors,
    zoom=3,
    height=600,
)

# OpenStreetMap tiles, no margins; the final height replaces the 600 set above.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    height=1200,
)
fig.show()

In [None]:
# Write the annotated readings to dump.csv without the index column.
df.to_csv("dump.csv", index=False)

# Power Filtering


In [None]:
# White-grid seaborn look for the figure below.
sns.set(style="whitegrid")
# Colours from the reversed viridis palette, requested as a list rather than a colormap.
palette = sns.color_palette("viridis_r", as_cmap=False)

# One box per `target` value, showing the distribution of `power`.
ax = sns.boxplot(data=df, x='target', y='power', palette=palette)

plt.title("Power Difference")
plt.show()

In [None]:
# Same map as for the target network, restricted to readings with power above 30.
strong_signal = mc['power'] > 30
fig = px.scatter_mapbox(
    mc[strong_signal],
    lat="lat",
    lon="lon",
    color="power",
    size="power",
    opacity=0.3,
    text="essid",
    color_continuous_scale="magma",
    zoom=3,
    height=600,
)

# OpenStreetMap tiles, no margins; the final height replaces the 600 set above.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    height=1200,
)
fig.show()