In [4]:
import pandas as pd
import numpy as np
import os

from itertools import combinations
import math
import folium
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

import plotly.express as px
from dash import Dash, dcc, html, Input, Output, dash_table
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
# Folder holding the per-sensor CSV exports. Setting the LHT_DATA_DIR environment
# variable overrides the default, so the notebook can run on another machine
# without editing this cell; when it is unset or empty the original path is used.
base = os.environ.get("LHT_DATA_DIR") or r"D:\Fiveth\Project_semester_automn_2025\Marjetas_Data\Marjetas_Data\JKL LHT\Data"
# os.listdir returns entries in arbitrary order; sort for a reproducible listing.
files = sorted(os.listdir(base))
files

['LHT65005(JKL)-TEMP.csv',
 'LHT65006(JLK)-TEMP.csv',
 'LHT65007(JKL)-TEMP.csv',
 'LHT65008(JKL)-TEMP.csv',
 'LHT65009(JKL)-TEMP.csv',
 'LHT65010(JKL)-TEMP.csv',
 'LHT65013(JKL)-TEMP.csv']

In [6]:
# Show the sensor locations on a map, labelling each marker with its street name.
# Each entry maps a sensor name to its (latitude, longitude) pair.
sensors = {
    'LHT65013': (62.234563, 25.672774),
    'LHT65010': (62.260777, 25.693876),
    'LHT65009': (62.222971, 25.804673),
    'LHT65008': (62.227604, 25.736853),
    'LHT65007': (62.286678, 25.74533),
    'LHT65006': (62.265198, 25.89008),
    'LHT65005': (62.197614, 25.720489),
    'LHT65004': (62.211111, 25.752563)}

# Geocoding setup: reverse lookups go through a rate limiter that waits at
# least one second between calls.
geolocator = Nominatim(user_agent="sensor_map")
reverse = RateLimiter(geolocator.reverse, min_delay_seconds=1)

# Creating the map:
sensors_location = folium.Map(location=[62.24, 25.75], zoom_start=12)
# Adding markers with street names:
for name, (lat, lon) in sensors.items():
    location = reverse((lat, lon))
    # reverse() yields None when nothing is found, and RateLimiter (with its
    # default settings) also returns None once its retries are exhausted.
    # A result may also lack an 'address' or 'road' entry. Fall back to a
    # placeholder so one failed lookup does not abort the whole map.
    address = location.raw.get('address', {}) if location is not None else {}
    street = address.get('road') or "unknown street"
    folium.Marker([lat, lon], popup=f"{name}: {street}").add_to(sensors_location)

sensors_location

In [None]:
def haversine_km(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    The distance is computed with the haversine formula on a sphere of
    radius 6371 km.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.

    Returns
    -------
    float
        Distance along the sphere's surface, in kilometres.
    """
    earth_radius_km = 6371.0
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = (math.sin(dlat / 2) ** 2
         + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2))
         * math.sin(dlon / 2) ** 2)
    # Floating-point rounding can push `a` marginally above 1 for antipodal
    # points, which would make sqrt(1 - a) raise ValueError. Clamp to [0, 1].
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return earth_radius_km * c
# Great-circle distance for every unordered pair of sensors.
pair_rows = []
for (sensor_a, (lat_a, lon_a)), (sensor_b, (lat_b, lon_b)) in combinations(sensors.items(), 2):
    distance_km = haversine_km(lat_a, lon_a, lat_b, lon_b)
    pair_rows.append(
        dict(sensor_a=sensor_a, sensor_b=sensor_b, distance_km=round(distance_km, 3))
    )

# Tidy table ordered from the nearest pair to the farthest pair.
pairwise_distance_table = (
    pd.DataFrame(pair_rows)
    .sort_values(by="distance_km")
    .reset_index(drop=True)
)

# Quick highlights: the first and last rows of the sorted table.
closest, farthest = pairwise_distance_table.iloc[0], pairwise_distance_table.iloc[-1]
print(f"Closest:  {closest['sensor_a']} – {closest['sensor_b']} = {closest['distance_km']} km")
print(f"Farthest: {farthest['sensor_a']} – {farthest['sensor_b']} = {farthest['distance_km']} km")

pairwise_distance_table.head()

Closest:  LHT65008 – LHT65004 = 2.007 km
Farthest: LHT65013 – LHT65006 = 11.755 km


Unnamed: 0,sensor_a,sensor_b,distance_km
0,LHT65008,LHT65004,2.007
1,LHT65005,LHT65004,2.24
2,LHT65009,LHT65004,3.006
3,LHT65013,LHT65010,3.113
4,LHT65013,LHT65008,3.409


In [8]:
# Collect every CSV export in the data folder. os.listdir returns entries in
# arbitrary order, so sort the paths to make the concatenation order reproducible.
file_path = sorted(os.path.join(base, f) for f in os.listdir(base) if f.endswith('.csv'))
if not file_path:
    # pd.concat would otherwise fail with an unhelpful "No objects to concatenate".
    raise FileNotFoundError(f"No .csv files found in {base!r}")

# Stack all files (semicolon-separated) into one frame with a fresh 0..n-1 index.
df = pd.concat((pd.read_csv(f, sep=';') for f in file_path), ignore_index=True)

# Parse timestamps against the exact format; non-matching values become NaT
# and are counted by the print below.
df["Timestamp"] = pd.to_datetime(df["Timestamp"], format="%Y-%m-%d %H:%M:%S", errors="coerce")
print("NaT after parse:", df["Timestamp"].isna().sum())

# Sort the dataset by timestamp. A stable sort keeps rows with equal
# timestamps in file order, so the result is identical from run to run.
df = df.sort_values(by="Timestamp", kind="mergesort").reset_index(drop=True)

NaT after parse: 0


In [9]:
# Preview the first rows of the combined, timestamp-sorted readings.
df.head()

Unnamed: 0,Timestamp,TempC_SHT,Hum_SHT
0,2020-10-27 15:00:02,23.62,35.7
1,2020-10-27 15:01:22,24.15,34.2
2,2020-10-27 15:04:38,23.66,34.0
3,2020-10-27 15:14:30,23.32,32.8
4,2020-10-27 15:20:00,20.95,36.2
