In [22]:
import pandas as pd
import numpy as np
import plotly.express as px
import os
import matplotlib.pyplot as plt

# --- 0. Load and combine the Data1 track data (Bridge, RailJoint, Turnout) ---

data1_folder = "Data1"

files = ["Bridge.csv", "RailJoint.csv", "Turnout.csv"]
df_list = []

for f in files:
    path = os.path.join(data1_folder, f)
    df = pd.read_csv(path)
    # Keep only the coordinate columns (rows with a missing coordinate are
    # dropped); a file lacking either column is reported and skipped.
    if "Latitude" in df.columns and "Longitude" in df.columns:
        df = df[["Latitude", "Longitude"]].dropna()
        df_list.append(df)
    else:
        print(f"Warning: {f} saknar Latitude/Longitude")

# pd.concat raises an opaque "No objects to concatenate" when given an empty
# list, so fail early with a message that names the real problem.
if not df_list:
    raise ValueError(
        f"None of {files} in '{data1_folder}' contain both Latitude and Longitude columns"
    )

# Combine every track point into a single DataFrame with a fresh 0..n-1 index
code1_df = pd.concat(df_list, ignore_index=True)

# --- 1. Load the Data2 GPS and vibration channels ---

folder = "Data2"

# Every file is read as a single header-less column.
gps_lat = pd.read_csv(os.path.join(folder, "GPS.latitude.csv"), header=None, names=["Latitude"])
gps_lon = pd.read_csv(os.path.join(folder, "GPS.longitude.csv"), header=None, names=["Longitude"])
vib1 = pd.read_csv(os.path.join(folder, "CH1_ACCEL1Z1.csv"), header=None, names=["Vibration1"])
vib2 = pd.read_csv(os.path.join(folder, "CH2_ACCEL1Z2.csv"), header=None, names=["Vibration2"])

# Copy each frame's row position into a 'timestamp' column to use as the join key.
# NOTE(review): this is a row counter, not a clock time. The recorded run shows
# 36000 GPS rows against 35999954 vibration rows, so the GPS and vibration
# counters are presumably on different scales. Confirm the sampling rates.
for frame in (gps_lat, gps_lon, vib1, vib2):
    frame['timestamp'] = frame.index

# Pair latitude with longitude, and channel 1 with channel 2, row by row
df_gps = gps_lat.merge(gps_lon, on='timestamp')
df_vib = vib1.merge(vib2, on='timestamp')

# --- 2. Keep the Data2 GPS points that lie near the Data1 track ---

# A padded bounding box around the track gives a cheap first-pass filter.
margin = 1  # padding on every side, in the same units as the coordinate columns

track_lat = code1_df['Latitude']
track_lon = code1_df['Longitude']
min_lat = track_lat.min() - margin
max_lat = track_lat.max() + margin
min_lon = track_lon.min() - margin
max_lon = track_lon.max() + margin

# Series.between is inclusive on both ends by default, i.e. >= low and <= high.
inside_box = (
    df_gps['Latitude'].between(min_lat, max_lat)
    & df_gps['Longitude'].between(min_lon, max_lon)
)
df_gps_filtered = df_gps.loc[inside_box].copy()

print(f"Bounding box with margin: lat {min_lat} to {max_lat}, lon {min_lon} to {max_lon}")
print(f"Filtered GPS points (within expanded Code1 track): {len(df_gps_filtered)}")


# The bounding box is only a coarse filter, so follow it with a proximity filter:
# compute the distance (in meters) from each GPS point to the nearest track point

def haversine(lat1, lon1, lat2, lon2):
    """
    Great-circle distance in meters between lat/lon points (haversine formula).

    Parameters
    ----------
    lat1, lon1 : float or numpy.ndarray
        Latitude and longitude of the first point(s), in decimal degrees.
    lat2, lon2 : float or numpy.ndarray
        Latitude and longitude of the second point(s), in decimal degrees.
        Scalars and arrays may be mixed; NumPy broadcasting applies.

    Returns
    -------
    float or numpy.ndarray
        Distance in meters on a sphere of radius 6 371 000 m.
    """
    R = 6371000  # Earth radius in meters
    phi1, phi2 = np.radians(lat1), np.radians(lat2)
    d_phi = np.radians(lat2 - lat1)
    d_lambda = np.radians(lon2 - lon1)
    a = np.sin(d_phi/2)**2 + np.cos(phi1)*np.cos(phi2)*np.sin(d_lambda/2)**2
    # Rounding can push `a` a few ulps outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) return NaN. Clamp it first.
    a = np.clip(a, 0.0, 1.0)
    c = 2*np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c

# For every GPS point, find the smallest distance to the track (all code1_df points).
# The cost grows with len(gps_points) * len(track_points), which is why only the
# GPS points that survived the bounding-box filter are examined here.

# Plain (n, 2) arrays of [latitude, longitude]
gps_points = df_gps_filtered[["Latitude", "Longitude"]].to_numpy()
track_points = code1_df[["Latitude", "Longitude"]].to_numpy()

# One vectorized haversine call per GPS point, evaluated against the whole track
min_distances = np.array(
    [
        np.min(haversine(lat, lon, track_points[:, 0], track_points[:, 1]))
        for lat, lon in gps_points
    ],
    dtype=float,
)

# Maximum distance (in meters) for a GPS point to count as "on the track"
max_distance_m = 50

# Keep only the GPS points within that distance
mask = min_distances <= max_distance_m
df_gps_filtered = df_gps_filtered[mask].copy()

print(f"Original GPS points: {len(df_gps)}")
print(f"Filtered GPS points (within Code1 track): {len(df_gps_filtered)}")

# Select the vibration samples that belong to the retained GPS fixes.
# 'timestamp' is a row counter in both frames, and the two streams do not have
# the same number of rows (the recorded run shows 36000 GPS rows against
# 35999954 vibration rows). Comparing the counters directly could therefore
# only ever match the first len(df_gps) vibration samples. Instead, rescale
# each vibration row number onto the GPS row-number axis before matching.
# With equal-length streams this reduces to the plain comparison.
# NOTE(review): assumes both recordings start and stop together and are
# uniformly sampled. Confirm against the acquisition setup.
timestamps = df_gps_filtered['timestamp']
n_gps, n_vib = len(df_gps), len(df_vib)
if n_gps and n_vib:
    # Integer arithmetic keeps the mapping exact; the result lies in 0..n_gps-1.
    vib_as_gps_row = (df_vib['timestamp'].astype(np.int64) * n_gps) // n_vib
    df_vib_filtered = df_vib[vib_as_gps_row.isin(timestamps)].copy()
else:
    # Nothing to match against: return an empty frame with the same columns.
    df_vib_filtered = df_vib.iloc[0:0].copy()

print(f"Original vibration samples: {len(df_vib)}")
print(f"Filtered vibration samples: {len(df_vib_filtered)}")

# --- 3. Plot and save the results ---

# Scatter map of the retained GPS points on an OpenStreetMap base layer
map_options = dict(
    lat="Latitude",
    lon="Longitude",
    zoom=12,
    height=600,
    title="Filtered GPS Track within Code1 Area",
)
fig_map = px.scatter_mapbox(df_gps_filtered, **map_options)
fig_map.update_layout(mapbox_style="open-street-map")
fig_map.write_image("filtered_gps_track.png")

# Both vibration channels drawn against the row-number timestamp
fig, ax = plt.subplots(figsize=(10, 5))
channel_labels = (
    ('Vibration1', 'Vibration Channel 1'),
    ('Vibration2', 'Vibration Channel 2'),
)
for column, legend_label in channel_labels:
    ax.plot(df_vib_filtered['timestamp'], df_vib_filtered[column], label=legend_label)
ax.set_xlabel("Timestamp")
ax.set_ylabel("Acceleration")
ax.set_title("Filtered Vibration Signals (Data2 in Code1 track)")
ax.legend()
fig.tight_layout()
fig.savefig("filtered_vibrations.png")
plt.close(fig)  # release the figure once it is written to disk

print("Plots saved as filtered_gps_track.png and filtered_vibrations.png")


Bounding box with margin: lat 59.51087815584757 to 62.00906511297079, lon 13.518605115540874 to 16.35274052798546
Filtered GPS points (within expanded Code1 track): 11286
Original GPS points: 36000
Filtered GPS points (within Code1 track): 0
Original vibration samples: 35999954
Filtered vibration samples: 0
Plots saved as filtered_gps_track.png and filtered_vibrations.png


In [23]:
# Diagnostic: print the coordinate extent of both datasets so that they can be
# compared by eye.
range_report = (
    ("Data1 (Code1_df) Latitude range:", code1_df['Latitude']),
    ("Data1 (Code1_df) Longitude range:", code1_df['Longitude']),
    ("Data2 (df_gps) Latitude range:", df_gps['Latitude']),
    ("Data2 (df_gps) Longitude range:", df_gps['Longitude']),
)
for heading, values in range_report:
    print(heading, values.min(), "to", values.max())


Data1 (Code1_df) Latitude range: 60.51087815584757 to 61.00906511297079
Data1 (Code1_df) Longitude range: 14.518605115540874 to 15.35274052798546
Data2 (df_gps) Latitude range: 59.0777655 to 59.8727415
Data2 (df_gps) Longitude range: 14.9900396 to 15.3790746
