In [None]:
import numpy as np
import pandas as pd
from pykalman import KalmanFilter
import matplotlib.pyplot as plt
import folium

In [None]:
# Load the flight data; the next cell groups it by 'Route' and plots the
# 'Latitude' and 'Longitude' columns of each group.
df2 = pd.read_csv('flight_data.csv')

In [None]:
grouped = df2.groupby('Route')
# Draw one scatter plot per route so noisy tracks can be spotted by eye.
for route, group in grouped:
    fig, ax = plt.subplots()
    ax.scatter(x=group['Latitude'], y=group['Longitude'])
    ax.set_title(f'Scatterplot for Route {route}')
    # Label the axes explicitly: latitude is on x and longitude on y here,
    # which is the reverse of the usual map orientation.
    ax.set_xlabel('Latitude')
    ax.set_ylabel('Longitude')
    ax.grid(True)
    plt.show()
    # Release the figure so a long list of routes does not accumulate open figures.
    plt.close(fig)

#### Routes that need smoothing:
TLV-ADD, TLV-AUH, TLV-BAH, TLV-BKK, TLV-BOS, TLV-DEL, TLV-EVN, TLV-GYD, TLV-HKG, TLV-HKT, TLV-IAD, TLV-ICN, 
TLV-JFK, TLV-JNB, TLV-KEF, TLV-KIV, TLV-LAX, TLV-MIA, TLV-NRT, TLV-ORD, TLV-PEK, TLV-PVG, TLV-SEZ, TLV-SFO,
TLV-SKD, TLV-SSH, TLV-TAS, TLV-TBS, TLV-TZX, TLV-YYZ, TLV-ZNZ

In [None]:
# Per-route CSV files to smooth: 31 entries, one for each route listed in the
# markdown cell above. File names follow the pattern `<route>_<suffix>.csv`
# (the suffix looks like a flight number / callsign — TODO confirm).
# The smoothing loop further down reads each of these files and writes the
# result back to the same path.
csv_files=[
    './CSV/TLV-ADD_ETH415.csv',
    './CSV/TLV-AUH_EY594.csv',
    './CSV/TLV-BAH_GF973.csv',
    './CSV/TLV-BKK_ELY83.csv',
    './CSV/TLV-BOS_LY15.csv',
    './CSV/TLV-DEL_AIC140.csv',
    './CSV/TLV-EVN_ISR823.csv',
    './CSV/TLV-GYD_AHY22.csv',
    './CSV/TLV-HKG_CX676.csv',
    './CSV/TLV-HKT_ELY87.csv',
    './CSV/TLV-IAD_UA73.csv',
    './CSV/TLV-ICN_KAL958.csv',
    './CSV/TLV-JFK_LY11.csv',
    './CSV/TLV-JNB_ELY51.csv',
    './CSV/TLV-KEF_ICE551.csv',
    './CSV/TLV-KIV_FIA448.csv',
    './CSV/TLV-LAX_LY5.csv',
    './CSV/TLV-MIA_LY17.csv',
    './CSV/TLV-NRT_ELY91.csv',
    './CSV/TLV-ORD_UA141.csv',
    './CSV/TLV-PEK_CHH7958.csv',
    './CSV/TLV-PVG_CHH778.csv',
    './CSV/TLV-SEZ_SEY21.csv',
    './CSV/TLV-SFO_UA955.csv',
    './CSV/TLV-SKD_QNT5542.csv',
    './CSV/TLV-SSH_IZ1045.csv',
    './CSV/TLV-TAS_UZB304.csv',
    './CSV/TLV-TBS_IZ417.csv',
    './CSV/TLV-TZX_PC895.csv',
    './CSV/TLV-YYZ_AC81.csv',
    './CSV/TLV-ZNZ_ISR911.csv'
]

In [None]:
class KalmanFilter1D:
    """Scalar Kalman filter whose process model leaves the state unchanged.

    Attributes:
        state: Current state estimate.
        covariance: Variance of the current state estimate.
        process_variance: Variance added to ``covariance`` on every predict step.
        measurement_variance: Variance assumed for each incoming measurement.
    """

    def __init__(self, initial_state, initial_covariance, process_variance, measurement_variance):
        """Store the starting estimate and the two noise variances."""
        self.process_variance = process_variance
        self.measurement_variance = measurement_variance
        self.state = initial_state
        self.covariance = initial_covariance

    def predict(self):
        """Advance one step: the estimate is carried over, its uncertainty grows."""
        # There is no motion model, so only the covariance changes here.
        self.covariance += self.process_variance

    def update(self, measurement):
        """Blend ``measurement`` into the estimate using the Kalman gain."""
        innovation = measurement - self.state
        kalman_gain = self.covariance / (self.covariance + self.measurement_variance)
        self.state += kalman_gain * innovation
        self.covariance *= (1 - kalman_gain)

    def smooth(self, measurements):
        """Run predict + update over ``measurements`` and collect the estimates.

        This is a single forward pass (no backward smoothing pass). The filter
        continues from its current ``state``/``covariance`` and is left at the
        final estimate afterwards.

        Returns:
            list: The state estimate after each measurement, in input order.
        """
        def _filtered(measurement):
            self.predict()
            self.update(measurement)
            return self.state

        return [_filtered(measurement) for measurement in measurements]

In [None]:
# Smooth the latitude/longitude track of every file in `csv_files` with a 1-D
# Kalman filter per coordinate and write the result back to the same path.
#
# NOTE: each file is overwritten in place, so re-running this cell filters the
# already-filtered data again. Keep a copy of the raw CSVs if a re-run is needed.

# Filter tuning, shared by both coordinates and by every file.
initial_covariance = 1.0
process_variance = 0.01
measurement_variance = 10

for filename in csv_files:
    # Step 1: Read the CSV file and store it in a DataFrame
    df = pd.read_csv(filename)

    # A file without rows has no first sample to seed the filters with.
    if df.empty:
        print(f'Skipping {filename}: no rows to smooth')
        continue

    # Step 2: Extract latitude and longitude data
    latitudes = df['Latitude']
    longitudes = df['Longitude']

    # Step 3: Initialize one Kalman filter per coordinate, each seeded with the
    # first sample of its OWN series. Seeding the longitude filter with a
    # latitude drags the start of the longitude track toward the wrong value.
    # `.iloc[0]` is positional, so it does not depend on the index labels.
    kalman_filter_latitude = KalmanFilter1D(
        latitudes.iloc[0], initial_covariance, process_variance, measurement_variance
    )
    kalman_filter_longitude = KalmanFilter1D(
        longitudes.iloc[0], initial_covariance, process_variance, measurement_variance
    )

    # Step 4: Smooth latitude and longitude data using Kalman filter
    smoothed_latitudes = kalman_filter_latitude.smooth(latitudes)
    smoothed_longitudes = kalman_filter_longitude.smooth(longitudes)

    # Step 5: Update the DataFrame with smoothed data
    df['Latitude'] = smoothed_latitudes
    df['Longitude'] = smoothed_longitudes

    # Step 6: Save the updated DataFrame back to the CSV file
    df.to_csv(filename, index=False)

In [None]:
# Load the smoothed dataset used for the plots below.
# NOTE(review): no visible cell writes 'Smoothed_data.csv' — the smoothing loop
# overwrites the per-route files under ./CSV/ instead — so this file is
# presumably assembled outside this notebook; confirm where it comes from.
smoothed_data = pd.read_csv('Smoothed_data.csv')

In [None]:
# Preview the first rows of the smoothed dataset as a quick sanity check.
smoothed_data.head()

In [None]:
grouped = smoothed_data.groupby('Route')
# Draw one scatter plot per route from the smoothed data.
for route, group in grouped:
    fig, ax = plt.subplots()
    ax.scatter(x=group['Latitude'], y=group['Longitude'])
    ax.set_title(f'Scatterplot for Route {route}')
    # Label the axes explicitly: latitude is on x and longitude on y here,
    # which is the reverse of the usual map orientation.
    ax.set_xlabel('Latitude')
    ax.set_ylabel('Longitude')
    ax.grid(True)
    plt.show()
    # Release the figure so a long list of routes does not accumulate open figures.
    plt.close(fig)