# Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display
%matplotlib inline
import seaborn as sns
import random
import folium
import math
sns.set_style('whitegrid')

from sklearn.cluster import KMeans

import warnings 
warnings.filterwarnings('ignore')

# Get the data

In [None]:
# Load the flight track points from the CSV file in the working directory.
flight_data = pd.read_csv('Smoothed_data.csv')

In [None]:
# Preview the first rows of the loaded data.
flight_data.head()

In [None]:
# Column dtypes and non-null counts.
flight_data.info()

In [None]:
# Track points without a full position (missing Longitude or Latitude) are unusable:
# collect them, then remove them from flight_data in place.
missing_position = flight_data[['Longitude', 'Latitude']].isnull().any(axis=1)
rows_with_nan = flight_data[missing_position]
flight_data.drop(index=rows_with_nan.index, inplace=True)

In [None]:
# Strip the comma separators from the text columns 'feet' and 'Rate'.
for column in ('feet', 'Rate'):
    flight_data[column] = flight_data[column].str.replace(',', '')

In [None]:
# Convert the 'feet' column to float.
# NOTE(review): only 'feet' is cast here; 'Rate' is not converted in the visible code — confirm intended.
flight_data['feet'] = flight_data['feet'].astype(float)

# Grouping the data

In [None]:
# One row per (Route, Airline, FlightNum, Aircraft, Date) combination; every other
# column is collapsed into a Python list holding that group's values.
grouped_df = flight_data.groupby(['Route','Airline','FlightNum','Aircraft','Date']).agg(lambda x: x.tolist()).reset_index()

In [None]:
# Pair each group's latitude and longitude lists into one list of (lat, lon) tuples.
grouped_df['Sequence'] = pd.Series(
    [
        list(zip(lat_values, lon_values))
        for lat_values, lon_values in zip(grouped_df['Latitude'], grouped_df['Longitude'])
    ],
    index=grouped_df.index,
)

In [None]:
# The per-point list columns are no longer needed once 'Sequence' exists.
per_point_columns = ['Latitude', 'Longitude','mph','feet','Course','Time','Kts','Rate','Reporting Facility']
grouped_df.drop(columns=per_point_columns, inplace=True)

In [None]:
# Show the grouped flights.
grouped_df

# Subsequence of Europe

## Extract European destinations from the dataframe

In [None]:
# Route labels selected for the Europe subset; matched against the 'Route' column.
selected_europe_routes = ['TLV - ADB','TLV - AMS','TLV - ARN','TLV - ATH','TLV - AYT','TLV - BCN','TLV - BEG','TLV - BER',
                         'TLV - BLQ','TLV - BRI','TLV - BRU','TLV - BUD','TLV - CDG','TLV - CLJ','TLV - CTA','TLV - DEB',
                         'TLV - DLM','TLV - DME','TLV - DUB','TLV - DUS','TLV - FCO','TLV - FKB','TLV - FMM','TLV - FRA',
                         'TLV - GVA','TLV - HEL','TLV - HER','TLV - IAS','TLV - IST','TLV - KEF','TLV - KIV','TLV - KRK',
                         'TLV - LCA','TLV - LHR','TLV - LIS','TLV - MAD','TLV - MAN','TLV - MLA','TLV - MUC','TLV - MXP',
                         'TLV - NAP','TLV - NCE','TLV - OPO','TLV - OTP','TLV - PFO','TLV - PMI','TLV - PRG','TLV - RHO',
                         'TLV - RIX','TLV - SOF','TLV - STR','TLV - SZG','TLV - TGD','TLV - TIA','TLV - TZX','TLV - VAR',
                         'TLV - VCE','TLV - VIE','TLV - VNO','TLV - VRN','TLV - WAW','TLV - ZAG','TLV - ZRH','TLV - ZTH']

In [None]:
# Boolean mask over the raw track points: True where the route is a selected European route.
route_filter_all = flight_data['Route'].isin(selected_europe_routes)

In [None]:
# Same mask, computed over the grouped (one row per flight) frame.
route_filter_group = grouped_df['Route'].isin(selected_europe_routes)

In [None]:
# Raw track points and grouped flights restricted to the European routes.
# .copy() makes europe_df an independent frame: new columns are assigned to it
# later, and assigning to a filtered slice of flight_data is chained assignment.
europe_df = flight_data[route_filter_all].copy()
europe_df_group = grouped_df[route_filter_group]

In [None]:
# Show the European track points.
europe_df

In [None]:
# Show the European flights (one row per flight).
europe_df_group

In [None]:
import pandas as pd
import folium
from IPython.display import display

# Base map; the initial view is centred on London.
map_center = [51.5074, -0.1278]
m = folium.Map(location=map_center, zoom_start=5)

# Put a small circle marker on every recorded position of every European flight.
for sequence in europe_df_group['Sequence']:
    for latitude, longitude in sequence:
        marker = folium.CircleMarker([latitude, longitude], radius=1, fill=True)
        marker.add_to(m)

# Render the map as the cell output.
display(m)


### Calculating distance and angle differences between points in every sequence

In [None]:
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Uses the haversine formula on a sphere of radius 6371 km.

    Args:
        lat1, lon1: latitude and longitude of the first point, in degrees.
        lat2, lon2: latitude and longitude of the second point, in degrees.

    Returns:
        The distance between the two points in kilometres, as a float.
    """
    R = 6371.0  # Earth's radius in kilometers
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon / 2) ** 2
    )
    # Floating-point rounding can push `a` a hair above 1 for (near-)antipodal
    # points, which would make sqrt(1 - a) raise ValueError. Cap it at 1.
    # (A plain comparison is used so that NaN inputs still propagate as NaN.)
    if a > 1.0:
        a = 1.0
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return R * c

In [None]:
# Distance (km, rounded to 2 decimals) between consecutive rows of each route.
# Each route's values are stored in a Series labelled with that group's own index,
# so the final assignment aligns by row label. groupby('Route') iterates routes in
# sorted order, which need not match the row order of europe_df.
delta_parts = []

for route, group in europe_df.groupby('Route'):
    lats = group['Latitude'].to_numpy()
    lons = group['Longitude'].to_numpy()
    # The first row of a route has no predecessor, so its delta is 0.
    deltas = [0.0] + [
        round(haversine(lat1, lon1, lat2, lon2), 2)
        for lat1, lon1, lat2, lon2 in zip(lats, lons, lats[1:], lons[1:])
    ]
    delta_parts.append(pd.Series(deltas, index=group.index, dtype=float))

# pd.concat() rejects an empty list, so fall back to an empty Series.
europe_df['Delta'] = pd.concat(delta_parts) if delta_parts else pd.Series(dtype=float)

In [None]:
def calculate_angle(lat1, lon1, lat2, lon2):
    """Return the initial bearing from point 1 to point 2, in degrees.

    Args:
        lat1, lon1: latitude and longitude of the start point, in degrees.
        lat2, lon2: latitude and longitude of the end point, in degrees.

    Returns:
        The result of atan2 converted to degrees, i.e. a value in [-180, 180];
        0 means due north and 90 means due east.
    """
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    dlon = math.radians(lon2 - lon1)
    y = math.sin(dlon) * math.cos(phi2)
    x = math.cos(phi1) * math.sin(phi2) - math.sin(phi1) * math.cos(phi2) * math.cos(dlon)
    return math.degrees(math.atan2(y, x))

In [None]:
# Bearing (degrees, rounded to 2 decimals) from each row to the next within a route.
# Each route's values are stored in a Series labelled with that group's own index,
# so the final assignment aligns by row label. groupby('Route') iterates routes in
# sorted order, which need not match the row order of europe_df.
angle_parts = []

for route, group in europe_df.groupby('Route'):
    lats = group['Latitude'].to_numpy()
    lons = group['Longitude'].to_numpy()
    # The first row of a route has no predecessor, so its angle is 0.
    angles = [0.0] + [
        round(calculate_angle(lat1, lon1, lat2, lon2), 2)
        for lat1, lon1, lat2, lon2 in zip(lats, lons, lats[1:], lons[1:])
    ]
    angle_parts.append(pd.Series(angles, index=group.index, dtype=float))

# pd.concat() rejects an empty list, so fall back to an empty Series.
europe_df['Angle'] = pd.concat(angle_parts) if angle_parts else pd.Series(dtype=float)

In [None]:
# Absolute change in step distance (Delta) between consecutive rows of each route.
# groupby(...).diff() returns a Series labelled with europe_df's own index, so the
# assignment lines up row by row even when europe_df is not sorted by Route.
delta_changes = europe_df.groupby('Route')['Delta'].diff().abs()

# The first row of every route has no predecessor (diff() yields NaN); record 0.
europe_df['DeltaChange'] = delta_changes.fillna(0)

In [None]:
# Absolute change in bearing (Angle) between consecutive rows of each route.
# groupby(...).diff() returns a Series labelled with europe_df's own index, so the
# assignment lines up row by row even when europe_df is not sorted by Route.
raw_change = europe_df.groupby('Route')['Angle'].diff().abs()

# Bearings live on a circle: going from 179 to -179 degrees is a 2 degree turn,
# not 358. Fold every difference above 180 back into the range [0, 180].
angle_changes = raw_change.where(raw_change <= 180, 360 - raw_change)

# The first row of every route has no predecessor (diff() yields NaN); record 0.
europe_df['AngleChange'] = angle_changes.fillna(0)

In [None]:
# One DeltaChange histogram per European route, each in its own figure.
for route, route_rows in europe_df.groupby('Route'):
    fig, ax = plt.subplots()
    ax.hist(route_rows['DeltaChange'], bins=60, edgecolor='black', alpha=0.7)
    ax.set_title(f'Histogram for Route {route}')
    ax.set_xlabel('Delta Change')
    ax.set_ylabel('Frequency')
    ax.grid(True)
    plt.show()

In [None]:

# One AngleChange histogram per European route, each in its own figure.
for route, route_rows in europe_df.groupby('Route'):
    fig, ax = plt.subplots()
    ax.hist(route_rows['AngleChange'], bins=60, edgecolor='black', alpha=0.7)
    ax.set_title(f'Histogram for Route {route}')
    ax.set_xlabel('Angle Change')
    ax.set_ylabel('Frequency')
    ax.grid(True)
    plt.show()

In [None]:
# Scatter-plot every European flight's track in its own figure, titled by route.
# The Route and Sequence columns are iterated directly; expanding each sequence
# with apply(pd.Series) would build a wide, NaN-padded frame only to unpack it again.
for route_name, sequence in zip(europe_df_group['Route'], europe_df_group['Sequence']):
    # Split the (latitude, longitude) pairs into two parallel tuples.
    latitudes, longitudes = zip(*sequence) if sequence else ((), ())

    plt.figure(figsize=(10, 6))  # Adjust the figure size as needed
    plt.scatter(longitudes, latitudes, marker='o', alpha=0.5)
    plt.title(f'{route_name}')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.grid(True)
    plt.show()

## Routes where we found strange turns or angles:
### TLV - ARN, TLV - ATH, TLV - AYT, TLV - BER, TLV - BUD, TLV - CLJ, TLV - DEB, TLV - DLM, TLV - DME, TLV - FMM, TLV - HEL, TLV - IAS, TLV - IST, TLV - KIV, TLV - KRK, TLV - LCA, TLV - MAD, TLV - OPO, TLV - OTP, TLV - PMI, TLV - RHO, TLV - RIX, TLV - TZX, TLV - VAR, TLV - VNO, TLV - WAW

In [None]:
# Routes with unusual turns/angles that are inspected on the coloured map below.
# Labels must match the 'Route' values exactly ('TLV - XXX'): a misspelt label
# such as 'TLV _ BUD' or 'TlV - HEl' is silently dropped by isin().
# NOTE(review): the markdown heading above also lists TLV - AYT, TLV - DLM and
# TLV - IST, which are not in this list — confirm whether that is intentional.
selected_europe_angles = ['TLV - ARN', 'TLV - ATH', 'TLV - BER', 'TLV - BUD', 'TLV - CLJ',
                          'TLV - DEB', 'TLV - DME', 'TLV - FMM', 'TLV - HEL', 'TLV - IAS',
                          'TLV - KIV', 'TLV - KRK', 'TLV - LCA', 'TLV - MAD', 'TLV - OPO',
                          'TLV - OTP', 'TLV - PMI', 'TLV - RHO', 'TLV - RIX', 'TLV - TZX',
                          'TLV - VAR', 'TLV - VNO', 'TLV - WAW']

In [None]:
# Mask over the European flights: True where the route is in selected_europe_angles.
route_filter_group = europe_df_group['Route'].isin(selected_europe_angles)

In [None]:
# European flights restricted to the routes selected for their turns/angles.
europe_df_angel = europe_df_group[route_filter_group]

In [None]:
# Show the selected flights.
europe_df_angel

In [None]:
import pandas as pd
import folium
from IPython.display import display


# Create a Folium map centered at an initial location (London's coordinates)
map_center = [51.5074, -0.1278]
m = folium.Map(location=map_center, zoom_start=5)

# Palette of valid CSS colour names. The marker colour is handed to the browser
# as-is, so an unknown name (e.g. 'lightred', 'darkbrown') would not render as
# the intended colour.
colors = ['blue', 'green', 'red', 'purple', 'orange', 'pink', 'gray', 'brown', 'cyan', 'magenta',
          'lightblue', 'lightgreen', 'salmon', 'plum', 'gold', 'lightpink',
          'lightgray', 'tan', 'lightcyan', 'violet', 'darkblue', 'darkgreen',
          'darkred', 'indigo', 'darkorange', 'deeppink', 'darkgray', 'saddlebrown', 'darkcyan', 'darkmagenta']

# Draw each flight's points in one colour. The colour is chosen by the flight's
# position in the frame (not its index label), so consecutive flights always get
# different palette entries.
for position, sequence in enumerate(europe_df_angel['Sequence']):
    color = colors[position % len(colors)]

    for latitude, longitude in sequence:
        folium.CircleMarker([latitude, longitude], radius=1, color=color, fill=True, fill_color=color).add_to(m)

# Display the Folium map directly in the notebook or script
display(m)


## Conclusions on the subsequences of Europe

#### We can see that flights towards northern Europe (e.g. Stockholm, Helsinki, Riga, Moscow) take a sharp turn north above the Slovakia–Hungary border. This can be caused by several factors, such as airspace structure, air traffic flow management, navigational waypoints and more.

#### The flight to Debrecen circled before landing, which can have multiple causes, e.g. getting rid of fuel or a busy runway.

#### The flights to Madrid and Porto split around the island of Sardinia although they are both on the same longitude, with the flight to Porto heading slightly further north.

# Subsequence of Asia

In [None]:
# Route labels selected for the Asia subset; matched against the 'Route' column.
# NOTE(review): 'TLV - TZX' is also listed in selected_europe_routes — confirm it belongs in both.
# NOTE(review): 'TLV - ADD' is presumably Addis Ababa (Africa) — confirm it belongs in this subset.
selected_asia_routes = [ 'TLV - ADD', 'TLV - BKK', 'TLV - DEL', 'TLV - GYD', 'TLV - HKG', 'TLV - HKT',
                        'TLV - PEK','TLV - PVG',  'TLV - TAS', 'TLV - TZX','TLV - AMM','TLV - BAH','TLV - AUH',
                        'TLV - ICN',  'TLV - SKD', 'TLV - NRT']

In [None]:
# Boolean mask over the raw track points: True where the route is a selected Asian route.
route_filter_all = flight_data['Route'].isin(selected_asia_routes)

In [None]:
# Same mask, computed over the grouped (one row per flight) frame.
route_filter_group = grouped_df['Route'].isin(selected_asia_routes)

In [None]:
# Raw track points and grouped flights restricted to the Asian routes.
# .copy() makes asia_df an independent frame: new columns are assigned to it
# later, and assigning to a filtered slice of flight_data is chained assignment.
asia_df = flight_data[route_filter_all].copy()
asia_df_group = grouped_df[route_filter_group]

In [None]:
# Show the Asian track points.
asia_df

In [None]:
# Show the Asian flights (one row per flight).
asia_df_group

In [None]:
import pandas as pd
import folium
from IPython.display import display

# Base map; the initial view is centred on latitude 40.5074, longitude 30.1278.
map_center = [40.5074, 30.1278]
m = folium.Map(location=map_center, zoom_start=5)

# Put a small circle marker on every recorded position of every Asian flight.
for sequence in asia_df_group['Sequence']:
    for latitude, longitude in sequence:
        marker = folium.CircleMarker([latitude, longitude], radius=1, fill=True)
        marker.add_to(m)

# Render the map as the cell output.
display(m)


In [None]:
# Distance (km, rounded to 2 decimals) between consecutive rows of each route.
# Each route's values are stored in a Series labelled with that group's own index,
# so the final assignment aligns by row label. groupby('Route') iterates routes in
# sorted order, which need not match the row order of asia_df.
delta_parts = []

for route, group in asia_df.groupby('Route'):
    lats = group['Latitude'].to_numpy()
    lons = group['Longitude'].to_numpy()
    # The first row of a route has no predecessor, so its delta is 0.
    deltas = [0.0] + [
        round(haversine(lat1, lon1, lat2, lon2), 2)
        for lat1, lon1, lat2, lon2 in zip(lats, lons, lats[1:], lons[1:])
    ]
    delta_parts.append(pd.Series(deltas, index=group.index, dtype=float))

# pd.concat() rejects an empty list, so fall back to an empty Series.
asia_df['Delta'] = pd.concat(delta_parts) if delta_parts else pd.Series(dtype=float)

In [None]:
# Bearing (degrees, rounded to 2 decimals) from each row to the next within a route.
# Each route's values are stored in a Series labelled with that group's own index,
# so the final assignment aligns by row label. groupby('Route') iterates routes in
# sorted order, which need not match the row order of asia_df.
angle_parts = []

for route, group in asia_df.groupby('Route'):
    lats = group['Latitude'].to_numpy()
    lons = group['Longitude'].to_numpy()
    # The first row of a route has no predecessor, so its angle is 0.
    angles = [0.0] + [
        round(calculate_angle(lat1, lon1, lat2, lon2), 2)
        for lat1, lon1, lat2, lon2 in zip(lats, lons, lats[1:], lons[1:])
    ]
    angle_parts.append(pd.Series(angles, index=group.index, dtype=float))

# pd.concat() rejects an empty list, so fall back to an empty Series.
asia_df['Angle'] = pd.concat(angle_parts) if angle_parts else pd.Series(dtype=float)

In [None]:
# Absolute change in step distance (Delta) between consecutive rows of each route.
# groupby(...).diff() returns a Series labelled with asia_df's own index, so the
# assignment lines up row by row even when asia_df is not sorted by Route.
delta_changes = asia_df.groupby('Route')['Delta'].diff().abs()

# The first row of every route has no predecessor (diff() yields NaN); record 0.
asia_df['DeltaChange'] = delta_changes.fillna(0)

In [None]:
# Absolute change in bearing (Angle) between consecutive rows of each route.
# groupby(...).diff() returns a Series labelled with asia_df's own index, so the
# assignment lines up row by row even when asia_df is not sorted by Route.
raw_change = asia_df.groupby('Route')['Angle'].diff().abs()

# Bearings live on a circle: going from 179 to -179 degrees is a 2 degree turn,
# not 358. Fold every difference above 180 back into the range [0, 180].
angle_changes = raw_change.where(raw_change <= 180, 360 - raw_change)

# The first row of every route has no predecessor (diff() yields NaN); record 0.
asia_df['AngleChange'] = angle_changes.fillna(0)

In [None]:
# One DeltaChange histogram per Asian route, each in its own figure.
for route, route_rows in asia_df.groupby('Route'):
    fig, ax = plt.subplots()
    ax.hist(route_rows['DeltaChange'], bins=60, edgecolor='black', alpha=0.7)
    ax.set_title(f'Histogram for Route {route}')
    ax.set_xlabel('Delta Change')
    ax.set_ylabel('Frequency')
    ax.grid(True)
    plt.show()

In [None]:
# One AngleChange histogram per Asian route, each in its own figure.
for route, route_rows in asia_df.groupby('Route'):
    fig, ax = plt.subplots()
    ax.hist(route_rows['AngleChange'], bins=60, edgecolor='black', alpha=0.7)
    ax.set_title(f'Histogram for Route {route}')
    ax.set_xlabel('Angle Change')
    ax.set_ylabel('Frequency')
    ax.grid(True)
    plt.show()

In [None]:
# Scatter-plot every Asian flight's track in its own figure, titled by route.
# The Route and Sequence columns are iterated directly; expanding each sequence
# with apply(pd.Series) would build a wide, NaN-padded frame only to unpack it again.
for route_name, sequence in zip(asia_df_group['Route'], asia_df_group['Sequence']):
    # Split the (latitude, longitude) pairs into two parallel tuples.
    latitudes, longitudes = zip(*sequence) if sequence else ((), ())

    plt.figure(figsize=(10, 6))  # Adjust the figure size as needed
    plt.scatter(longitudes, latitudes, marker='o', alpha=0.5)
    plt.title(f'{route_name}')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.grid(True)
    plt.show()

## Conclusions on the subsequences of Asia


#### There is an airspace over which Israel cannot fly due to emission considerations

#### We can see that flights from TLV to Asian destinations take one of two routes: flights to southern destinations such as India or Thailand pass south of Iran, Iraq and Afghanistan, while flights to northern destinations such as Armenia, South Korea, China and Japan pass north of those countries. This happens because of political conflicts between Israel and those countries, which mean that Israeli airlines cannot fly over them.

# Subsequence of America

In [None]:
# Route labels selected for the America subset; matched against the 'Route' column.
# NOTE(review): 'TLV - TRN' looks like Turin (Italy) rather than an American destination — confirm.
selected_america_routes = ['TLV - ORD',   'TLV - IAD',  'TLV - BOS', 'TLV - ATL', 'TLV - YUL'
                       , 'TLV - LAX',  'TLV - YYZ', 'TLV - YVR',  'TLV - TRN'  ,'TLV - MIA'  , 'TLV - JFK']

In [None]:
# Boolean mask over the raw track points: True where the route is a selected American route.
route_filter_all = flight_data['Route'].isin(selected_america_routes)

In [None]:
# Same mask, computed over the grouped (one row per flight) frame.
route_filter_group = grouped_df['Route'].isin(selected_america_routes)

In [None]:
# Raw track points and grouped flights restricted to the American routes.
# .copy() makes america_df an independent frame: new columns are assigned to it
# later, and assigning to a filtered slice of flight_data is chained assignment.
america_df = flight_data[route_filter_all].copy()
america_df_group = grouped_df[route_filter_group]

In [None]:
# Show the American track points.
america_df

In [None]:
# Show the American flights (one row per flight).
america_df_group

In [None]:
import pandas as pd
import folium
from IPython.display import display

# Base map; the initial view is centred on London.
map_center = [51.5074, -0.1278]
m = folium.Map(location=map_center, zoom_start=5)

# Put a small circle marker on every recorded position of every American flight.
for sequence in america_df_group['Sequence']:
    for latitude, longitude in sequence:
        marker = folium.CircleMarker([latitude, longitude], radius=1, fill=True)
        marker.add_to(m)

# Render the map as the cell output.
display(m)


In [None]:
# Distance (km, rounded to 2 decimals) between consecutive rows of each route.
# Each route's values are stored in a Series labelled with that group's own index,
# so the final assignment aligns by row label. groupby('Route') iterates routes in
# sorted order, which need not match the row order of america_df.
delta_parts = []

for route, group in america_df.groupby('Route'):
    lats = group['Latitude'].to_numpy()
    lons = group['Longitude'].to_numpy()
    # The first row of a route has no predecessor, so its delta is 0.
    deltas = [0.0] + [
        round(haversine(lat1, lon1, lat2, lon2), 2)
        for lat1, lon1, lat2, lon2 in zip(lats, lons, lats[1:], lons[1:])
    ]
    delta_parts.append(pd.Series(deltas, index=group.index, dtype=float))

# pd.concat() rejects an empty list, so fall back to an empty Series.
america_df['Delta'] = pd.concat(delta_parts) if delta_parts else pd.Series(dtype=float)

In [None]:
# Bearing (degrees, rounded to 2 decimals) from each row to the next within a route.
# Each route's values are stored in a Series labelled with that group's own index,
# so the final assignment aligns by row label. groupby('Route') iterates routes in
# sorted order, which need not match the row order of america_df.
angle_parts = []

for route, group in america_df.groupby('Route'):
    lats = group['Latitude'].to_numpy()
    lons = group['Longitude'].to_numpy()
    # The first row of a route has no predecessor, so its angle is 0.
    angles = [0.0] + [
        round(calculate_angle(lat1, lon1, lat2, lon2), 2)
        for lat1, lon1, lat2, lon2 in zip(lats, lons, lats[1:], lons[1:])
    ]
    angle_parts.append(pd.Series(angles, index=group.index, dtype=float))

# pd.concat() rejects an empty list, so fall back to an empty Series.
america_df['Angle'] = pd.concat(angle_parts) if angle_parts else pd.Series(dtype=float)

In [None]:
# Absolute change in step distance (Delta) between consecutive rows of each route.
# groupby(...).diff() returns a Series labelled with america_df's own index, so the
# assignment lines up row by row even when america_df is not sorted by Route.
delta_changes = america_df.groupby('Route')['Delta'].diff().abs()

# The first row of every route has no predecessor (diff() yields NaN); record 0.
america_df['DeltaChange'] = delta_changes.fillna(0)

In [None]:
# Absolute change in bearing (Angle) between consecutive rows of each route.
# groupby(...).diff() returns a Series labelled with america_df's own index, so the
# assignment lines up row by row even when america_df is not sorted by Route.
raw_change = america_df.groupby('Route')['Angle'].diff().abs()

# Bearings live on a circle: going from 179 to -179 degrees is a 2 degree turn,
# not 358. Fold every difference above 180 back into the range [0, 180].
angle_changes = raw_change.where(raw_change <= 180, 360 - raw_change)

# The first row of every route has no predecessor (diff() yields NaN); record 0.
america_df['AngleChange'] = angle_changes.fillna(0)

In [None]:
# One DeltaChange histogram per American route, each in its own figure.
for route, route_rows in america_df.groupby('Route'):
    fig, ax = plt.subplots()
    ax.hist(route_rows['DeltaChange'], bins=60, edgecolor='black', alpha=0.7)
    ax.set_title(f'Histogram for Route {route}')
    ax.set_xlabel('Delta Change')
    ax.set_ylabel('Frequency')
    ax.grid(True)
    plt.show()

In [None]:
# One AngleChange histogram per American route, each in its own figure.
for route, route_rows in america_df.groupby('Route'):
    fig, ax = plt.subplots()
    ax.hist(route_rows['AngleChange'], bins=60, edgecolor='black', alpha=0.7)
    ax.set_title(f'Histogram for Route {route}')
    ax.set_xlabel('Angle Change')
    ax.set_ylabel('Frequency')
    ax.grid(True)
    plt.show()

In [None]:
# Scatter-plot every American flight's track in its own figure, titled by route.
# The Route and Sequence columns are iterated directly; expanding each sequence
# with apply(pd.Series) would build a wide, NaN-padded frame only to unpack it again.
for route_name, sequence in zip(america_df_group['Route'], america_df_group['Sequence']):
    # Split the (latitude, longitude) pairs into two parallel tuples.
    latitudes, longitudes = zip(*sequence) if sequence else ((), ())

    plt.figure(figsize=(10, 6))  # Adjust the figure size as needed
    plt.scatter(longitudes, latitudes, marker='o', alpha=0.5)
    plt.title(f'{route_name}')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.grid(True)
    plt.show()

## Conclusions on the subsequences of America

#### There isn't a special pattern in the flight routes to American destinations.