In [1]:
import pandas as pd
df = pd.read_csv("world-airports.csv", na_values=[], keep_default_na=False)
df.shape

(83486, 24)

In [2]:
print(df.columns)

Index(['id', 'ident', 'type', 'name', 'latitude_deg', 'longitude_deg',
       'elevation_ft', 'continent', 'country_name', 'iso_country',
       'region_name', 'iso_region', 'local_region', 'municipality',
       'scheduled_service', 'gps_code', 'icao_code', 'iata_code', 'local_code',
       'home_link', 'wikipedia_link', 'keywords', 'score', 'last_updated'],
      dtype='object')


In [3]:
cols_to_remove = ['id', 'ident', 'local_region', 'score', 'last_updated', 'keywords', 'scheduled_service', 'gps_code', 'local_code']
df = df.drop(columns=cols_to_remove)
print(df.columns)

Index(['type', 'name', 'latitude_deg', 'longitude_deg', 'elevation_ft',
       'continent', 'country_name', 'iso_country', 'region_name', 'iso_region',
       'municipality', 'icao_code', 'iata_code', 'home_link',
       'wikipedia_link'],
      dtype='object')


In [4]:
df['type'].unique()

array(['large_airport', 'closed', 'medium_airport', 'small_airport',
       'seaplane_base', 'heliport', 'balloonport'], dtype=object)

In [5]:
filtered_df = df[df['type'].isin(['large_airport', 'medium_airport','small_airport'])]
filtered_df.shape

(47512, 15)

In [6]:
# Remove rows where iata_code is missing (NaN) or empty
filtered_df = filtered_df[filtered_df['iata_code'].notna() & (filtered_df['iata_code'] != '')]
filtered_df.shape

(8819, 15)

In [8]:
# schengen_countries = [
#     "Austria", "Belgium", "Bulgaria", "Croatia", "Czech Republic", "Denmark", "Estonia", "Finland",
#     "France", "Germany", "Greece", "Hungary", "Iceland", "Italy", "Latvia",
#     "Liechtenstein", "Lithuania", "Luxembourg", "Malta", "Netherlands",
#     "Norway", "Poland", "Portugal", "Romania", "Slovakia", "Slovenia", "Spain",
#     "Sweden", "Switzerland", "United Kingdom"
# ]

schengen_countries = [
    "Germany"
]

schengen_df = filtered_df[filtered_df['country_name'].isin(schengen_countries)]
schengen_df.shape

(83, 15)

In [9]:
schengen_df['country_name'].unique()

array(['Germany'], dtype=object)

In [10]:
schengen_df['type'].value_counts()

type
medium_airport    39
small_airport     34
large_airport     10
Name: count, dtype: int64

In [11]:
continent_map = {
    'EU': 'Europe',
    'AS': 'Asia',
    'AF': 'Africa',
}
schengen_df['continent'] = schengen_df['continent'].map(continent_map)
schengen_df['continent'].unique()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schengen_df['continent'] = schengen_df['continent'].map(continent_map)


array(['Europe'], dtype=object)

In [12]:
schengen_df = schengen_df.sort_values(
    by=["country_name", "type"],
    ascending=[True, True]
)

In [13]:
# Save DataFrame to JSON
json_str = schengen_df.to_json(orient="records", force_ascii=False)

with open("germany_airports.json", "w", encoding="utf-8") as f:
    f.write(json_str)