In [1]:
import pandas as pd
import numpy as np
df = pd.read_csv('processed_data_davi 2.csv', dtype={
    "ICAO 24-bit code": 'str',
    "callsign": 'str',
    "origin country": 'str',
    "time at position": 'str',
    "time of last contact": 'str',
    "longitude": 'str',
    "latitude": 'str',
    "barometric altitude": 'float',
    "aircraft is grounded": 'bool',
    "velocity": 'float',
    "heading": 'float',
    "vertical rate": 'float',
    "geo_altitude": 'float',
    "squawk": 'float',
    "spi": 'bool',
    "latitude, longitude": 'str',
    "location": 'str',
    "country": 'str',
    "oblast": 'str',
}, date_parser = pd.to_datetime,
parse_dates=['time at position', 'time of last contact'])

In [2]:
df[df.country == 'Ukraine'].oblast.unique()

array([], dtype=object)

In [3]:
df.latitude = round(df.latitude.astype(float),4).astype(str)

In [4]:
df.longitude = round(df.longitude.astype(float),4).astype(str)

In [5]:
df.country = df.country[df.country.str[0] == " "].str[1:]

In [6]:
df['date'] = df['time at position'].apply(lambda x: str(x.date()))
df['flight-id'] = df['ICAO 24-bit code'] + df['callsign'] + df['date']

In [7]:
df.loc[df.country == "Hungary", "oblast"] = df[df.country == "Hungary"].location.str.split(",").apply(lambda x: [e for ind, e in enumerate(x[1:]) if ('járás' in x[ind])][0])

In [8]:
df.loc[df.country == "Poland", "oblast"] = df[df.country == "Poland"].location.str.split(",").apply(lambda x: x[-2] if 'Voivodeship' in x[-2] else x[-3])

In [9]:
df.loc[df.country == "Romania", "oblast"] = df[df.country == "Romania"].location.str.split(",").apply(lambda x: x[-3] if any(char.isdigit() for char in x[-2]) else x[-2])

In [10]:
df.loc[df.country == "Bulgaria", "oblast"] = df[df.country == "Bulgaria"].location.str.split(",").apply(lambda x: x[-3] if any(char.isdigit() for char in x[-2]) else x[-2])

In [11]:
df.loc[df.country == "Russia", "oblast"] = df[df.country == "Russia"].location.str.split(",").apply(lambda x: [e for e in x if ('Oblast' in e)][0] if len([e for e in x if ('Oblast' in e)]) > 0 else None)

In [12]:
df.loc[df.country == "Belarus", "oblast"] = df[df.country == "Belarus"].location.str.split(",").apply(lambda x: [e for e in x if ('Region' in e)][0] if len([e for e in x if ('Region' in e)]) > 0 else None)

In [13]:
df.loc[df.country == "Slovakia", "oblast"] = df[df.country == "Slovakia"].location.str.split(",").apply(lambda x: [e for e in x if ('Region' in e)][0])

In [14]:
df.loc[df.country == "Serbia", "oblast"] = df[df.country == "Serbia"].location.str.split(",").apply(lambda x: x[-3] if any(char.isdigit() for char in x[-2]) else x[-2])

In [15]:
df.oblast = df.oblast[df.oblast.str[0] == " "].str[1:]

In [16]:
df.groupby(['country','oblast'], dropna=False)['flight-id'].nunique().to_csv("oblasts.csv")

In [17]:
df['latitude, longitude'] = df[['latitude', 'longitude']].values.tolist()

In [18]:
gbdf = df.groupby(['flight-id','origin country','date'], dropna=False).agg({
    "barometric altitude": np.max,
    "velocity": np.max,
    "vertical rate": np.max,
    "oblast": lambda x: set(x),
    "country": lambda x: set(x),
    "latitude, longitude": lambda x: list(x)
})

In [None]:
df[df.country == 'Ukraine'].to_csv('processed_data_davi.csv', index=False) 

In [None]:
gbdf.to_csv("flights_separate.csv")