In [1]:
import pandas as pd
import time
from opencage.geocoder import OpenCageGeocode
from geopy.distance import geodesic

In [2]:
# Read the OpenCage API key from a local file so it is never hard-coded here.
with open("geo_api.txt") as api_file:
    # strip() drops the trailing newline (and stray spaces) that readline()
    # keeps; otherwise the key sent to the API would end in "\n".
    api_key = api_file.readline().strip()

In [3]:
def get_coordinates(iata):
    """Geocode an airport code with OpenCage and return its coordinates.

    The code is sent to the geocoder as a plain query string. The results are
    scanned in order and the first one whose ``components._type`` equals
    ``"aeroway"`` is used.

    Args:
        iata: Airport code to look up (e.g. ``"OSL"``).

    Returns:
        A ``(lat, lon)`` tuple taken from the matching result's ``geometry``,
        or ``None`` when the geocoder returns nothing or no result is an
        aeroway.
    """
    geocoder = OpenCageGeocode(api_key)
    results = geocoder.geocode(str(iata))

    # An empty/None response simply yields no iterations.
    for result in results or []:
        # .get() so a result lacking "components" or "_type" is skipped
        # instead of aborting the whole lookup with a KeyError.
        if result.get("components", {}).get("_type") == "aeroway":
            geometry = result["geometry"]
            return geometry["lat"], geometry["lng"]

    # Nothing usable found: make the "no match" outcome explicit.
    return None

In [4]:
# Load the flights dataset. A forward slash is used so the relative path
# resolves on Windows, macOS and Linux alike (a backslash is only a path
# separator on Windows).
df_original = pd.read_json("silver/flights.json")

# Work on a copy so the loaded frame stays available unmodified.
df_draft = df_original.copy()

In [5]:
# Coordinates of OSL, used as the fixed reference point for every distance.
osl_coordinates = get_coordinates("OSL")

# get_coordinates returns None when nothing matches; fail with a clear message
# instead of an opaque "cannot unpack NoneType" error.
if osl_coordinates is None:
    raise RuntimeError("Could not geocode reference airport 'OSL'")

osl_lat, osl_lon = osl_coordinates

print(osl_lat, osl_lon)

60.1978654 11.0996754


In [6]:
# Maps a destination code to its distance from OSL in kilometres (or None when
# the destination could not be geocoded), so each code is looked up only once.
distance_cache = {}

def get_distance(row):
    """Return the geodesic distance in km between OSL and the row's destination.

    Results, including failed lookups (stored as ``None``), are memoised in
    ``distance_cache``. Only a cache miss triggers a geocoder request and the
    pause that follows it.
    """
    dest = row['destination']

    # Cache hit: no API call, no pause.
    if dest in distance_cache:
        return distance_cache[dest]

    coords = get_coordinates(dest)

    if coords is None:
        distance_cache[dest] = None
    else:
        dest_lat, dest_lon = coords
        span = geodesic((dest_lat, dest_lon), (osl_lat, osl_lon))
        distance_cache[dest] = span.kilometers

    # OpenCage has a limit of 1 request per second, so pause after each lookup.
    time.sleep(1)
    return distance_cache[dest]

In [7]:
# Compute the distance for every flight row; get_distance caches per
# destination, so repeated destinations do not trigger new lookups.
df_draft = df_draft.assign(distance=df_draft.apply(get_distance, axis=1))

In [8]:
# Inspect the frame now that the distance column has been added.
df_draft

Unnamed: 0,@uniqueID,airline,flight_id,dom_int,arr_dep,destination,date,time,distance
0,12095902,TRK,TRK005,I,D,ESB,2023-12-15,14:00:00,2698.716192
1,12049936,BA,BA763,I,D,LHR,2023-12-20,10:45:00,1208.507876
2,12050222,SK,SK263,D,D,BGO,2023-12-20,11:35:00,325.722731
3,12050390,DY,DY374,D,D,TOS,2023-12-20,11:35:00,1117.954925
4,11983380,KL,KL1144,S,D,AMS,2023-12-20,11:40:00,959.846603
...,...,...,...,...,...,...,...,...,...
288,12052875,DY,DY534,D,D,SVG,2023-12-21,12:35:00,342.694040
289,12052728,SK,SK312,D,D,HAU,2023-12-21,12:50:00,344.090003
290,12053100,SK,SK484,S,D,ARN,2023-12-21,12:50:00,387.133892
291,12053125,DY,DY940,S,D,CPH,2023-12-21,12:55:00,519.246799


In [9]:
def get_emissions(row, long_haul_km=1500, long_haul_factor=0.17, short_haul_factor=0.21):
    """Estimate the emissions of one flight from its distance.

    The distance is multiplied by a per-kilometre factor that depends on
    whether the flight reaches the long-haul threshold.
    NOTE(review): the unit of the factors (presumably kg CO2 per km) is not
    stated anywhere in this notebook; confirm against the source of the
    figures.

    Args:
        row: Mapping/Series with a ``"distance"`` entry in kilometres.
        long_haul_km: Distance (inclusive) from which the long-haul factor
            applies.
        long_haul_factor: Multiplier for distances >= ``long_haul_km``.
        short_haul_factor: Multiplier for shorter distances.

    Returns:
        ``distance * factor``, or ``None`` when the distance is missing.
    """
    distance = row["distance"]

    # pd.isna covers both None and NaN, which `== None` does not: a missing
    # distance stored in a float column is NaN.
    if pd.isna(distance):
        return None

    factor = long_haul_factor if distance >= long_haul_km else short_haul_factor
    return distance * factor


In [10]:
# Derive the per-flight emissions from the distance column, then display
# the resulting frame.
df_draft = df_draft.assign(emissions=df_draft.apply(get_emissions, axis=1))
df_draft

Unnamed: 0,@uniqueID,airline,flight_id,dom_int,arr_dep,destination,date,time,distance,emissions
0,12095902,TRK,TRK005,I,D,ESB,2023-12-15,14:00:00,2698.716192,458.781753
1,12049936,BA,BA763,I,D,LHR,2023-12-20,10:45:00,1208.507876,253.786654
2,12050222,SK,SK263,D,D,BGO,2023-12-20,11:35:00,325.722731,68.401774
3,12050390,DY,DY374,D,D,TOS,2023-12-20,11:35:00,1117.954925,234.770534
4,11983380,KL,KL1144,S,D,AMS,2023-12-20,11:40:00,959.846603,201.567787
...,...,...,...,...,...,...,...,...,...,...
288,12052875,DY,DY534,D,D,SVG,2023-12-21,12:35:00,342.694040,71.965748
289,12052728,SK,SK312,D,D,HAU,2023-12-21,12:50:00,344.090003,72.258901
290,12053100,SK,SK484,S,D,ARN,2023-12-21,12:50:00,387.133892,81.298117
291,12053125,DY,DY940,S,D,CPH,2023-12-21,12:55:00,519.246799,109.041828


In [11]:
# Write the enriched frame as a list of JSON records. A forward slash keeps
# the relative path valid on every OS (a backslash only separates paths on
# Windows).
df_draft.to_json("gold/flights.json", orient="records", indent=2)