In [1]:
import json
import time
from datetime import datetime, timedelta

import pytz

In [2]:
# Load the raw flight plans. The context manager closes the file even when
# json.load raises (e.g. on malformed JSON), which the manual open/close
# sequence did not guarantee.
with open("data/plans.json", "r", encoding="utf-8") as flight_plans_file:
    flight_plans_dict = json.load(flight_plans_file)

In [3]:
def formatDate(timestamp, timezone, day_offset=0):
    """Turn an epoch timestamp into a calendar date in the given time zone.

    Args:
        timestamp: Epoch time; it is divided by 1000 before conversion, i.e.
            it is treated as milliseconds.
        timezone: Time zone name understood by ``pytz.timezone``.
        day_offset: Number of days added to the local time before the date
            is taken (may be negative). Defaults to 0.

    Returns:
        A dict with ``"date"`` formatted as ``%Y-%m-%d`` and ``"day"``
        formatted as ``%a`` (abbreviated weekday name).
    """
    zone = pytz.timezone(timezone)
    local_moment = datetime.fromtimestamp(timestamp / 1000, zone)
    # Only the calendar date of the shifted moment is used.
    shifted_day = (local_moment + timedelta(day_offset)).date()

    return {
        "date": shifted_day.strftime("%Y-%m-%d"),
        "day": shifted_day.strftime("%a"),
    }


def clean_fltDate(json_data):
    """Convert every schedule's ``fltDate`` into a local calendar date.

    ``json_data`` maps a route name to a list of schedule dicts. For each
    schedule, ``fltDate`` is passed to ``formatDate`` together with the time
    zone of the origin city (``ori``). The returned ``"date"`` replaces
    ``fltDate`` and the returned ``"day"`` is stored under ``fltDay``.

    The schedules are modified in place and the same ``json_data`` object is
    returned.

    Raises:
        KeyError: If a schedule's ``ori`` has no entry in the time-zone table
            or a key read from the schedule is missing.
    """
    timezone_map = {
        "洛杉矶": "America/Los_Angeles",
        "纽约": "America/New_York",
        "旧金山": "America/Los_Angeles",
        "西雅图": "America/Los_Angeles",
        "底特律": "US/Michigan",
        # Dallas is in US Central time; America/Mexico_City follows different
        # daylight-saving rules and can yield the wrong local date.
        "达拉斯": "America/Chicago",
    }

    # Route -> airline whose dates are shifted back by one day.
    # NOTE(review): presumably the source timestamps for these carriers are a
    # day ahead of the local departure date -- confirm against the data source.
    #
    # The original if / if / if-else chain attached the ``else`` only to the
    # last condition, so the first two shifts were always overwritten with an
    # unshifted date. A single lookup makes the cases mutually exclusive.
    day_behind_airline = {
        "洛杉矶-广州": "中国南方航空股份有限公司",
        "西雅图-上海": "达美航空公司",
        "底特律-上海": "达美航空公司",
    }

    for route, schedules in json_data.items():
        shifted_airline = day_behind_airline.get(route)
        for schedule in schedules:
            # "airline" is only read for routes that have a shifted carrier.
            if shifted_airline is not None and schedule["airline"] == shifted_airline:
                day_offset = -1
            else:
                day_offset = 0

            date = formatDate(schedule["fltDate"], timezone_map[schedule["ori"]], day_offset)

            schedule["fltDate"] = date["date"]
            schedule["fltDay"] = date["day"]

    return json_data

In [4]:
def translate(json_data):
    """Replace known Chinese airline, airport and city names with English.

    The data is serialised to a JSON string, every glossary term is
    substituted textually, and the string is parsed back. Because the
    substitution runs over the serialised text, it applies to dictionary keys
    as well as to string values.

    Returns:
        The structure parsed from the translated JSON text; the object passed
        in is not modified.
    """
    glossary = {
        "中国东方航空公司": "China Eastern Airlines",
        "中国南方航空股份有限公司": "China Southern Airlines",
        "达美航空公司": "Delta Air Lines",
        "厦门航空有限公司": "Xiamen Air",
        "中国国际航空股份有限公司": "Air China",
        "美国航空公司": "American Airlines",
        "美国联合航空公司": "United Airlines",
        "技术经停": "Technical Stop",
        "首尔": "Seoul",
        "浦东": "Shanghai (PVG)",
        "首都": "Beijing (PEK)",
        "底特律": "Detroit",
        "纽约": "New York",
        "洛杉矶": "Los Angeles",
        "达拉斯": "Dallas",
        "西雅图": "Seattle",
        "旧金山": "San Francisco",
        "广州": "Guangzhou",
        "厦门": "Xiamen",
        "北京": "Beijing",
        "深圳": "Shenzhen",
        "福州": "Fuzhou",
        "上海": "Shanghai",
        "天津": "Tianjin",
    }

    # ensure_ascii=False keeps the Chinese characters literal instead of
    # \uXXXX escapes, so str.replace can find them.
    serialized = json.dumps(json_data, ensure_ascii=False)

    # Terms are applied in glossary order; full airline names are listed
    # before the city names they contain (e.g. 厦门航空有限公司 before 厦门).
    for chinese, english in glossary.items():
        serialized = serialized.replace(chinese, english)

    return json.loads(serialized)

In [5]:
def removeKey(json_data, keys_to_remove):
    """Delete the given keys from every schedule of every route.

    Args:
        json_data: Mapping of route name to a list of schedule dicts.
        keys_to_remove: Iterable of keys to delete from each schedule.

    Returns:
        The same ``json_data`` object; schedules are modified in place.

    Raises:
        KeyError: If a schedule does not contain one of the keys.
    """
    for schedules in json_data.values():
        for schedule in schedules:
            for unwanted in keys_to_remove:
                del schedule[unwanted]

    return json_data

In [6]:
def write_to_file(d, file_name):
    """Serialise ``d`` as JSON into ``file_name``.

    The file is opened for writing with UTF-8 encoding and non-ASCII
    characters are written literally (``ensure_ascii=False``).

    Args:
        d: JSON-serialisable object to write.
        file_name: Path of the output file; an existing file is overwritten.

    Raises:
        TypeError: Propagated from ``json.dump`` when ``d`` contains a value
            that cannot be serialised.
        OSError: Propagated from ``open`` when the file cannot be opened.
    """
    # The context manager closes the handle even if json.dump raises, which
    # the manual open/close sequence did not guarantee.
    with open(file_name, "w", encoding="utf-8") as output:
        json.dump(d, output, ensure_ascii=False)

In [7]:
# Cleaning pipeline, innermost call first: normalise fltDate, translate the
# Chinese names to English, then drop the fields not wanted in the output.
# NOTE: clean_fltDate rewrites the schedules in place and flight_plans_dict is
# rebound to the translated result, so this cell cannot be re-run on its own
# output; reload the raw data first.
flight_plans_dict = removeKey(
    translate(clean_fltDate(flight_plans_dict)),
    ["fltSchedule", "importTime"],
)
write_to_file(flight_plans_dict, "./data/plans_cleaned_en.json")