# Cities

In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
from math import radians, sin, cos, sqrt, asin
from datetime import datetime, timedelta
from pytz import timezone
import json
# import pyowm
import sqlalchemy as db
import time

def lambda_handler(event, context):
    """Scrape country and coordinates for a fixed list of cities and store them.

    For every city in the hard-coded list the English Wikipedia page is
    downloaded and its infobox is searched for the "Country" row and the
    "Coordinates: " cell. One row per successfully parsed city is appended
    to the `cities_df` table.

    A city is skipped (with a printed message) when its page cannot be
    fetched or when the country/coordinates cannot be extracted, so that a
    single bad page can no longer misalign the columns of the result.

    NOTE(review): the previous version also extracted a population figure
    from the infobox but never stored it; that dead computation was removed.
    Adding a population column would change the schema of the existing
    table, so it is left as a deliberate follow-up.

    Args:
        event: Lambda invocation event (unused).
        context: Lambda runtime context (unused).

    Returns:
        dict with `statusCode` 200 and a JSON-encoded body.
    """
    schema = "mydb"
    host = "YOUR_ENDPOINT"
    user = "admin"
    password = "YOUR_PW"
    port = 3306
    con = f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}'

    city_names = ['Berlin', 'Vienna', 'Rome', 'Paris']

    def find_country(soup):
        """Return the text of the infobox "Country" row, or None if absent."""
        for header in soup.select("table.infobox tbody tr th"):
            if header.text == "Country":
                cell = header.find_next_sibling("td")
                if cell is None:
                    return None
                # Prefer the linked country name; fall back to the raw cell text.
                links = cell.select("a")
                return links[0].get_text() if links else cell.get_text()
        return None

    def find_coordinates(soup):
        """Return the pieces of the first "Coordinates: " cell, or None.

        The cell text is split on "/" and the last segment is split on ";".
        """
        for cell in soup.select("table.infobox tbody tr td"):
            if cell.text.startswith("Coordinates: "):
                return cell.text.split("/")[-1].split(";")
        return None

    rows = []
    for city in city_names:
        url = f"https://en.wikipedia.org/wiki/{city}"

        response = requests.get(url)
        if response.status_code != 200:
            print(f"Skipping {city}: status code {response.status_code}")
            continue

        soup = BeautifulSoup(response.content, "html.parser")
        country = find_country(soup)
        coord = find_coordinates(soup)
        if country is None or coord is None or len(coord) < 2:
            print(f"Skipping {city}: country or coordinates not found")
            continue

        coordinates = f"{coord[0]}, {coord[1]}"
        try:
            lat_text, lon_text = coordinates.strip().split(",")[:2]
            latitude = float(lat_text)
            longitude = float(lon_text)
        except ValueError:
            print(f"Skipping {city}: could not parse coordinates {coordinates!r}")
            continue

        rows.append({
            "City": city,
            "Country": country,
            "Coordinates": coordinates.strip(),
            "Latitude": latitude,
            "Longitude": longitude,
        })

    cities_df = pd.DataFrame(
        rows,
        columns=["City", "Country", "Coordinates", "Latitude", "Longitude"],
    )
    cities_df["city_id"] = range(1, len(cities_df) + 1)

    if cities_df.empty:
        print("No city could be scraped; nothing written to the database")
    else:
        engine = db.create_engine(f"{con}?charset=utf8mb4", echo=True)
        # The context manager closes the connection even if the insert fails.
        with engine.connect() as connection:
            cities_df.to_sql("cities_df", connection, if_exists="append", index=False)

    return {
        'statusCode': 200,
        'body': json.dumps('Hello from Lambda!')
    }

# Airports

In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
from math import radians, sin, cos, sqrt, asin
from datetime import datetime, timedelta
from pytz import timezone
import json
import pyowm
import sqlalchemy as db
import time

def haversine(lon1, lat1, lon2, lat2):
    """
    Return the great-circle distance in kilometers between two points
    given as (longitude, latitude) pairs in decimal degrees.
    """
    earth_radius_km = 6371  # use 3956 instead to get miles

    # Work in radians from here on.
    lam1, phi1, lam2, phi2 = (radians(deg) for deg in (lon1, lat1, lon2, lat2))

    delta_lam = lam2 - lam1
    delta_phi = phi2 - phi1

    # Haversine of the central angle between the two points.
    hav = sin(delta_phi/2)**2 + cos(phi1) * cos(phi2) * sin(delta_lam/2)**2
    central_angle = 2 * asin(sqrt(hav))
    return central_angle * earth_radius_km

def get_nearest_airports(lat, lon):
    """Query AeroDataBox for airports around a coordinate.

    Sends a location search with a 50 km radius, a limit of 10 results and
    `withFlightInfoOnly` enabled.

    Args:
        lat: Latitude of the search center (converted with `str`).
        lon: Longitude of the search center (converted with `str`).

    Returns:
        The decoded JSON body of a 200 response. For any other status code
        the error is printed and `{"items": []}` is returned, so callers
        that read the `"items"` key see an empty result instead of failing
        on an error payload.
    """
    url = "https://aerodatabox.p.rapidapi.com/airports/search/location"
    querystring = {"lat": str(lat), "lon": str(lon), "radiusKm": "50", "limit": "10", "withFlightInfoOnly": "true"}
    headers = {
        "X-RapidAPI-Key": "YOUR_KEY",
        "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
    }
    response = requests.get(url, headers=headers, params=querystring)

    # This check used to sit after the return statement and never ran.
    if response.status_code != 200:
        print(f"Error - Status Code: {response.status_code}")
        print(f"Response Content: {response.text}")
        print('Problem with status code')
        return {"items": []}

    return response.json()

def lambda_handler(event, context):
    """Find airports near every stored city and replace the `airport_data` table.

    Reads all rows of `mydb.cities_df`, asks `get_nearest_airports` for the
    airports around each city's Latitude/Longitude, keeps one row per ICAO
    code, and adds the haversine distance (km) from each airport to its
    closest city as `Distance_to_city`. The API's `location` dict is
    flattened into `Latitude` and `Longitude`, and `countryCode` is renamed
    to `Country_Code`.

    When no airports are returned, the existing table is left untouched.

    Args:
        event: Lambda invocation event (unused).
        context: Lambda runtime context (unused).

    Returns:
        dict with `statusCode` 200 and a JSON-encoded body.
    """
    schema = "mydb"
    host = "YOUR_ENDPOINT"
    user = "admin"
    password = "YOUR_PW"
    port = 3306
    con = f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}'

    success_response = {
        'statusCode': 200,
        'body': json.dumps('Hello from Lambda!')
    }

    query = "SELECT * FROM `mydb`.cities_df"
    cities_df = pd.read_sql(query, con=con)

    # (longitude, latitude) of every city, in the argument order haversine expects.
    city_points = list(zip(cities_df["Longitude"], cities_df["Latitude"]))

    airport_data = []
    for city_lon, city_lat in city_points:
        response_data = get_nearest_airports(city_lat, city_lon)
        # Tolerate payloads without an "items" key (e.g. error responses).
        airport_data.extend((response_data or {}).get("items", []))

    if not airport_data:
        print("No airports found; airport_data table left unchanged")
        return success_response

    airport_data_df = pd.DataFrame(airport_data)

    # Several cities can share an airport; keep the first occurrence only.
    # Done before the distance computation so no work is spent on duplicates.
    airport_data_df.drop_duplicates(subset=['icao'], inplace=True)

    # Flatten the nested location dict into two plain numeric columns.
    locations = airport_data_df.pop('location')
    airport_data_df['Latitude'] = [location['lat'] for location in locations]
    airport_data_df['Longitude'] = [location['lon'] for location in locations]

    # Distance from each airport to whichever city is closest to it.
    airport_data_df['Distance_to_city'] = [
        min(
            haversine(city_lon, city_lat, airport_lon, airport_lat)
            for city_lon, city_lat in city_points
        )
        for airport_lon, airport_lat in zip(
            airport_data_df['Longitude'], airport_data_df['Latitude']
        )
    ]

    airport_data_df.rename(columns={'countryCode': 'Country_Code'}, inplace=True)

    # Insert airport_data_df into a new table in the database
    engine = db.create_engine(con)
    airport_data_df.to_sql(name='airport_data', con=engine, if_exists='replace', index=False)

    return success_response

# Flights

In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
from math import radians, sin, cos, sqrt, asin
from datetime import datetime, timedelta
from pytz import timezone
import json
import sqlalchemy as db
import time

def lambda_handler(event, context):
    """Fetch tomorrow's arrivals for all stored airports and append them to the DB.

    Reads the ICAO codes from `mydb.airport_data`, collects the flights via
    `tomorrows_flight_arrivals`, converts the arrival and departure
    timestamps to timezone-aware values in Europe/Berlin, and appends the
    result to the `flights_data_df` table.

    When no flights are returned nothing is written; previously an empty
    result raised a KeyError on the missing timestamp columns.

    Args:
        event: Lambda invocation event (unused).
        context: Lambda runtime context (unused).

    Returns:
        dict with `statusCode` 200 and a JSON-encoded body.
    """
    schema = "mydb"
    host = "YOUR_ENDPOINT"
    user = "admin"
    password = "YOUR_PW"
    port = 3306
    con = f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}'

    success_response = {
        'statusCode': 200,
        'body': json.dumps('Hello from Lambda!')
    }

    query = "SELECT icao FROM mydb.airport_data"
    airport_data = pd.read_sql(query, con=con)
    icao_list = airport_data['icao'].tolist()

    flights_data_df = tomorrows_flight_arrivals(icao_list)

    if flights_data_df.empty:
        print("No flights retrieved; nothing written to the database")
        return success_response

    # Parse as UTC first so mixed offsets are handled, then express every
    # timestamp in Berlin time.
    for column in ('arrival_time_local', 'departure_time_local'):
        parsed = pd.to_datetime(flights_data_df[column], utc=True)
        flights_data_df[column] = parsed.dt.tz_convert('Europe/Berlin')

    engine = db.create_engine(f"{con}?charset=utf8mb4", echo=True)
    # The context manager closes the connection even if the insert fails.
    with engine.connect() as connection:
        flights_data_df.to_sql("flights_data_df", connection, if_exists="append", index=False)

    return success_response

# NOTE(review): this call sits at module level, so it runs once when the
# module is loaded and blocks for 60 seconds before the function below is
# defined. Possibly meant as a rate-limit pause between API requests --
# confirm the intent and relocate or remove it.
time.sleep(60)
def tomorrows_flight_arrivals(icao_list):
    """Collect tomorrow's scheduled arrivals for the given airports.

    "Tomorrow" is computed from the current date in the Europe/Berlin
    timezone. Each airport is queried twice at AeroDataBox (00:00-11:59 and
    12:00-23:59). A window is skipped when the API answers 204, answers
    with any other non-200 status, or returns a body that is not valid
    JSON; previously such a window fell through and re-used the JSON of the
    preceding request (or raised NameError on the very first request).

    Args:
        icao_list: Iterable of airport ICAO codes.

    Returns:
        pandas.DataFrame with one row per arriving flight and the columns
        arrival_icao, arrival_time_local, arrival_terminal, departure_city,
        departure_icao, departure_time_local, airline, flight_number and
        data_retrieved_on. The columns are present even when no flight was
        found. Missing fields in the API payload become None.
    """
    # Today's and tomorrow's date in the Berlin timezone
    today = datetime.now(timezone('Europe/Berlin')).date()
    tomorrow = today + timedelta(days=1)

    # The two time periods for which to fetch data
    time_windows = [("00:00", "11:59"), ("12:00", "23:59")]

    # Query parameters and headers are identical for every request.
    querystring = {
        "withLeg": "true",
        "direction": "Arrival",
        "withCancelled": "false",
        "withCodeshared": "true",
        "withCargo": "false",
        "withPrivate": "false"
    }
    headers = {
        'x-rapidapi-host': "aerodatabox.p.rapidapi.com",
        'x-rapidapi-key': "YOUR_KEY"
    }

    columns = [
        'arrival_icao',
        'arrival_time_local',
        'arrival_terminal',
        'departure_city',
        'departure_icao',
        'departure_time_local',
        'airline',
        'flight_number',
        'data_retrieved_on',
    ]

    list_for_df = []

    for icao in icao_list:
        # The loop variables are deliberately not called `time`, which would
        # shadow the imported `time` module inside this function.
        for window_start, window_end in time_windows:
            url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{tomorrow}T{window_start}/{tomorrow}T{window_end}"
            response = requests.request("GET", url, headers=headers, params=querystring)

            # 204: no flights in this window
            if response.status_code == 204:
                continue
            if response.status_code != 200:
                print(f"Error - Status Code: {response.status_code}")
                print(f"Response Content: {response.text}")
                print('Problem with status code')
                continue

            try:
                flights_json = response.json()
            except ValueError:
                print(f"Could not decode JSON for {icao} ({window_start}-{window_end})")
                continue

            for flight in flights_json.get('arrivals', []):
                # Fall back to empty dicts so a missing section yields None
                # values instead of a KeyError.
                arrival = flight.get('arrival') or {}
                departure = flight.get('departure') or {}
                departure_airport = departure.get('airport') or {}
                airline = flight.get('airline') or {}

                list_for_df.append({
                    'arrival_icao': icao,
                    'arrival_time_local': arrival.get('scheduledTimeLocal', None),
                    'arrival_terminal': arrival.get('terminal', None),
                    'departure_city': departure_airport.get('name', None),
                    'departure_icao': departure_airport.get('icao', None),
                    'departure_time_local': departure.get('scheduledTimeLocal', None),
                    'airline': airline.get('name', None),
                    'flight_number': flight.get('number', None),
                    # Date (Berlin timezone) on which the data was fetched
                    'data_retrieved_on': today,
                })

    # Passing the column list keeps the schema stable for an empty result.
    return pd.DataFrame(list_for_df, columns=columns)

# Weather

In [None]:
from bs4 import BeautifulSoup
import requests 
import pandas as pd
import re
from datetime import datetime, timedelta
from pytz import timezone
import json
import pyowm
import sqlalchemy as db
import time

def lambda_handler(event, context):
    """Download the OpenWeatherMap forecast for every stored city and append it to the DB.

    City names are read from `mydb.cities_df`. For each city the
    `/data/2.5/forecast` endpoint is queried and every entry of the
    returned `list` becomes one row of the `weather_df` table.

    Cities whose request does not return status 200 are skipped with a
    printed message; previously an error payload without a `list` key
    raised a KeyError. When no forecast rows were collected nothing is
    written.

    NOTE(review): no `units` parameter is sent, so temperatures are in the
    API's default unit -- confirm against the OpenWeatherMap docs. The
    `rain_duration` / `snow_duration` columns hold the API's `3h` values;
    the names are kept because they are the table's existing schema.

    Args:
        event: Lambda invocation event (unused).
        context: Lambda runtime context (unused).

    Returns:
        dict with `statusCode` 200 and a JSON-encoded body.
    """
    schema = "mydb"
    host = "YOUR_ENDPOINT"
    user = "admin"
    password = "YOUR_PW"
    port = 3306
    con = f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}'

    query = "SELECT City FROM mydb.cities_df"
    cities_df = pd.read_sql(query, con=con)
    city_names = cities_df['City'].tolist()

    def get_weather(city_names):
        """Return a DataFrame with one row per forecast entry and city."""
        API_key = "YOUR_KEY"
        forecast_url = "https://api.openweathermap.org/data/2.5/forecast"
        weather = []

        for city_name in city_names:
            # `params` URL-encodes the city name (spaces, umlauts, ...).
            weather_data = requests.get(
                forecast_url,
                params={"q": city_name, "appid": API_key},
            )
            if weather_data.status_code != 200:
                print(f"Skipping {city_name}: status code {weather_data.status_code}")
                print(f"Response Content: {weather_data.text}")
                continue

            for entry in weather_data.json().get("list", []):
                main = entry["main"]
                condition = entry["weather"][0]

                weather.append({
                    'City': city_name,
                    'datetime': entry["dt_txt"],
                    'temp': main["temp"],
                    'feels_like': main["feels_like"],
                    'pressure': main["pressure"],
                    'humidity': main["humidity"],
                    'weather_description_main': condition["main"],
                    'weather_description': condition["description"],
                    'wind_speed': entry["wind"]["speed"],
                    # None when the entry carries no visibility value
                    'visibility': entry.get("visibility"),
                    # rain/snow sections are absent when there is none
                    'rain_duration': entry.get("rain", {}).get("3h", 0),
                    'snow_duration': entry.get("snow", {}).get("3h", 0),
                })

        return pd.DataFrame(weather)

    weather_df = get_weather(city_names)

    if weather_df.empty:
        print("No forecast data retrieved; nothing written to the database")
    else:
        engine = db.create_engine(f"{con}?charset=utf8mb4", echo=True)
        # The context manager closes the connection even if the insert fails.
        with engine.connect() as connection:
            weather_df.to_sql("weather_df", connection, if_exists="append", index=False)

    return {
        'statusCode': 200,
        'body': json.dumps('Hello from Lambda!')
    }