<div style="text-align: center; background-color: #0A6EBD; font-family: 'Trebuchet MS', Arial, sans-serif; color: white; padding: 20px; font-size: 40px; font-weight: bold; border-radius: 0 0 0 0; box-shadow: 0px 6px 8px rgba(0, 0, 0, 0.2);">
  Data Science Project
  <hr style="border-top: 1px solid #0A6EBD;">
  <i>Investigating Meteor-Related Phenomena<br>and their Association with Light Pollution</i>
</div>

<div style="text-align: center; background-color: #5A96E3; font-family: 'Trebuchet MS', Arial, sans-serif; color: white; padding: 20px; font-size: 40px; font-weight: bold; border-radius: 0 0 0 0; box-shadow: 0px 6px 8px rgba(0, 0, 0, 0.2);">
  Stage I - Data collecting
</div>

# Data Source

## Meteors dataset

1. [Global Meteor Network (GMN)](https://globalmeteornetwork.org/), accessed through the [`bolides` package API](https://bolides.readthedocs.io/en/latest/reference/index.html).

# Collecting Data and Pushing to MongoDB

## Selected data attributes



- `datetime`: Date and UTC time of the beginning of the meteor
- `Vavg`: Average geocentric velocity of the meteor
- `Duration`: Duration the meteor was observed
- `Peak` and `PeakHt`: Peak absolute magnitude and height of the meteor
- `Participating`: List of the codes of the stations that observed the meteor
- `Num`: Number of stations that observed the meteor
- `LatBeg` and `LonBeg`: The latitude and longitude of the beginning of the meteor
- `LatEnd` and `LonEnd`: The latitude and longitude of the end of the meteor
- `HtBeg` and `HtEnd`: The height of the beginning and the end of the meteor
- `iau_code`: The IAU shower code.
- `RAapp` and `DECapp`: The right ascension and the declination of the radiant of the meteor.
- `Masskg`: Mass in kilograms of the meteor

In [1]:
# Columns selected from every GMN data frame, in this exact order.
usedAttributes = [
    "datetime",
    "Vavg",
    "Duration",
    "Peak",
    "PeakHt",
    "Participating",
    "Num",
    "LatBeg",
    "LonBeg",
    "LatEnd",
    "LonEnd",
    "HtBeg",
    "HtEnd",
    "iau_code",
    "RAapp",
    "DECapp",
    "Masskg",
]

## Collect data from GMN

In [2]:
from bolides import BolideDataFrame
from datetime import date, timedelta
from pathlib import Path
import json
import logging
import sys

Set logger

In [3]:
# Logger for this notebook; messages of level INFO and above go to log.txt.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Re-running this cell must neither stack duplicate handlers nor reopen
# log.txt: a FileHandler opens its file as soon as it is constructed, so it
# is only built when it is actually going to be attached to the logger.
if not logger.handlers:
    handler = logging.FileHandler("log.txt")
    handler.setFormatter(formatter)
    logger.addHandler(handler)

Convert raw GMN data to .JSON format and push to database

In [4]:
def convertGMNRawToJSON(GMNRaw, filePath: Path):
    """Write GMN data to disk as a JSON file.

    Dates are serialised as ISO strings with second precision.

    Args:
        GMNRaw: frame-like object exposing a pandas-style ``to_json`` method
        filePath (Path): destination file; missing parent directories are
            created before writing
    """
    # Make sure the destination folder exists so that the very first export
    # into a not-yet-created output tree does not fail.
    Path(filePath).parent.mkdir(parents=True, exist_ok=True)
    GMNRaw.to_json(filePath, date_format="iso", date_unit="s")

def pushGMNRawToDatabase(GMNRaw):
    """Placeholder for uploading GMN data to the database.

    Not implemented yet: the argument is ignored and nothing is stored.

    Args:
        GMNRaw: the GMN data that is meant to be pushed

    Returns:
        None
    """
    return None

Collect data from GMN for a predefined date range

In [5]:
import sys


def getGMNRawOfDate(date: str):
    """Fetch the meteors observed by GMN on one specific day.

    Args:
        date (str): the day to query, as a string of the form YYYY-MM-DD

    Returns:
        BolideDataFrame: the raw meteor data built for that day
    """
    return BolideDataFrame(source="gmn", date=date)

def getGMNRawData(fromDate: date, toDate: date):
    """Download GMN meteor data day by day and store each day as a JSON file.

    For every day in the range (both ends included) the data is fetched,
    reduced to the columns listed in ``usedAttributes`` and written to
    ``../data/raw/gmn/<YYYY-MM-DD>.json``. A day whose download fails is
    logged as an error and skipped; the remaining days are still processed.

    Args:
        fromDate (date): the start date of the range, need to have year-month-day
        toDate (date): the end date of the range, need to have year-month-day
    """
    oneDay = timedelta(days=1)
    currentDay = fromDate
    while currentDay <= toDate:
        # Capture the label before advancing, so that every log message and
        # the file name refer to the day that was actually processed.
        dayLabel = currentDay.strftime("%Y-%m-%d")
        currentDay += oneDay

        try:
            GMNRaw = getGMNRawOfDate(dayLabel)
        except Exception as error:
            # Only ordinary errors are skipped; KeyboardInterrupt and
            # SystemExit propagate so a long download can still be stopped.
            logger.error(f"GMN data of {dayLabel} cannot be retrieved: {error}")
            continue

        GMNRaw = GMNRaw[usedAttributes]
        assert GMNRaw.columns.tolist() == usedAttributes

        filePath = Path(f"../data/raw/gmn/{dayLabel}.json")
        convertGMNRawToJSON(GMNRaw, filePath)

        logger.info(f"GMN data of {dayLabel} downloaded")

In [None]:
# Download every day from 2018-12-10 through 2023-12-31; one GMN query is made per day.
getGMNRawData(fromDate=date(2018, 12, 10), toDate=date(2023, 12, 31))

# Collecting data of light pollution

Set up

In [238]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import re
import time
import numpy as np

Retrieve the brightness at a coordinate

In [192]:
# Presumably the map zoom level; it is not referenced by the cells shown here.
zoom = 10
# Page with the light pollution map that is driven through Selenium.
URL = "https://djlorenz.github.io/astronomy/lp2022/overlay/dark.html"
# Seconds the explicit Selenium waits allow before giving up.
timeOut = 5

In [252]:
def extractBrightness(text):
    """Pull the brightness value out of a popup text.

    Args:
        text (str): text that may contain "Brightness: <decimal number>"

    Returns:
        str | None: the decimal number as a string (e.g. "0.25"), or None when
        the text holds no "Brightness:" entry followed by whitespace and a
        number with a decimal point
    """
    found = re.search(r"Brightness:\s+(\d+\.\d+)", text)
    return found.group(1) if found else None

def initDriver():
    """Start a headless Chrome session and request the light pollution map.

    Returns:
        webdriver.Chrome: driver on which ``URL`` has been requested. The page
        load strategy is 'none', so the page may still be loading when this
        function returns; callers have to wait for the elements they need.

    Raises:
        Exception: whatever Selenium raises while opening the page; the
        browser is shut down before the error is propagated.
    """
    chrome_options = Options()
    # Run Chrome without a visible window.
    chrome_options.add_argument("--headless")
    # Do not make get() block until the page has finished loading.
    chrome_options.page_load_strategy = 'none'
    driver = webdriver.Chrome(options=chrome_options)

    try:
        driver.get(URL)
        driver.implicitly_wait(5)
    except BaseException:
        # Do not leave an orphaned Chrome process behind when setup fails.
        driver.quit()
        raise

    return driver

def getBrightness(driver, lat, lon, screenshotPath=None):
    """Look up the brightness the map reports for one coordinate.

    The coordinate is typed into the map's search box, the centre of the map
    is clicked, and the brightness is parsed from the popup that shows up.

    Args:
        driver: Selenium driver on which the map page has been opened
        lat: latitude to search for
        lon: longitude to search for
        screenshotPath: optional file path; when given, a screenshot is saved
            there right before the popup is read (debugging aid). By default
            no screenshot is written.

    Returns:
        str | None: whatever ``extractBrightness`` finds in the popup text

    Raises:
        selenium.common.exceptions.TimeoutException: when the search button
            or the popup does not become available within ``timeOut`` seconds
    """
    wait = WebDriverWait(driver, timeOut)

    # Click the button with class "leaflet-searchbox-button leaflet-searchbox-button-right"
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".leaflet-searchbox-button.leaflet-searchbox-button-right"))).click()
    # Type "lat,lon" from the keyboard, then press Enter ('\ue007' is the WebDriver Enter key)
    ActionChains(driver).send_keys(f"{lat},{lon}").perform()
    ActionChains(driver).send_keys(u'\ue007').perform()
    # Presumably gives the map time to move to the searched location
    time.sleep(1)

    # Click the centre of the map for EVERY coordinate. The move is made
    # relative to the map element because move_by_offset is relative to the
    # current pointer position and would drift further on each call.
    mapElement = driver.find_element(By.CSS_SELECTOR, ".leaflet-container")
    ActionChains(driver).move_to_element(mapElement).click().perform()

    if screenshotPath is not None:
        driver.save_screenshot(screenshotPath)

    # Read the text of the div with class "leaflet-popup-content"
    text = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, ".leaflet-popup-content"))).text

    return extractBrightness(text)

In [None]:
# Scan a regular latitude/longitude grid and print the brightness of each point.
lat_start, lat_end = -65, 75
# NOTE(review): -360..360 covers the globe twice in longitude - confirm this is intended.
lon_start, lon_end = -360, 360
step = 0.1
# Decimals needed to represent `step`; used to strip the floating-point noise
# that np.arange produces (values such as -64.90000000000001).
decimals = 1

driver = initDriver()
try:
    for lat in np.arange(lat_start, lat_end, step):
        lat = round(float(lat), decimals)
        for lon in np.arange(lon_start, lon_end, step):
            lon = round(float(lon), decimals)
            print(f"Latitude: {lat}, Longitude: {lon}, Brightness: {getBrightness(driver, lat, lon)}")
finally:
    # Always shut the browser down, even when a lookup fails or is interrupted.
    driver.quit()