In [1]:
import sys
import os 
from pathlib import Path
import pandas as pd
from bs4 import BeautifulSoup 
import requests
import re
import json

# "__file__" is a string literal here, not the __file__ variable, so realpath
# resolves it against the current working directory: its parent is that
# directory, and the parent of that is used as the project root.
project_path = Path(os.path.realpath("__file__")).parent.parent

from scripts.utils import log_df

In [11]:

# Locations of the saved HTML for the error-code and status-code tables.
raw_error_path = project_path.joinpath('raw_data', 'dwarfii', 'error_codes.html')
raw_status_path = project_path.joinpath('raw_data', 'dwarfii', 'status_codes.html')


# dwarf docs

Fetching the page directly (e.g. with `requests`) does not work, because the docs are rendered client-side by JavaScript.

The CMS for the docs renders only the portion of the table that is currently visible on screen. To capture every row, load the page in a browser and copy the table's HTML from the dev console to get the rows that are visible; then scroll and copy again, repeating until all rows have been captured. Finally, use Beautiful Soup to parse the saved HTML and pandas to drop the duplicate rows that the overlapping copies produce.

In [7]:
def process_html(soup):
    """Extract code/value/description records from the table rows in ``soup``.

    The first ``<tr>`` is skipped as the header row. Within every other row
    the spans returned by ``row.find_all('span', regex)`` are read
    positionally: the first is the code, the second is the value, and all
    remaining spans are concatenated, in order, to form the description.
    (With BeautifulSoup, a pattern passed as the second positional argument
    of ``find_all`` is matched against the tag's CSS class.)

    Earlier behaviour assigned each span after the second to ``description``
    in turn, so a description split across several spans kept only its last
    fragment; the fragments are now joined instead.

    Args:
        soup: Parsed document exposing ``find_all('tr')``. Each returned row
            must expose ``find_all('span', pattern)`` yielding objects with a
            ``.text`` attribute (e.g. a ``BeautifulSoup`` object).

    Returns:
        list[dict]: One dict per row that has at least a code span and a
        value span, with keys ``'code'``, ``'value'`` and, when the row has
        further spans, ``'description'``.
    """
    regex = re.compile('.*author.*')
    records = []

    for row_index, row in enumerate(soup.find_all('tr')):
        if row_index == 0:
            # Header row of the table.
            continue

        texts = [span.text for span in row.find_all('span', regex)]
        if len(texts) < 2:
            # Rows without a value span are not data rows.
            continue

        data = {'code': texts[0], 'value': texts[1]}
        if len(texts) > 2:
            # Join rather than overwrite so split descriptions stay whole.
            data['description'] = ''.join(texts[2:])
        records.append(data)

    return records
         

## error codes

In [9]:
# Parse the saved error-code table. The encoding is passed explicitly so that
# decoding does not depend on the platform's default locale encoding.
# NOTE(review): assumes the saved HTML is UTF-8 -- confirm for this file.
with open(raw_error_path, encoding='utf-8') as fp:
    soup = BeautifulSoup(fp, 'html.parser')

records = process_html(soup)
len(records)

148

In [24]:
# Build the table from the parsed records and drop exact duplicate rows.
df = pd.DataFrame(records).drop_duplicates()
log_df(df)

(65, 3)


Unnamed: 0,code,value,description
0,RESULT_OK,0,operate successfully
1,RESULT_CAM_OPENED,-1,The camera is on
2,RESULT_CAM_CLOSED,-2,The camera is off
3,RESULT_CAM_RECORDING,-3,On video
4,RESULT_CAM_TAKING_PHOTO,-4,Being photographed


In [25]:

# Map each row's value (as a string) to its description, replacing every
# newline followed by spaces inside the description with a single space.
results = {
    str(entry.value): re.sub('\n +', ' ', entry.description)
    for entry in df.itertuples(index=False)
}

results

{'0': 'operate successfully',
 '-1': 'The camera is on',
 '-2': 'The camera is off',
 '-3': 'On video',
 '-4': 'Being photographed',
 '-5': 'Motor in motion',
 '-6': 'Motor stopped',
 '-7': 'Motor instruction failed to be sent',
 '-8': 'anorama scanning is enabled',
 '-9': 'The motor stops first and then moves, which will cause the motor to stop',
 '-10': 'The beat is suspended',
 '-11': 'Autofocus',
 '-12': 'Tracing algorithm is not initialized',
 '-13': 'Tracing algorithm initialization',
 '-14': 'The tracing algorithm is initialized',
 '-15': 'Under tracking',
 '-16': 'OTA upgrade',
 '-17': 'JPG failed to convert to FITS. Procedure',
 '-18': 'Plate Solving failed',
 '-19': 'Level correction failure',
 '-20': 'Bluetooth configuration in wifi',
 '-21': 'The wifi has been configured',
 '-22': 'Motor reaches limit',
 '-23': 'The microSD is not detected',
 '-24': 'Motor not reset',
 '-39': 'The Bluetooth wifi configuration fails. Procedure',
 '-25': 'OTA upgrade error',
 '-26': 'Low elec

## status codes

In [26]:
# Parse the saved status-code table. The encoding is passed explicitly so that
# decoding does not depend on the platform's default locale encoding.
# NOTE(review): assumes the saved HTML is UTF-8 -- confirm for this file.
with open(raw_status_path, encoding='utf-8') as fp:
    soup = BeautifulSoup(fp, 'html.parser')

records = process_html(soup)
len(records)

10

In [27]:
# Build the table from the parsed records and drop exact duplicate rows.
df = pd.DataFrame(records).drop_duplicates()
log_df(df)

(10, 3)


Unnamed: 0,code,value,description
0,STATE_CALIBRATION_START,1000,Start correction
1,STATE_CALIBRATION_PLATE_SOLVING,1001,Correcting Plate Solving
2,STATE_CALIBRATION_FAILED,1002,Correction failure
3,STATE_GOTO_START,1003,Start GOTO
4,STATE_GOTO_PLATE_SOLVING,1004,GOTO Plate Solving


In [28]:

# Map each row's value (as a string) to its description, replacing every
# newline followed by spaces inside the description with a single space.
results = {
    str(entry.value): re.sub('\n +', ' ', entry.description)
    for entry in df.itertuples(index=False)
}

results

{'1000': 'Start correction',
 '1001': 'Correcting Plate Solving',
 '1002': 'Correction failure',
 '1003': 'Start GOTO',
 '1004': 'GOTO Plate Solving',
 '1005': 'Start tracking',
 '1006': 'GOTO failure',
 '1007': 'Stopping Astronomy function',
 '1008': 'End of astronomy function',
 '1009': 'Correct successfully'}