-
Notifications
You must be signed in to change notification settings - Fork 0
/
lambda_LineBot-htmlToDB.py
74 lines (62 loc) · 3 KB
/
lambda_LineBot-htmlToDB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import boto3
from bs4 import BeautifulSoup as bs
from urllib.parse import unquote_plus
import re
# Module-level AWS handles, created once at import time rather than on every
# call to lambda_handler, so repeated handler calls in the same process can
# share them.
# Low-level S3 client; used by the handler to download uploaded objects.
s3_client = boto3.client('s3')
# DynamoDB service resource, used here only to obtain the table handle below.
dynamodb = boto3.resource('dynamodb')
# Handle to the 'QuizQuestions' table that the handler writes items into.
quiz_questions_table = dynamodb.Table('QuizQuestions')
def lambda_handler(event, context):
    """Load quiz questions from S3-hosted HTML pages into DynamoDB.

    For each entry in ``event['Records']`` the referenced S3 object is
    downloaded, decoded as UTF-8 and parsed as HTML. Every ``div`` found with
    ``class_="card exam-question-card"`` is treated as one question: its
    number is read from the card header ("Question #<n>"), and the question
    text, answer options and correct answer are written to the
    ``QuizQuestions`` table with the number as ``QuestionID``.

    Args:
        event: Mapping with a ``'Records'`` list; each record must provide
            ``['s3']['bucket']['name']`` and ``['s3']['object']['key']``.
        context: Lambda context object; not used.

    Returns:
        None. Progress is reported via ``print``.

    Notes:
        * Objects that no longer exist (``NoSuchKey``) are reported and skipped.
        * Cards without a header, or whose header has no "Question #<n>"
          marker, are skipped instead of aborting the whole invocation.
    """
    # Compile once; the same pattern is applied to every card of every record.
    question_number_pattern = re.compile(r"Question #(\d+)")

    for record in event['Records']:
        bucket_name = record['s3']['bucket']['name']
        # The key in the event is URL-encoded; unquote_plus also maps '+' to ' '.
        key = unquote_plus(record['s3']['object']['key'])

        # Fetch and decode the uploaded HTML document.
        try:
            response = s3_client.get_object(Bucket=bucket_name, Key=key)
            html_content = response['Body'].read().decode('utf-8')
        except s3_client.exceptions.NoSuchKey:
            print(f"The specified key does not exist: {key}")
            continue

        soup = bs(html_content, 'html.parser')
        question_cards = soup.find_all("div", class_="card exam-question-card")

        for question_card in question_cards:
            # find() returns None when the card has no header; guard it so a
            # single malformed card cannot raise AttributeError on `.text`.
            header = question_card.find("div", class_="card-header")
            if header is None:
                continue

            question_number = question_number_pattern.search(header.text)
            if question_number is None:
                continue  # If no question number is found, skip this question
            question_id = int(question_number.group(1))

            question = __get_question(question_card)
            answers = __get_answers(question_card)
            correct_answer = __get_correct_answer(question_card)
            # Label the options 'A', 'B', 'C', ... in document order.
            options = {chr(65 + i): answer for i, answer in enumerate(answers)}

            # Store the extracted information into DynamoDB.
            quiz_questions_table.put_item(
                Item={
                    'QuestionID': question_id,  # Extracted question number
                    'Question': question,
                    'Options': options,
                    'CorrectAnswer': correct_answer,
                }
            )
            print(f"Successfully processed question number {question_id}.")
def __clean_string(string):
    """Normalize whitespace in ``string``.

    Every run of whitespace characters (spaces, tabs, newlines, ...) is
    collapsed into a single space, then leading and trailing whitespace is
    removed from the result.
    """
    return re.sub(r"\s+", " ", string).strip()
def __get_question(question_card):
    """Return the whitespace-normalized question text of a question card.

    The text is taken from the first ``p`` element that ``find`` matches
    with ``class_="card-text"`` inside ``question_card``.
    """
    text_element = question_card.find("p", class_="card-text")
    return __clean_string(text_element.text)
def __get_answers(question_card):
    """Return the cleaned text of every answer option in a question card.

    Answer options are the ``li`` elements that ``find_all`` matches with
    ``class_="multi-choice-item"``; the returned list keeps their order.
    """
    choice_items = question_card.find_all("li", class_="multi-choice-item")
    return [__clean_string(item.text) for item in choice_items]
def __get_correct_answer(question_card):
    """Return the whitespace-normalized correct answer of a question card.

    The value is the text of the first ``span`` element that ``find``
    matches with ``class_="correct-answer"`` inside ``question_card``.
    """
    answer_element = question_card.find("span", class_="correct-answer")
    return __clean_string(answer_element.text)