In [1]:
import requests

def fetch_and_process_documents(docs_url):
    # Fetch raw documents data from the provided URL
    docs_response = requests.get(docs_url)
    documents_raw = docs_response.json()

    # Process the documents
    documents = []
    for course in documents_raw:
        course_name = course['course']
        for doc in course['documents']:
            doc['course'] = course_name
            documents.append(doc)

    return documents

In [2]:

docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
processed_documents = fetch_and_process_documents(docs_url)

In [3]:
import hashlib

def generate_document_id(doc):
    # combined = f"{doc['course']}-{doc['question']}"
    combined = f"{doc['course']}-{doc['question']}-{doc['text'][:10]}"
    hash_object = hashlib.md5(combined.encode())
    hash_hex = hash_object.hexdigest()
    document_id = hash_hex[:8]
    return document_id

In [4]:
for doc in processed_documents:
    doc['id'] = generate_document_id(doc)

In [5]:
processed_documents[3]

{'text': "You don't need it. You're accepted. You can also just start learning and submitting homework without registering. It is not checked against any registered list. Registration is just to gauge interest before the start date.",
 'section': 'General course-related questions',
 'question': 'Course - I have registered for the Data Engineering Bootcamp. When can I expect to receive the confirmation email?',
 'course': 'data-engineering-zoomcamp',
 'id': '0bbf41ec'}

In [6]:
from collections import defaultdict

In [7]:
hashes = defaultdict(list)

for doc in processed_documents:
    doc_id = doc['id']
    hashes[doc_id].append(doc)

In [8]:
len(hashes),len(processed_documents)

(947, 948)

In [9]:
for k, values in hashes.items():
    if len(values) > 1:
        print(k, len(values))

593f7569 2


In [10]:
hashes['593f7569']

[{'text': "They both do the same, it's just less typing from the script.\nAsked by Andrew Katoch, Added by Edidiong Esu",
  'section': '6. Decision Trees and Ensemble Learning',
  'question': 'Does it matter if we let the Python file create the server or if we run gunicorn directly?',
  'course': 'machine-learning-zoomcamp',
  'id': '593f7569'},
 {'text': "They both do the same, it's just less typing from the script.",
  'section': '6. Decision Trees and Ensemble Learning',
  'question': 'Does it matter if we let the Python file create the server or if we run gunicorn directly?',
  'course': 'machine-learning-zoomcamp',
  'id': '593f7569'}]

In [11]:
import json
with open('../data/documents-with-ids.json', 'wt') as f_out:
    json.dump(processed_documents, f_out, indent=2)

In [12]:
!head ../data/documents-with-ids.json

[
  {
    "text": "The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  \u201cOffice Hours'' live.1\nSubscribe to course public Google Calendar (it works from Desktop only).\nRegister before the course starts using this link.\nJoin the course Telegram channel with announcements.\nDon\u2019t forget to register in DataTalks.Club's Slack and join the channel.",
    "section": "General course-related questions",
    "question": "Course - When will the course start?",
    "course": "data-engineering-zoomcamp",
    "id": "c02e79ef"
  },
  {
    "text": "GitHub - DataTalksClub data-engineering-zoomcamp#prerequisites",


In [13]:
prompt_template = """
You emulate a student who's taking our course.
Formulate 5 questions this student might ask based on a FAQ record. The record
should contain the answer to the questions, and the questions should be complete and not too short.
If possible, use as fewer words as possible from the record. 

The record:

section: {section}
question: {question}
answer: {text}

Provide the output in parsable JSON without using code blocks:

["question1", "question2", ..., "question5"]
""".strip()

In [14]:
from dotenv import load_dotenv
import os
from openai import OpenAI


In [31]:
api_key = os.getenv('OPENAI_API_KEY')

if api_key:
    client = OpenAI(api_key=api_key)
else:
    print("Error: OPENAI_API_KEY environment variable not set.")
    

In [26]:
def generate_questions(doc):
    prompt = prompt_template.format(**doc)

    response = client.chat.completions.create(
        model='gpt-4o',
        messages=[{"role": "user", "content": prompt}]
    )

    json_response = response.choices[0].message.content
    return json_response

In [18]:
type(processed_documents)

list

In [27]:
for doc in processed_documents:
    questions = generate_questions(doc)
    print(f"Questions for document {doc['id']}:\n{questions}")

Questions for document c02e79ef:
[
  "When exactly does the course begin?",
  "How can I view the course schedule?",
  "What should I do before the course starts?",
  "Is there a communication channel for course announcements?",
  "Do I need to join any specific Slack channels for the course?"
]
Questions for document 1f6520ca:
{
  "question1": "What skills do I need before enrolling in this course?",
  "question2": "Where can I find the prerequisites for this course?",
  "question3": "Do I need prior knowledge of specific topics for this course?",
  "question4": "Is there a list of prerequisites for this course?",
  "question5": "How do I know if I meet the prerequisites for the course?"
}
Questions for document 7842b56a:
[
  "Is it possible to enroll after the course has started?",
  "Am I allowed to submit homework without registering initially?",
  "Are there any deadlines for final project submissions?",
  "Can I complete the course if I join late?",
  "Will I face any issues if I

In [28]:
from tqdm.auto import tqdm

def generate_document_questions(documents, generate_questions):
    """
    Generates questions for a list of documents and returns the results in a dictionary.
    
    Parameters:
    - documents: List of document dictionaries. Each dictionary should have an 'id' key.
    - generate_questions: Function to generate questions for a given document.
    
    Returns:
    - results: Dictionary mapping document IDs to generated questions.
    """
    results = {}
    
    for doc in tqdm(documents):
        doc_id = doc['id']
        
        if doc_id in results:
            continue

        questions = generate_questions(doc)
        results[doc_id] = questions
    
    return results

In [45]:
def parse_questions(data):
    parsed_data = {}
    for doc_id, questions in data.items():
        if isinstance(questions, list):
            parsed_data[doc_id] = questions
        elif isinstance(questions, dict):
            parsed_data[doc_id] = list(questions.values())
        else:
            raise TypeError(f"Unexpected format for document {doc_id}")

    return parsed_data


In [50]:
import json

def parse_questions(file_path):
    try:
        with open(file_path, 'r') as f:
            content = f.read()
        
        # Attempt to parse the content as JSON
        data = json.loads(content)
        
        parsed_data = {}
        for doc_id, questions in data.items():
            if isinstance(questions, list):
                parsed_data[doc_id] = questions
            elif isinstance(questions, dict):
                parsed_data[doc_id] = list(questions.values())
            else:
                raise TypeError(f"Unexpected format for document {doc_id}")

        return parsed_data
    
    except FileNotFoundError:
        print(f"File '{file_path}' not found.")
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON in '{file_path}': {e}")
    except Exception as e:
        print(f"Error processing file '{file_path}': {e}")

# Example usage: Parsing questions from a file
file_path = '../data/result.txt'  # Adjust the file path accordingly
parsed_data = parse_questions(file_path)

# Example usage: Print questions for each document if parsing was successful
if parsed_data:
    for doc_id, questions in parsed_data.items():
        print(f"Questions for document {doc_id}:")
        for idx, question in enumerate(questions, start=1):
            print(f"  Question {idx}: {question}")
        print()

Error decoding JSON in '../data/result.txt': Expecting value: line 1 column 1 (char 0)


In [48]:

def parse_questions(file_path):
    with open(file_path, 'r') as f:
        content = f.read()
    
    # Assuming content is in JSON format based on previous examples
    data = json.loads(content)
    
    parsed_data = {}
    for doc_id, questions in data.items():
        if isinstance(questions, list):
            parsed_data[doc_id] = questions
        elif isinstance(questions, dict):
            parsed_data[doc_id] = list(questions.values())
        else:
            raise TypeError(f"Unexpected format for document {doc_id}")

    return parsed_data

# Example usage: Parsing questions from a file
file_path = '../data/result.txt'  # Adjust the file path accordingly
parsed_data = parse_questions(file_path)

# Example usage: Print questions for each document
for doc_id, questions in parsed_data.items():
    print(f"Questions for document {doc_id}:")
    for idx, question in enumerate(questions, start=1):
        print(f"  Question {idx}: {question}")
    print()

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [44]:
import json

# Read the content of the document
with open("../data/result.txt", "r") as file:
    content = file.read()

# Split the content into separate sections
sections = content.split("Questions for document ")

# Parse the questions from each section
questions = {}
for section in sections[1:]:
    document_id, question_text = section.split("\n", 1)
    document_id = document_id.strip()
    
    if question_text.startswith("["):
        # Questions in a list format
        questions[document_id] = json.loads(question_text)
    elif question_text.startswith("{"):
        # Questions in a dictionary format
        questions[document_id] = json.loads(question_text)
    else:
        print(f"Unsupported question format for document {document_id}")

# Print the parsed questions
for document_id, document_questions in questions.items():
    print(f"Questions for document {document_id}:")
    print(document_questions)
    print()

JSONDecodeError: Invalid \escape: line 3 column 59 (char 153)

In [51]:
def parse_questions_from_text(text):
    lines = text.splitlines()
    parsed_data = {}
    current_doc_id = None
    current_questions = []

    for line in lines:
        line = line.strip()
        if line.startswith('Questions for document '):
            if current_doc_id and current_questions:
                parsed_data[current_doc_id] = current_questions
                current_questions = []

            current_doc_id = line.split(':')[1].strip()
        elif line.startswith('['):
          
            questions_list = eval(line)  # Unsafe, but simple for this example
            current_questions.extend(questions_list)
        elif line.startswith('{'):
            # Dictionary of questions
            questions_dict = eval(line)  # Unsafe, but simple for this example
            current_questions.extend(questions_dict.values())
    if current_doc_id and current_questions:
        parsed_data[current_doc_id] = current_questions
    
    return parsed_data

# Example usage:
text = """
Questions for document c02e79ef:
[
  "When exactly does the course begin?",
  "How can I view the course schedule?",
  "What should I do before the course starts?",
  "Is there a communication channel for course announcements?",
  "Do I need to join any specific Slack channels for the course?"
]
Questions for document 1f6520ca:
{
  "question1": "What skills do I need before enrolling in this course?",
  "question2": "Where can I find the prerequisites for this course?",
  "question3": "Do I need prior knowledge of specific topics for this course?",
  "question4": "Is there a list of prerequisites for this course?",
  "question5": "How do I know if I meet the prerequisites for the course?"
}
Questions for document 7842b56a:
[
  "Is it possible to enroll after the course has started?",
  "Am I allowed to submit homework without registering initially?",
  "Are there any deadlines for final project submissions?",
  "Can I complete the course if I join late?",
  "Will I face any issues if I delay registering for the course?"
]
Questions for document 0bbf41ec:
[  
  "When will I receive the confirmation email after registering for the Data Engineering Bootcamp?",  
  "Is it necessary to register before starting classes and submitting homework for the Data Engineering Bootcamp?",  
  "Do I need to wait for a confirmation email to begin the Data Engineering Bootcamp?",  
  "Is there a verification process for registered students before they start the Data Engineering Bootcamp?",  
  "Why do we register for the Data Engineering Bootcamp if it is not mandatory to begin the coursework?"  
]
"""

parsed_data = parse_questions_from_text(text)
for doc_id, questions in parsed_data.items():
    print(f"Questions for document {doc_id}:")
    for idx, question in enumerate(questions, start=1):
        print(f"  Question {idx}: {question}")
    print()

SyntaxError: unexpected EOF while parsing (<string>, line 1)

In [54]:
import json

# Sample document content
document_content = '''
Questions for document c02e79ef:
[
  "When exactly does the course begin?",
  "How can I view the course schedule?",
  "What should I do before the course starts?",
  "Is there a communication channel for course announcements?",
  "Do I need to join any specific Slack channels for the course?"
]
Questions for document 1f6520ca:
{
  "question1": "What skills do I need before enrolling in this course?",
  "question2": "Where can I find the prerequisites for this course?",
  "question3": "Do I need prior knowledge of specific topics for this course?",
  "question4": "Is there a list of prerequisites for this course?",
  "question5": "How do I know if I meet the prerequisites for the course?"
}
Questions for document 7842b56a:
[
  "Is it possible to enroll after the course has started?",
  "Am I allowed to submit homework without registering initially?",
  "Are there any deadlines for final project submissions?",
  "Can I complete the course if I join late?",
  "Will I face any issues if I delay registering for the course?"
]
Questions for document 0bbf41ec:
[  
  "When will I receive the confirmation email after registering for the Data Engineering Bootcamp?",  
  "Is it necessary to register before starting classes and submitting homework for the Data Engineering Bootcamp?",  
  "Do I need to wait for a confirmation email to begin the Data Engineering Bootcamp?",  
  "Is there a verification process for registered students before they start the Data Engineering Bootcamp?",  
  "Why do we register for the Data Engineering Bootcamp if it is not mandatory to begin the coursework?"  
]
Questions for document 63394d91:
[
"What preparations are necessary before the course begins?",
"Which tools need to be installed prior to starting the course?",
"Can I review the syllabus before the course starts?",
"Is setting up a Google cloud account required before the course?",
"Should I ensure familiarity with course subjects beforehand?"
]
Questions for document 2ed9b986:
[
  "How many Zoom Camps are there in a year?",
  "Which months is the Data-Engineering Zoomcamp scheduled for?",
  "Is there more than one live cohort per year for each Zoomcamp?",
  "What are the separate courses for each Zoomcamp in a year?",
  "Can I take the Zoomcamps at my own pace if I'm not interested in the Certificate?"
]
Questions for document 93e2c8ed:
["Will the 2024 cohort feature different tools from the previous cohorts?",
"Are there any changes to the 2024 edition in terms of software used?",
"Is Mage AI replacing any tools in the 2024 cohort?",
"Did the 2024 edition re-record any specific content?",
"What tool did the 2023 edition use instead of Airflow?"]
Questions for document a482086d:
[
  "Can I access the course materials after it ends?",
  "Will the homeworks still be available after the course finishes?",
  "Can I prepare for the next cohort once the course is over?",
  "Is it possible to work on my final capstone project post course?",
  "Can I continue the course at my own pace after it has ended?"
]
Questions for document eb56ae98:
["Can I get help if I choose the self-paced version of the course?",
"What resources should I check before asking a question in the slack channel?",
"Is there any tool provided to assist in searching through previously asked questions in the course?",
"What is the recommended method for searching the slack channel for answers?",
"Can the @ZoomcampQABot be fully relied upon for answering questions in the slack channel?"]
Questions for document 4292531b:
[
  "Which YouTube playlist has all the main videos for the course?",
  "Where can I find the thumbnails for each video in the course?",
  "Are the office hours videos available in the main playlist?",
  "What year-specific playlists should I check for additional videos?",
  "Where is the main playlist link mentioned in Slack?"
]
Questions for document ea739c65:
[
  "How many hours per week are recommended for this course?",
  "What is the weekly time commitment for someone without prior module experience?",
  "Can I customize the weekly hour estimate for my own schedule?",
  "Does the expected weekly study time vary based on background?",
  "How many hours might an experienced student spend weekly on this course?"
]
Questions for document cb257ee5:
["Do I get a certificate if I complete the course in self-paced mode?",
"Is it possible to receive a certificate without following a live cohort?",
"What are the conditions for obtaining a certificate in this course?",
"Why can't I receive a certificate for completing the self-paced mode?",
"Do I need to peer-review projects to earn a certificate?"]
Questions for document 04aa4897:
["Where can I access the link to join Office Hour sessions on YouTube?", "How do students participate in Office Hour sessions?", "Where will the video URL for Office Hour sessions be posted?", "Can students directly post questions in the YouTube chat during Office Hour sessions?", "How will questions be submitted during the Office Hour sessions?"]
Questions for document 9681be3b:
["If I miss the live Office Hours, can I watch it later?",
"Are the Office Hours sessions recorded for future viewing?",
"Will the Office Hours/workshop be available after their live sessions?",
"Can I rewatch the Office Hours sessions at my convenience?",
"If I'm unable to attend the workshop, will a recording be accessible?"]
Questions for document a1daf537:
[
  "Where can I find the latest deadlines for homework and projects?",
  "How can I stay updated on deadline extensions or other important news?",
  "What should I check for the most current project deadlines?",
  "Is there a way to know if the Instructor has updated the deadline for an assignment?",
  "Who announces extensions or other updates regarding homework and projects?"
]
Questions for document be5bfee4:
[
  "Is there any possibility of submitting homework after the due date even if it’s late?",
  "Can I submit my homework if the form is still open past the due date?",
  "What happens if I try to submit homework late but the form is still accessible?",
  "If the form is open after the due date, can I confirm my submission via the Course page timestamp?",
  "Are late submissions ever accepted under special circumstances or with permission?"
]
Questions for document 0e424a44:
[
  "Where can I find the homework URL to submit my assignments?",
  "What location should I use to store my homework code?",
  "Is there a specific platform I need to use for my homework repository?",
  "How will the homework repository be evaluated?",
  "Can I use any code storage service for my homework?"
]
Questions for document 29865466:
["How are homework assignments graded in terms of points in the course?", 
"Where can I find the total points I have earned for all my homework submissions?", 
"What is the point system for submitting to the FAQ section?", 
"How do points for Learning in Public posts contribute to my total?", 
"What information does the leaderboard provide regarding my course performance?"]
Questions for document 016d46a1:
[
  "How do I find my assigned display name on the leaderboard?",
  "Where can I check my display name assigned at account setup?",
  "What steps should I follow to view my display name in the course?",
  "Can I change my display name on the leaderboard, and if so, how?",
  "Where is the 'Edit Course Profile' option to find my display name?"
]
Questions for document 47972cb1:
["Is Python 3.9 still recommended in 2024?",
"Why use Python 3.9 instead of newer versions?",
"Can Python 3.10 be used for the course?",
"Is Python 3.11 supported in this course?",
"What's the reason for sticking with Python 3.9?"]
Questions for document ddf6c1b3:
[
  "Should I use my personal laptop or a virtual machine for my course environment?",
  "What challenges might I face if I set up the environment on a Windows machine?",
  "Where should I start if I choose to use a local machine for the course?",
  "What options do I have for setting up a virtual machine environment?",
  "Why did someone decide to use a virtual machine for the course?"
]
Questions for document ac25d3af:
[
    "Does GitHub Codespaces provide pre-installed tools like Docker and Python?",
    "Can I use GitHub Codespaces instead of git bash for data ingestion?",
    "Is it possible to open any GitHub repository in a GitHub Codespace?",
    "Does GitHub Codespaces offer Linux computing resources?",
    "Can I create a Docker file using GitHub Codespaces?"
]
Questions for document 251218fc:
[
  "Is using GitHub codespaces mandatory for the course?",
  "Can I complete the course using my own laptop with existing tools?",
  "Are GitHub codespaces the only recommended platform for the course?",
  "Do I need to use GCP VM for this course?",
  "Can I use my current PostgreSQL & Docker installation for the course?"
]
Questions for document 3c0114ce:
[
    "Do I need to use both GitHub Codespaces and GCP for the course?",
    "Which environment is recommended for the end project, GitHub Codespaces or GCP?",
    "Is BigQuery a necessary component of this course?",
    "Can I complete most of the course with a local environment?",
    "What should I consider when choosing between GitHub Codespaces and GCP?"  
]
Questions for document f43f5fe7:
[
  "How can I open the Run command window using a shortcut?",
  "What is the location of the 'Autorun' registry value that I need to change?",
  "How do I open the Registry Editor?",
  "What should I change the 'Autorun' registry value to for fixing this issue?",
  "Where is the 'known_hosts' file located in Windows?"
]
Questions for document d061525d:
[
  "Why are you using GCP instead of another cloud provider?",
  "Can I use AWS or Azure instead of GCP for this course?",
  "Why is a Google account necessary for this course?",
  "What is the benefit of using GCP's free trial period?",
  "Do I need a credit card to sign up for a GCP account?"
]
Questions for document 1cd01b2c:
["Do I need to pay for cloud services?",
"Will I incur costs using GCP?",
"Is there a fee for using the cloud in this course?",
"Are cloud services free for students in this course?",
"Can I use GCP without paying?"]
Questions for document e4a7c3b0:
[
  "Is it feasible to complete the entire course without using GCP or other cloud providers?",
  "Can we run almost everything from the course locally except for BigQuery?",
  "Is there a possibility of getting guidelines for setting up a home lab to replace the cloud?",
  "Are there local alternatives available for all the components used in the course?",
  "Is it necessary to use GCP for this course, or can we do the whole course locally?"
]
Questions for document 7cd1912e:
["Can I use AWS instead of GCP for this course?",
"How do I adapt the course videos for AWS?",
"What is required for the final capstone project?",
"What challenges might I face using AWS instead of GCP?",
"Where can I find help if I use AWS?"]
Questions for document 52393fb3:
{
  "question1": "Are there any other scheduled live Zoom calls besides the Office Hour?",
  "question2": "Will there be live sessions during the Capstone period?",
  "question3": "How will we know about any additional live Zoom calls?",
  "question4": "Is the Capstone period the only time for extra live Zoom calls?",
  "question5": "Will announcements be made in advance for any additional Zoom calls?"
}
Questions for document 10515af5:
["Are there any updates to the dataset we'll be working with from last year?", 
"Will we continue to use the NYC Trip data from January 2021?", 
"Has the dataset for the project changed to 2022?", 
"Is the project data the same as last year's NYC Trip data?", 
"Will the data source for our project remain consistent with last year's?"]
Questions for document cdb86a97:
[
  "Is the 2022 repository still available?",
  "Can I still access the 2022 materials?",
  "Where can I find the 2022 content?",
  "Has the 2022 repository been moved?",
  "Is the old 2022 repository deleted?"
]
Questions for document 3e0114ad:
[
  "Can I use a different tool for my final project?",
  "Is it allowed to choose any software for the final project?",
  "Am I permitted to use alternative tools for the final project?",
  "Can we select our preferred tools for the final project?",
  "Is it possible to opt for a different tool for the final project?"
]
Questions for document b2799574:
["Can I use alternative tools like Airflow or AWS instead of the ones taught in the course?",
"Is it permissible to substitute Metabase with Tableau for course assignments?",
"Can I opt for a different data stack than the recommended GCP or local installation?",
"If I choose my own toolset, will the instructors provide support?",
"Do I need to explain my tool choices during the peer review of my capstone project if I use alternative tools?"]
Questions for document 2f19301f:
[
    "What is one way we can support the course?",
    "How can we help improve the course materials?",
    "Is there a method to share this course with others?",
    "What do you suggest if I find errors in the text?",
    "How should I proceed if I have structural improvements for the repository?"
]
Questions for document 7c700adb:
{
  "questions": [
    "Is this course compatible with macOS?",
    "Can I complete the course using only Windows?",
    "Is Linux recommended for this course?",
    "Did students last year use Windows for this course?",
    "Will using different operating systems affect the course experience?"
  ]
}
Questions for document 44b14808:
["Why do Windows users encounter issues with modules containing *.sh files?", "How did previous cohorts handle modules that required shell scripts on Windows?", "When are shell scripts needed in the course modules?", "Can git bash or MINGW64 be used to run *.sh files on Windows successfully?", "Why is WSL environment setup recommended from the beginning of the course?"]
Questions for document 76e4baf6:
[
    "Can you recommend any books or additional resources for this course?",
    "Is there a list of suggested readings or additional materials?",
    "Where can I find extra resources or books for this course?",
    "Do you have any book recommendations for better understanding course topics?",
    "Are there any additional documents or resources you suggest we check out?"
]
Questions for document 48b533a8:
[
  "What happens if I miss the first project deadline?",
  "What are my options if I fail the first project attempt?",
  "Can I submit my project after the first deadline?",
  "Is a second attempt allowed for project submissions?",
  "Do I get another chance if the first project attempt is unsuccessful?"
]
Questions for document 954044d1:
["What should I do first when I encounter an issue in the course?",
"What are some methods to find a solution before asking for help?",
"Why should I avoid using screenshots or phone pictures when asking for help?",
"What information should I include when posting a question about an error?",
"What should I do if someone helps me with a problem that's not in the FAQ?"]
Questions for document a820b9b3:
[
  "When should I ask a question if the troubleshooting guide doesn't help?",
  "What information should I include when asking a coding question?",
  "What details about my OS should I provide when asking for help?",
  "Why is it important to mention the specific command or video I followed?",
  "What should I check in the error log before asking for further assistance?"
]
Questions for document f2945cd2:
[
  "What steps do I need to follow after creating a GitHub account for this course?",
  "Where can I find a tutorial for cloning the course repo to my local machine?",
  "What should I do to create my own repositories for course notes?",
  "How can I ensure that large files are not saved to my git repository?",
  "Why should I never store passwords or keys in a git repository even if it is private?"
]
Questions for document eb9d376f:
[
  "How do I fix the missing separator error in my Makefile?",
  "Why do I get a 'Makefile:2: *** missing separator.' error in VS Code?",
  "What should I do if tabs are converted to spaces in my document?",
  "What is the solution for the missing separator error in a Makefile?",
  "How can I convert spaces to tabs in my document for VS Code?"
]
Questions for document 72f25f6d:
[
  "How can I open HTML files from Linux on WSL in a Windows browser?",
  "What should I use to open an HTML file in Windows from Linux in WSL?",
  "Is it possible to open HTML files from Linux on WSL using a browser installed on Windows?",
  "How can I specify which Windows browser to use when opening HTML files from Linux on WSL?",
  "What tool do I need to install to open HTML files with a Windows browser from Linux on WSL?"
]
Questions for document a1e59afc:
[
  "How can I set up Chrome Remote Desktop on a Debian Linux VM on Compute Engine?",
  "What error occurs when trying to download 2021 Taxi Data from the TLC website?",
  "What is the alternative link to download the 2021 Yellow Taxi Trip Records?",
  "How do I unzip the 'gz' file for the 2021 Yellow Taxi Trip Records?",
  "Where can I find backup for the 2021 Yellow Taxi Trip Data if the TLC website link gives an error?"
]
Questions for document 71c10610:
[
  "How can I handle taxi data files that are provided as .csv.gz in the course?",
  "What should I change in the code to correctly store and read taxi data files named as .csv.gz?",
  "Where can I find the proper file name for the yellow taxi data file we're using?",
  "How can I modify the csv_name variable to correctly read .csv.gz files in the course?",
  "Is it possible to use the pandas read_csv function to directly read .csv.gz files?"
]
Questions for document 17a5aea1:
[
  "Where can I find the data dictionary for Yellow Taxi data in NY?",
  "Is there a data dictionary available for Green Taxi trips in NY?",
  "What is the link to the data dictionary for Yellow Taxi trips?",
  "How do I access the data dictionary for Green Taxi records?",
  "Can you provide the URL for the Yellow Taxi trip records data dictionary?"
]
Questions for document 5a275db7:
{
  "questions": [
    "How do I unzip the parquet file from the taxi data in Module 1?",
    "What should I add to the def main(params) function to handle the parquet file directly?",
    "How can I modify the code responsible for downloading files in Module 1 to handle the parquet format?",
    "How do I convert the downloaded parquet file to a CSV file in the ingest_data.py script?",
    "What command can I use in the command line to unzip the green_tripdata_2019-09.csv.gz file?"
  ]
}
Questions for document 7ec0f9b0:
{
  "questions": [
    "What should I do if wget is not recognized as an internal or external command?",
    "How can I install wget on Ubuntu?",
    "What is the easiest method to install wget on MacOS?",
    "How can I install wget using Chocolatey on Windows?",
    "Is there an alternative to wget for downloading files using Python?"
  ]
}
Questions for document bb1ba786:
["How can I resolve the wget certificate error on MacOS?",
"What should I add before wget if I am using Jupyter Notebook?",
"What is the Python library command for wget to bypass certificate verification?",
"How can I modify the wget command to ignore certificate checks?",
"What are the steps to fix wget certificate issues from the CLI on MacOS?"]
Questions for document 2f83dbe7:
[
  "How can I use the backslash as an escape character in Git Bash for Windows?",
  "What should I type in the terminal to use the backslash as an escape character in Git Bash?",
  "Is it necessary to include the bash.escapeChar command in the .bashrc file?",
  "What command does Alexey usually use for the backslash in Git Bash for Windows?",
  "Can I use the backslash as an escape character without modifying .bashrc in Git Bash for Windows?"
]
Questions for document 543ff080:
{
  "questions": [
    "How can I securely store secrets for GitHub Codespaces in Module 1?",
    "Where can I find information on managing account-specific secrets for GitHub Codespaces?",
    "What is the method for handling secrets in GitHub Codespaces discussed in Module 1?",
    "Where should I look to understand storing secrets in GitHub Codespaces?",
    "Can you guide me on where to manage secrets for GitHub Codespaces?"
  ]
}
Questions for document d407d65b:
[
    "How do I resolve the error 'Cannot connect to Docker daemon at unix:///var/run/docker.sock'?",
    "What should I check if the Docker daemon isn’t running?",
    "Is there a command to update WSL for Docker issues?",
    "What is the solution for the error 'docker daemon running' in Module 1?",
    "How can I update WSL in PowerShell?"
]
Questions for document c9375c56:
[
"How can I resolve the error during connect in Docker on Windows that mentions elevated privileges?",
"What backend options are available for Docker engine on Windows according to the official documentation?",
"What should I do if I am using Windows 10 Pro and need to enable Docker’s backend?",
"If I am a Windows 10 Home user, how can I get Docker running since Hyper-V is not available?",
"What steps should I take if I encounter the WslRegisterDistribution error while installing WSL2 on Windows?"
]
Questions for document e866156b:
[
    "Why do I get a 'requested access to the resource is denied' error when pulling a Docker image?",
    "What should I do if I see the error 'repository does not exist or may require docker login'?",
    "Do I need to log in to Docker to fetch images for this course?",
    "How can I resolve permission denied errors when creating a PostgreSQL Docker container with a mounted volume on macOS M1?",
    "What changes need to be made if I'm using Rancher Desktop and encounter errors when mounting volumes in Docker?"
]
Questions for document 16370470:
[
    "Why can't I delete a local folder mounted to a Docker volume?",
    "How do I delete a local folder that has write and read protection and is owned by user 999?",
    "What command should I use to remove a protected local folder created by a Docker container?",
    "What does the `rm -r -f` command do when deleting a Docker volume folder?",
    "Why was my Obsidian unable to start after creating a Docker container with PostgreSQL?"
]
Questions for document 316df755:
[
  "How can I fix Docker if it's stuck on starting on Windows 10 or 11?",
  "What should I do if Docker won't start and the upgrade menu isn't working?",
  "Can Docker run on the Pro Edition of Windows using Hyper-V or WSL2?",
  "Where can I find the tutorial to enable Hyper-V on Windows 10 or 11?",
  "What steps should I follow if I want to use WSL2 for Docker on Windows 10 or 11?"
]
Questions for document f3aa9252:
["Is it better to use the Windows file system or WSL for running Docker commands?",
"What should I do if Docker is stuck even after setting up WSL2 properly?",
"Can I get Docker running on Windows Home Edition through WSL2?",
"Where can I find a tutorial for setting up WSL2 for Docker on Windows Home Edition?",
"What steps can I take if resetting to factory defaults doesn’t fix my Docker issues in WSL2?"]
Questions for document a4abe7a5:
[
  "What is the recommended storage location for code to optimize file system performance in Docker?",
  "Why should I store code in my default Linux distro when using Docker?",
  "Where can I find best practices for Docker on performance?",
  "Is Docker using the WSL2 backend by default on Windows 10 Home?",
  "Which resource provides more information on Docker best practices?"
]
Questions for document fb930700:
[
  "What should I do if I get the 'input device is not a TTY' error when running Docker on Windows?",
  "How do I fix the 'input device is not a TTY' error in Docker for Ubuntu on Windows?",
  "Is there a command to prefix if I encounter 'input device is not a TTY' in Docker?",
  "Can you suggest a way to avoid seeing 'input device is not a TTY' when using Docker on Windows?",
  "How do I create an alias to automatically include 'winpty' when running Docker commands?"
]
Questions for document aa187680:
["What should I do if I cannot pip install on a Docker container in Windows?", "How can I fix the error related to 'Temporary failure in name resolution' when using pip in Docker?", "Which command might resolve connection issues when pip installing packages in a Docker container?", "What is the suggested solution for the error 'Failed to establish a new connection' in Docker?", "Can you provide a command to run a Docker container with a specified DNS to avoid pip install errors?"]
Questions for document b000e899:
{
  "questions": [
    "What should I do if the ny_taxi_postgres_data folder is empty in VS Code after running the Docker script?",
    "How can I solve the issue of my docker folder being empty when setting up ny_taxi database on Windows?",
    "What is the fix for the empty ny_taxi_postgres_data folder in VS Code despite running the Docker script correctly?",
    "Why isn't my ny_taxi_postgres_data folder populated in VS Code after executing the Docker command?",
    "How do I make the files in ny_taxi_postgres_data folder visible in VS Code when using Docker on Windows?"
  ]
}
Questions for document 9c66759f:
["What is the recommended way to set up Docker on a Mac?",
"Is the dasDocker method still valid for setting up Docker on macOS?",
"Where can I find details about setting up Docker on a Mac?",
"Has Docker changed its licensing model?",
"What method works reliably for installing Docker on a Mac?"]
Questions for document e3106e07:
["How do I solve the permissions error when trying to change permissions of /var/lib/postgresql/data in Docker?",
"What command creates a local Docker volume for PostgreSQL data?",
"Why might the folder ny_taxi_postgres_data be empty in Docker Desktop app?",
"What alternative error might occur related to /var/lib/postgresql/data directory in Docker?",
"What should I do if I encounter the error that the directory /var/lib/postgresql/data exists but is not empty?"]
Questions for document 72229da5:
[
"What should I do if mapping volumes on Windows doesn't work as shown in the course video?",
"How can I fix the error 'invalid reference format: repository name must be lowercase' when using Docker on Windows?",
"Why do I need to move my data to a folder without spaces when working with Docker on Windows?",
"What are the suggested variations for the '-v' part when mapping volumes on Windows?",
"If none of the options for mapping volumes work, what alternative do I have?"
]
Questions for document 58c9f99f:
[
    "What should I do if I get a Docker error about an invalid mode for the mounting path?",
    "How can I resolve the Docker daemon error related to \Program Files\Git\var\lib\postgresql\data?",
    "What is the correct way to change the mounting path to fix Docker errors?",
    "Can you provide examples of valid mounting paths for Docker with PostgreSQL data?",
    "What is the fix for invalid mode errors in Docker for PostgreSQL data directory?"
]
Questions for document bc42139a:
[
  "What should I do if I get an error about creating a buildmount source path when using Docker?",
  "Why does Docker give a 'file exists' error during the second run of a command with volume mounting?",
  "How can I resolve the Docker error about mkdir /run/desktop/mnt/host?",
  "Is it necessary to use the volume mounting option when running a Docker container for the second time?",
  "What is the correct command to run a PostgreSQL Docker container without mounting on the second run?"
]
Questions for document a146e3ee:
["What can I do to fix the Docker build error regarding 'can't stat /home/user/repos/data-engineering/week_1_basics_n_setup/2_docker_sql/ny_taxi_postgres_data'?",
"What specific files do I need in the directory to avoid the Docker build error?",
"How can I find a more detailed explanation of the Docker build error related to permission issues?",
"What command should I use to change directory permissions on Ubuntu to fix the Docker build error?",
"Where can I find instructions to change file or folder ownership using command prompt in Windows 10?"]
Questions for document 593a85ba:
[
  "What should I do if I see 'ERRO[0000] error waiting for container: context canceled' with Docker?",
  "How can I verify if Docker was installed via snap?",
  "What response indicates that I should deinstall Docker installed via snap?",
  "Where should I reinstall Docker from if the snap installation has issues?",
  "What does 'Bind for 0.0.0.0:5432 failed: port is a' imply in the context of Docker usage?"
]
Questions for document 50bd1a71:
["What should I do if I get a build error related to can't stat 'ny_taxi_postgres_data' in Docker?", 
"Why do I get an error about not being able to stat ‘ny_taxi_postgres_data’?", 
"How can I resolve the issue where Docker can't access 'ny_taxi_postgres_data'?", 
"What is the solution to the Docker build error regarding 'ny_taxi_postgres_data'?", 
"How do I add permissions to the 'ny_taxi_postgres_data' folder to fix a Docker error?"]
Questions for document f409f751:
[
  "Why do I get a 'permission denied' error when trying to build my Docker container on Ubuntu/Linux systems?",
  "What command should I use to grant permissions to the Docker files folder?",
  "What does running 'sudo chmod -R 755 ny_taxi_postgres_data' do?",
  "Is using 777 a solution if 755 does not fix the Docker build issue?",
  "When does the 'failed to solve with frontend dockerfile.v0' error typically occur?"
]
Questions for document 7d217da3:
["How can I find the name of a Docker network?", "What command lists Docker networks?", "Where can I look up the name of my Docker network?", "How do I get a list of available Docker networks?", "Which command should I use to see Docker networks?"]
Questions for document 09081824:
[
  "What should I do if I see a conflict error stating that the container name 'pg-database' is already in use?",
  "How can I remove a container when Docker reports a conflict with the name 'pg-database'?",
  "Which command should I use to stop a running Docker container named 'pg-database'?",
  "Is there a way to restart a Docker container without deleting it when faced with a naming conflict?",
  "What does the conflict error involving 'pg-database' typically indicate when trying to restart a Docker image?"
]
Questions for document 4df80c55:
["When using docker-compose, how can I troubleshoot the error 'could not translate host name'?", "What does the sqlalchemy.exc.OperationalError indicate when using Docker?", "How should I adjust my network name for docker-compose if 'could not translate host name' error occurs?", "What steps should be taken if a docker-compose ingestion script fails with a pgdatabase error?", "How can I identify the correct database name and network for docker-compose?"]
Questions for document 3aee7261:
[
  "How do I enable nested virtualization for Docker on an Intel CPU?",
  "What commands should I run to enable nested virtualization on an AMD CPU?",
  "Why can't I install Docker on a Windows 11 VM on top of Linux?",
  "Can you provide the steps to enable nested virtualization for an Intel CPU?",
  "What are the specific modprobe commands for an AMD CPU to enable nested virtualization?"
]
Questions for document 6497b659:
["How can I manage Docker containers, images, network, and compose projects from VS Code?", "What should I install to manage Docker in VS Code?", "Which command stops a running Docker container?", "Does the VS Code Docker extension work with Docker on WSL2?", "How do I launch the Docker features in VS Code?"]
Questions for document a02f2039:
["What should I do if my PostgreSQL Docker container shows the directory contains a database?", "How do I resolve a 'server closed the connection unexpectedly' error in a PostgreSQL Docker container?", "What steps are needed if my PostgreSQL container is not accepting requests?", "What does it mean if my PostgreSQL server terminated abnormally in Docker?", "How can I fix my PostgreSQL Docker container that won't start due to an existing database directory?"]
Questions for document c6db65aa:
[
    "What command can I use to install Docker on some Ubuntu versions?",
    "How do I install Docker if it's not installable on certain Ubuntu versions?",
    "Is there an alternative method for installing Docker on Ubuntu?",
    "Which command is recommended for Docker installation on specific Ubuntu versions?",
    "Can Docker be installed using snap on Ubuntu?"
]
Questions for document f476a606:
["What is causing the Docker-Compose mounting error related to permissions in Module 1?", 
"How do I inform the compose file about a locally created Docker volume in Module 1?",
"Is it necessary for services in the compose file to be part of the same network in Module 1?",
"How can I inspect the location of a Docker volume in Module 1?",
"What steps should I follow if Docker-Compose creates a new mounting directory instead of using an existing one in Module 1?"]
Questions for document e41b100c:
["How can I resolve 'Error translating host name to address' in Docker Compose?", "What command should I use to start containers in detached mode?", "How do I view running containers in Docker?", "How can I check logs for a specific Docker container?", "What should I do if 'docker ps' doesn’t show pg-database running?"]
Questions for document cd0f9300:
[
  "Why am I getting a 'could not translate host name \"pg-database\" to address: Name or service not known' error after running docker-compose up?",
  "What should I do if I lose database data and my Ingestion script fails with an OperationalError?",
  "How can I find the new network name created by Docker Compose?",
  "What is the solution if Docker Compose logs indicate a new default network?",
  "What tools can I use if problems persist with pgcli?"
]

Questions for document 7f845a1c:
{
  "questions": [
    "What error might occur when Docker-Compose’s hostname does not resolve?",
    "What command allows you to see all the stopped and running containers in Docker?",
    "What should you do if you get a 'could not translate host name' error?",
    "Why should you avoid using hyphens in hostname within docker-compose configurations?",
    "What should be specified in docker-compose.yml when linking services through networks?"
  ]
}
Questions for document 36e54439:
[
  "How can I make PGAdmin data persist when using Docker-Compose on GCP?",
  "Why doesn’t postgres persist its data to the specified path when running docker-compose on GCP?",
  "What is a solution for persisting PGAdmin data in Docker-Compose on GCP?",
  "What changes need to be made to Docker-Compose to make PGAdmin data persist in GCP?",
  "Is there an alternative to using path-based volumes for persisting PGAdmin data in Docker-Compose on GCP?"
]
Questions for document 32e8450c:
[
  "What should I do if my Docker keeps crashing continuously and shows 'failed to fetch extensions'?",
  "Is reinstalling Docker necessary if updating it doesn't resolve the issues?",
  "What happens when Docker engine stops and fails to fetch extensions?",
  "Will I lose all my Docker images if I have to reinstall Docker?",
  "How can I ensure my Docker is not crashing due to an outdated version?"
]
Questions for document 96606db2:
[
  "How do I persist pgAdmin configuration in Docker-Compose?",
  "What volumes need to be added to persist pgAdmin's server name in Docker-Compose?",
  "Which folder on the host machine should be used for pgAdmin's session settings in Docker-Compose?",
  "What command should be used to grant pgAdmin container access to the mounted volume?",
  "What are the environment variables needed to run the pgAdmin container in Docker-Compose?"
]
Questions for document 0882bfac:
[
  "What should I do if I get a permission denied error while using Docker-Compose?",
  "How do I make sure pgAdmin remembers my previous connections?",
  "What steps should I follow to run Docker commands without sudo?",
  "How can I create a volume for pgAdmin in docker-compose.yaml?",
  "What should I add at the end of the docker-compose.yaml file for pgAdmin data?"
]
Questions for document 7d067f5c:
["Why is docker-compose not available after modifying .bashrc?",
"What should I do if my docker-compose file is named docker-compose-linux-x86_64?",
"How can I fix docker-compose not being recognized in my Google Cloud VM?",
"Is there a step involving renaming the docker-compose file from github?",
"What did the 1.4.1 video cover regarding docker-compose installation?"]
Questions for document ff352621:
[
    "What should I do if I encounter an error getting credentials after running docker-compose up -d?",
    "How can I fix the issue when Docker-Compose shows an error getting credentials?",
    "Is there a specific package to install for resolving the Docker-Compose error related to credentials?",
    "Where can I find more information about the Docker-Compose credential error?",
    "What is the solution provided in the FAQ for the Docker-Compose credential error?"
]
Questions for document 2d653208:
["What should I do if I'm encountering errors with Docker Compose and pgadmin setup in Module 1?", "How can I create a new volume on Docker for the pgadmin setup?", "What changes do I need to make in the docker-compose.yml file for my pgadmin setup?", "What setting should I use when importing the CSV file to avoid low memory issues?", "What is the correct order of execution for setting up Docker Compose and pgadmin?"]
Questions for document f09ea61e:
["How do I fix the Docker Compose up -d error related to docker-credential-desktop?",
"Where can I find the config.json file for Docker?",
"What should I change credsStore to in the Docker config file?",
"What steps should I take after modifying the credsStore in Docker config?",
"Where is the Docker config.json file typically located on a user’s system?"]
Questions for document fbd3d2bb:
["How can I determine the appropriate docker-compose binary for WSL?",
"What command returns the system type for Docker-Compose?",
"Where can I download the suitable docker-compose binary for my system?",
"Which command helps to identify your system’s architecture for Docker-Compose?",
"What is the curl command to directly download the docker-compose for WSL?"]
Questions for document 0b014d0c:
["What should I do if I get an undefined volume error in Docker-Compose on Windows/WSL?",
"What specific error might occur if I follow the video instructions for the docker-compose.yaml file?",
"How can I fix the issue with the undefined volume in my Docker-Compose file?",
"Where should the volumes be added in the docker-compose.yaml file to avoid errors?",
"What is the correct way to refer to the volume 'dtc_postgres_volume_local' in Docker-Compose?"]
Questions for document d21bff1d:
[
  "Why am I encountering a permissions error when using WSL Docker with Windows file system?",
  "What is the solution to the permission issue caused by WSL and Windows file system conflict?",
  "Why should I use Docker volumes instead of a local volume for persistent data storage?",
  "Is the 'user:' parameter necessary when using Docker volumes?",
  "Where should I specify volume names in the docker-compose.yaml file?"
]
Questions for document 6afb7b55:
["If pgadmin is not working on Windows, what should I use for querying Postgres instead?",
"Which libraries are required for pgadmin to work properly on Windows?",
"What command can I use to install the required library for pgadmin?",
"Why might pgadmin not work on git bash or a VM in Windows?",
"Is there an alternative to pgadmin that works the same for querying Postgres?"]
Questions for document b51c3b82:
["What should I do if I encounter 'Insufficient system resources exist to complete the requested service' while using WSL?",
"What causes the error message 'Insufficient system resources exist to complete the requested service' in WSL?",
"How can I update the Windows Terminal to resolve 'Insufficient system resources' error in WSL?",
"What steps should I follow to check for pending updates for Windows Security?",
"Why is it necessary to restart the system after updating Windows Terminal and Windows Security updates?"]
Questions for document 326af690:
[
  "How can I fix the issue when WSL integration with Ubuntu stops unexpectedly with exit code 1?",
  "What should I do if restarting doesn't solve the unexpected stop of WSL integration with distro Ubuntu on Windows?",
  "What registry settings should I change to fix a DNS issue causing WSL integration problems?",
  "How do I switch Docker to Linux containers to resolve WSL integration issues?",
  "What steps should I follow to restart my OS after adjusting the DNS settings in the registry?"
]
Questions for document c2ec9047:
[
  "What should I do if I receive an error when running SSH commands through WSL2?",
  "How can I resolve the issue of WSL2 not looking for .ssh keys in the correct folder when running an SSH command?",
  "What command should I use to change the permissions of the private SSH key file?",
  "How can I create a .ssh folder in the home directory of WSL2 and copy the contents of the Windows .ssh folder to it?",
  "What steps should be taken if creating a .ssh folder and changing file permissions don't work for SSH access through WSL2?"
]
Questions for document 3b711e73:
["How can I resolve host name issues in WSL2 when it's not finding the correct .ssh/config path?", "What steps should I follow to create the correct config file in WSL2?", "Where should I create the .ssh directory in WSL2 to solve host name resolution issues?", "How do I specify the HostName and User in the WSL2 .ssh/config file?", "What file path should the IdentityFile point to in the WSL2 .ssh/config file?"]
Questions for document cfe07c9d:
["How can I resolve the PGCLI connection failed issue in Module 1?", "What does 'Change TO Socket' mean in the context of resolving PGCLI connection errors?", "Which host and port should I use to connect to the PostgreSQL server in Module 1?", "How do I specify the user and database when using PGCLI in Module 1?", "What are the exact PGCLI command parameters to resolve connection issues in Module 1?"]
Questions for document acf42bb8:
[
  "What should I do if I encounter a --help error with PGCLI in Module 1?",
  "How can I troubleshoot PGCLI installation errors in the Docker and Terraform module?",
  "What does a --help error in PGCLI likely indicate?",
  "What steps should I take if PGCLI shows an error during Module 1?",
  "How do I resolve a PGCLI --help error as mentioned in Module 1?"
]
Questions for document 176ce516:
[
  "Can we access the postgres database directly from our computer?",
  "Should pgcli be run inside another Docker container?",
  "Is it necessary to use another container to run pgcli?",
  "How can we connect to the postgres database in this course?",
  "Do we need to map pgcli to a different port?"
]
Questions for document 3e5d1e9b:
["What should I do if I get a 'password authentication failed for user root' error while using PGCLI?",
"Why do I need to use port 5431 when creating the Docker container for Postgres?",
"How do I change the port number for my Postgres Docker container?",
"What command allows me to see if something is blocking my Postgres port on MacOS?",
"How can I stop and start the local PostgreSQL service on MacOS using launchctl?"]
Questions for document 78833f32:
["What should I do if I get a PermissionError with pgcli saying '/some/path/.config/pgcli'?",
"How do I avoid the PermissionError: [Errno 13] in pgcli?",
"Why should pgcli not be installed using sudo?",
"What is the recommended way to install pgcli to avoid affecting the system python?",
"What alternatives exist if conda install gets stuck at 'Solving environment'?"]
Questions for document 63823f21:
[
  "What should I do if I encounter the ImportError: no pq wrapper available in Module 1: Docker and Terraform?",
  "How can I verify my current Python version to ensure it's at least 3.9?",
  "What steps should I take if my Python version is below 3.9 to fix the psycopg2-binary import issue?",
  "How do I install the lib for PostgreSQL after activating a new conda environment?",
  "What alternative solution can I use to solve the psycopg import issues besides updating Python?"
]
Questions for document b36ea564:
["How can I solve the issue of PGCLI being stuck on the password prompt for Postgres?", "What should I do if I receive the error PGCLI connection failed: FATAL: password authentication failed for user 'root'?", "What is a solution if the error 'PGCLI connection failed: FATAL: password authentication failed for user root' persists despite the correct password?", "What are my options when facing the PGCLI password authentication error on Windows?", "What should I check if I closed the Postgres connection in an earlier tutorial step and encountered a PGCLI connection issue?"]
Questions for document e2a46ce5:
["What can I do if pgcli command is not found after installation?", 
"What steps should I take to add pgcli to Windows PATH if it is installed but not recognized?", 
"How do I locate the directory where pgcli is installed if it is not found on Git bash?", 
"Are there different paths that I might need to add to PATH for pgcli depending on where Python is installed?", 
"Where exactly should I add the pgcli path in Windows System Variables?"]
Questions for document 27bdbc3f:
[
  "What is the alternative if I face issues running pgcli locally?",
  "Which Docker image should be used to run pgcli in a container?",
  "What is the network name used in the example for running pgcli in a Docker container?",
  "What credentials and connection details are used to run pgcli in the provided example?",
  "Where can I find more information about pgcli?"
]
Questions for document f7c5d8da:
[
  "Why is PULocationID not recognized in PGCLI without quotations?",
  "How should I refer to columns with capital letters in PGCLI?",
  "What should I do if PGCLI does not recognize my column name with capital letters?",
  "Why do I need to use quotations for certain columns in PGCLI?",
  "Where can I find more information about case sensitivity in PGCLI?"
]
Questions for document c91ad8f2:
[
  "What should I do if I get the error column 'c.relhasoids does not exist' when using \d <database name>?",
  "How can I resolve the issue of getting the error 'column c.relhasoids does not exist' in PGCLI?",
  "What are the steps to fix the error column 'c.relhasoids does not exist' in pgcli?",
  "Why is the error 'column c.relhasoids does not exist' appearing when I use PGCLI?",
  "Is there a solution for the 'column c.relhasoids does not exist' error in PGCLI?"
]
Questions for document 88bf31a0:
[
  "What should I do if I get a password authentication failed error for user 'root' when connecting to Postgres in Jupyter notebook?",
  "Why does my connection attempt to Postgres fail with port 5432 taken?",
  "How can I change the port number to avoid a conflict with port 5432 on my machine?",
  "What steps should I take if substituting the port number doesn't resolve the issue?",
  "Is there a specific service in Windows that might be causing the Postgres connection error and how can I stop it?"
]
Questions for document 23524e6d:
[
  "How can I resolve a 'FATAL: role \"root\" does not exist' error when connecting to Postgres via pgcli in Module 1?",
  "What steps should I take to resolve the 'FATAL: role \"root\" does not exist' error when uploading data via Jupyter Notebook?",
  "How do I check if a root user exists in a Postgres Docker container?",
  "What configuration changes can resolve a 'FATAL: role \"root\" does not exist' error when using Docker Compose?",
  "What command sequence should I follow to reset Postgres setup in Docker Compose after a 'role \"root\" does not exist' error?"
]
Questions for document 9211bbd6:
[
  "What should I do if I get a psycopg2.OperationalError indicating the database 'ny_taxi' does not exist?",
  "How can I confirm if Postgres is running using Docker?",
  "What port should I use if I have a pre-installed Postgres software and want to avoid conflicts?",
  "What could cause psycopg2.OperationalError when connecting to Postgres?",
  "How do I build a Postgres instance on a different port like 8080?"
]
Questions for document 5db86809:
["How do I fix a ModuleNotFoundError for the 'psycopg2' module in Postgres?", "What should I do if installing psycopg2-binary doesn't resolve the ModuleNotFoundError?", "How can I update conda if I'm getting the ModuleNotFoundError for 'psycopg2'?", "Do I need to update conda or pip before installing the psycopg packages?", "What should I do if I still get a pg_config not found error after installing psycopg2 on a Mac?"]
Questions for document 20c604dd:
[
  "How can I resolve the 'Column does not exist' error in PostgreSQL on my MacBook Pro M2?",
  "Why am I getting a 'column does not exist' error in my join queries even though the column does exist?",
  "What should I do to fix Psycopg2 errors related to column names in PostgreSQL?",
  "How do I correctly reference column names in join queries to avoid errors in PostgreSQL?",
  "What is the correct way to enclose column names in PostgreSQL join queries to prevent errors?"
]
Questions for document b11b8c15:
["Why doesn't the Create server dialog appear in pgAdmin?", "How can I create a server in the new version of pgAdmin?", "What should I do if the Create server dialog is missing in pgAdmin?", "Is there an alternative to Create server dialog in pgAdmin's new version?", "How do I find the Create server option in the updated pgAdmin?"]
Questions for document a6475348:
[
  "How can I fix a blank screen after logging into pgAdmin using GitHub Codespaces in the browser?",
  "What error does the pgAdmin container terminal show when there is a blank screen after login?",
  "How do I modify the Docker run command to set the required environment variable for pgAdmin?",
  "Is there an alternative method to solve the blank screen issue besides setting an environment variable?",
  "Does using VSCode locally to display GitHub Codespaces prevent the blank screen issue in pgAdmin?"
]
Questions for document 1ea7680e:
[
  "How can I resolve the issue of not being able to access the pgAdmin address via my browser on a Mac Pro device while connecting to a GCP Compute Engine via Remote SSH in VSCode?",
  "What modifications are needed for the 'docker run' command to access the pgAdmin address?",
  "What changes should be made to the docker-compose.yaml file to ensure access to the pgAdmin address?",
  "What should I do if I encounter a ModuleNotFoundError for 'pysqlite2' in Python?",
  "How can I fix the ImportError related to sqlite3.dll in Anaconda?"
]
Questions for document 10acd478:
["If I follow video 1.2.2 to ingest NY Taxi Data into Postgres, will I get all the rows expected?",
"What happens if I run the ingestion script in the Jupyter notebook from top to bottom a second time?",
"Why am I missing the first chunk of 100000 records when rerunning the ingestion script in the Jupyter notebook?",
"How can I ensure that all the data is ingested without missing records if I rerun the notebook script?",
"Is running the entire Jupyter notebook script from top to bottom the recommended approach for data ingestion?"]
Questions for document 752e8452:
["How can I iterate over a CSV file in Python without errors?",
"What do I need to do if I prefer to keep my CSV uncompressed for easier preview?",
"Which command should I use to unzip gzip files?",
"What should I do if I encounter a DeprecationWarning in pandas when reading a CSV?",
"How can I install gunzip on an Ubuntu machine?"]
Questions for document aa6f52b8:
["How can Pandas interpret string columns as datetime when reading a CSV file?", 
"What parameter should be used with pd.read_csv to parse dates?", 
"Can you provide an example of using pd.read_csv with parse_dates?", 
"Which columns are converted to datetime in the given example?", 
"What is the output structure when using parse_dates in pd.read_csv?"]
Questions for document 3dacbb98:
["How do I fix Python not being able to ingest data from the provided GitHub link using curl?", "What command should I use if Python can't download data from GitHub using curl?", "How can I resolve the issue of Python not retrieving data from a GitHub URL with curl?", "What is the solution for Python's ingestion problem with GitHub data using curl?", "Which os.system command should be used to fetch data from GitHub in Python using curl?"]
Questions for document 8b71a398:
[
  "What file extension is used for CSV files compressed with Gzip?",
  "How can I read a Gzip compressed CSV file using Pandas?",
  "Which Pandas function is used for reading CSV files?",
  "What parameters does the read_csv() function accept?",
  "Can you provide an example of reading a .csv.gz file with Pandas?"
]
Questions for document aa244fa0:
["How do you iterate through a parquet file in Python?", "What library can be used to read parquet files in Python?", "How can we set chunksize for parquet files since it's not as straightforward as with CSV files?", "Which Python binding of Apache Arrow can help with ingesting parquet files?", "What is the default batch size when ingesting rows from a parquet file to SQL?"]
Questions for document eac816d7:
[
  "What causes ImportError:cannot import name 'TypeAliasType' from 'typing_extensions' in SQLAlchemy?",
  "How do I resolve the ImportError related to 'TypeAliasType' in SQLAlchemy?",
  "What version of typing_extensions is required to fix the SQLAlchemy import error?",
  "Can I use pip to solve the 'TypeAliasType' import error in SQLAlchemy?",
  "Is Conda useful to update typing_extensions to resolve the SQLAlchemy error?"
]
Questions for document d44d1c77:
[
  "What is the correct connection string for psycopg with SQLAlchemy?",
  "How can I fix the 'module object is not callable' error in SQLAlchemy?",
  "What is the connection format for PostgreSQL in SQLAlchemy?",
  "How do I connect to a PostgreSQL database using SQLAlchemy with psycopg?",
  "What should be the correct syntax for create_engine when using SQLAlchemy and psycopg?"
]
Questions for document ed34766a:
[
  "What should I do if I get a ModuleNotFoundError for 'psycopg2' in my Jupyter notebook?",
  "How can I fix the error when executing a Jupyter notebook cell that involves 'create_engine' with PostgreSQL?",
  "Which Python module do I need to install to resolve a ModuleNotFoundError related to 'psycopg2'?",
  "Can I use Conda or pip to install the module required for connecting to PostgreSQL in SQLAlchemy?",
  "What is the error when 'psycopg2' is missing during the execution of 'create_engine' with PostgreSQL?"
]
Questions for document fd714677:
[
  "What should I do if the Google Cloud SDK PATH is not added to Windows automatically?",
  "How can I add Gitbash to my Windows PATH?",
  "What steps should I follow to install Anaconda Navigator for PATH setup?",
  "Which options should I select when installing Gitbash on Windows?",
  "How do I set Gitbash as the default terminal in the Windows Terminal settings?"
]
Questions for document 9de2c3e9:
[
  "Why does creating a GCP project fail with an HttpError and how can I resolve it?",
  "How do I handle the error message that a requested entity already exists when creating a GCP project?",
  "What steps should I take when a project ID I chose for GCP is already in use?",
  "Where should I create a GCP project to avoid HttpError messages?",
  "Why is using common project IDs like 'testproject' problematic in GCP?"
]
Questions for document 827dd4af:
[
  "What should I do if I encounter the error 'The project to be billed is associated with an absent billing account'?",
  "Is there a specific section in the course that explains the error related to an absent billing account in GCP?",
  "Where can I find the unique value needed to avoid the absent billing account error on GCP?",
  "Who provided the explanation for resolving the absent billing account error in the course material?",
  "What might be the reason if my billing account is not linked to my current GCP project?"
]
Questions for document a42a7e8c:
[
    "What should I do if Google Cloud refuses my credit/debit card?",
    "Will Google Cloud support help if my account is suspended?",
    "What should I expect if my Google Cloud account is suspended?",
    "Can I use a different type of card if my current one is not accepted by Google Cloud?",
    "Is there a specific card that has worked for others who had issues with Google Cloud?"
]
Questions for document 4eefdd01:
{
  "questions": [
    "How do I locate my ny-rides.json file in Google Cloud Platform?",
    "Which steps are involved in finding the ny-rides.json file in GCP?",
    "Where exactly is my ny-rides.json file stored in GCP?",
    "Could you guide me to the ny-rides.json file on GCP?",
    "What are the steps for adding a key as JSON to find ny-rides.json on GCP?"
  ]
}
Questions for document 0282578d:
[
  "Do I need to delete my instance in Google Cloud during Module 1?",
  "Is it necessary to delete the instance in Google Cloud for the week 1 readings?",
  "Should I follow Alexey's example and delete my Google Cloud instance in Module 1?",
  "In the lecture, Alexey deleted his Google Cloud instance. Do I have to do the same?",
  "Do I have to delete my instance in Google Cloud platform to complete week 1 readings?"
]
Questions for document bd3e60fd:
[
  "What command shows real-time information about CPU, memory, and processes?",
  "Which command do I use to display system memory usage and availability?",
  "How can I list all running processes with detailed information?",
  "What command shows network interface configuration?",
  "Which command lists hardware configuration information on my VM?"
]
Questions for document c4e9bc60:
[
  "What should I do if I receive a billing error despite having set my billing account?",
  "How can I resolve the Error 403 related to dataset billing in Module 1?",
  "What does the Error 403 message about billing and expiration time signify?",
  "How can I enable billing for my project if it shows billingNotEnabled even after setting it up?",
  "What workaround can I use if re-enabling billing resolves the Error 403 for the dataset?"
]
Questions for document f10b49be:
{
    "questions": [
        "How do I fix the issue with setting the quota project in GCP for Windows?",
        "What should I do if I can't start my GCP Virtual Machine due to no resources?",
        "What are the steps to reinstall the Google Cloud SDK on Windows if I'm having trouble?",
        "How can I resolve the 'quota exceeded' or 'API not enabled' error in GCP?",
        "What steps should I follow to create a new GCP VM instance from an image?"
    ]
}
Questions for document 3184bd8b:
[
  "Is it necessary to use a GCP VM in Module 1 for configuring the environment?",
  "When might using my own environment be more advantageous than a GCP VM?",
  "What issues led to the creation of the GCP VM setup video?",
  "Can I commit changes directly to a Github repo when working inside the GCP VM?",
  "What is the method of cloning the repo in the GCP VM and how does it affect committing?"
]
Questions for document 8bea4d53:
["Where should I create the .ssh directory when using GCP VM in Module 1?", "Why do I get a permission denied error when creating a directory in GCP VM?", "What is the correct directory to create the .ssh folder in Module 1?", "How can I resolve the permission denied error when creating .ssh in GCP VM?", "Where can I find more information on directory creation errors in Module 1?"]
Questions for document 86d11cc0:
[
  "What should I do if I get a permission denied error while saving a file in a GCP VM using VS Code?",
  "How can I fix the 'EACCES: permission denied' error when trying to save a file in the 'week_2/airflow/dags' directory via VS Code?",
  "What command can change the ownership of files I am editing in a GCP VM through VS Code?",
  "Why am I unable to write a file in the 'week_2/airflow/dags' folder of my GCP VM using VS Code?",
  "How do I resolve the 'NoPermissions (FileSystemError)' error encountered in VS Code when saving a file in a GCP VM?"
]
Questions for document 2cb48591:
["How can I resolve a connection timeout when trying to SSH into my VM?",
"What steps should I take if I can no longer connect to my GCP VM via SSH?",
"Why might my connection request to my VM be timing out after a period of it working fine?",
"How do I find the External IP of my GCP VM for SSH configuration?",
"What should I check if my VM connection request keeps timing out?"]
Questions for document 9523c813:
[
  "How can I fix 'no route to host' when connecting to port 22 on a GCP VM?",
  "What should I do if I can't connect to port 22 on my Google Compute Engine instance?",
  "What is a solution for SSH connection timeouts on port 22 in a GCP VM?",
  "How do I allow SSH access through the firewall in a Google Cloud VM?",
  "What startup script can be added to a GCP VM to allow SSH?"
]
Questions for document 4f8d9174:
["How can I forward ports for pgAdmin, Postgres, and Jupyter Notebook from GCP VM without using VS Code?", "What command should I run on the VM machine to start Docker and Jupyter Notebook?", "What SSH command should I use to forward port 5432 to my local machine from a GCP VM?", "How can I access pgAdmin from my local machine after port forwarding from a GCP VM?", "What should I do if I encounter credential issues when accessing Jupyter Notebook from my local machine?"]
Questions for document 29f84a82:
["How can I resolve the issue where gcloud auth hangs in MS VS Code using WSL2?", "What should I do if I see a prompt to open gcloud auth via browser and it shows an error in MS VS Code using WSL2?", "What is the solution if gcloud auth doesn't work when using the default browser in MS VS Code with WSL2?", "How do I configure trusted domains for gcloud auth in MS VS Code running in WSL2?", "What steps should I follow if gcloud auth login does not display the login page correctly in MS VS Code with WSL2?"]
Questions for document 20a01fd0:
["Why do I get a provider registry error when using Terraform with Google?", 
"What causes Terraform to fail in querying available provider packages?", 
"How can I resolve an internet connectivity issue in Terraform?", 
"What should I check if Terraform cannot access the provider registry?", 
"What steps should I take if terraform init fails due to registry access problems?"]
Questions for document 5a712a20:
["What does the error 'oauth2: cannot fetch token: dial tcp: i/o timeout' mean in Terraform?", 
"How can I fix the network issue where Google is not accessible in my country while using Terraform?", 
"What VPN application settings helped resolve the Terraform apply network error?", 
"Does the terminal program automatically follow the system proxy for Terraform?", 
"Who should I contact if I face a similar network issue with Terraform?"]
Questions for document 06021091:
[
  "How do I install Terraform on Windows Subsystem for Linux?",
  "Where can I find instructions for setting up Terraform in WSL?",
  "Is there a guide for Terraform installation on Windows 10 with Linux Subsystem?",
  "Can you provide a link for configuring Terraform on WSL?",
  "What's the resource for installing Terraform on the Windows Linux Subsystem?"
]
Questions for document df8ea7e8:
["Where can I find a solution for the Terraform state lock error?", "What is the official link to resolve the Terraform state lock issue?", "Can you provide the link for addressing the Terraform state lock error?", "Is there a GitHub link for the Terraform state lock error solution?", "Where should I look for help with the Terraform - Error acquiring the state lock?"]
Questions for document 1093daf5:
[
"How can a time desync on my machine cause a 400 Bad Request error in Terraform on WSL?",
"What error message might I see if my system time is causing Terraform to fail on WSL2?",
"What should be done to fix the Invalid JWT Token error when using Terraform on WSL2?",
"Where in the Terraform error message should I look to identify a JWT token issue?",
"Why does running sudo hwclock -s help solve JWT token errors in Terraform?" 
]
Questions for document 947213b1:
[
  "What does Error 403: Access denied mean in Terraform?",
  "How do I resolve googleapi Error 403 in Terraform?",
  "Which file should $GOOGLE_APPLICATION_CREDENTIALS point to?",
  "What command sets the GOOGLE_APPLICATION_CREDENTIALS environment variable correctly?",
  "How do I activate a service account with gcloud?"
]
Questions for document 002d4943:
[
  "Do I need multiple service accounts for Terraform or is one sufficient?",
  "After acquiring the credentials file, are there additional steps to use it with Terraform?",
  "Is a single service account adequate for all resources in this course?",
  "What should I do after obtaining the service account keys for Terraform?",
  "Are multiple service accounts required for setting up Terraform in this course?"
]
Questions for document 8dc77677:
[
  "Where is the link to Terraform 1.1.3 for Linux (AMD 64)?",
  "How do I download Terraform 1.1.3 for Linux?",
  "Can you provide the URL for Terraform 1.1.3 Linux AMD 64?",
  "Where can I get Terraform 1.1.3 for Linux AMD 64?",
  "What is the download link for Terraform 1.1.3 Linux AMD 64?"
]
Questions for document 29d3d343:
[
 "Why does Terraform display 'initialized in an empty directory'?",
 "What causes the error message 'no Terraform configuration files'?",
 "Why do I see an error when running 'terraform init'?",
 "What should I do before using the 'terraform init' command?",
 "Where should I run 'terraform init' to avoid configuration errors?"
]
Questions for document e2095203:
["What should I do when I get a 403 Error due to insufficient authentication scopes in Terraform?", "How do I resolve Error 403: Access denied in Google API while using Terraform?", "How can I fix the issue of insufficient authentication scopes causing a 403 Error in Terraform?", "What are the steps to set the GOOGLE_APPLICATION_CREDENTIALS for resolving the 403 Error in Terraform?", "Can you explain how to export GOOGLE_APPLICATION_CREDENTIALS to fix a 403 Error in Terraform?"]
Questions for document 22a2b9f2:
["How can I resolve the Terraform error related to googleapi Error 403: Permission denied when creating a bucket?",
"What should I declare as my Project ID to fix the error in Terraform?",
"Why does terraform-trans-campus@trans-campus-410115.iam.gserviceaccount.com not have the required access in Google Cloud?",
"What specific action is being denied according to the Terraform error message regarding googleapi Error 403?",
"Where can I find the correct Project ID to resolve the permission issue in Terraform?"]
Questions for document 5d7588f0:
["How do I handle the sensitivity of the credentials file in Module 1?", "What code configuration is needed for the credentials in the Google provider?", "What is the purpose of the 'provider' block in the Google provider configuration?", "How should I structure the credentials file in Terraform for Google Cloud?", "What variables are required for the Google provider configuration in Terraform?"]
Questions for document 5276a695:
[
    "In Module 1, how do I resolve the SQL error indicating the column 'Zone' doesn't exist when querying the zones_taxi table?",
    "Why do column names that start with uppercase letters need to be enclosed in double quotes in SQL queries?",
    "What should I do if the 'Astoria Zone' does not exist in my dataset while querying the zones table?",
    "Can you explain why the SQL query failed when using SELECT * FROM zones_taxi WHERE Zone='Astoria Zone'?",
    "Is there a specific workaround for querying column names with uppercase letters in SQL for Module 1 homework?"
]
Questions for document 70c159df:
[
  "Why do I get an error saying 'Column Zone doesn't exist' when using SQL SELECT Zone FROM taxi_zones?",
  "How can I avoid using quotation marks all the time in my SQL queries for taxi zones?",
  "What should I do if SQL says the column 'Zone' doesn't exist in the taxi_zones table?",
  "Is there a way to make my database operations more convenient in Pandas with taxi_zones data?",
  "How can I ensure my SQL queries do not fail due to case sensitivity in column names when working with taxi zone data?"
]
Questions for document f55efcf0:
[
    "What should I do if I get a CURL error saying 'Could not resolve host' while working on a Mac?",
    "How can I resolve the CURL error: '(6) Could not resolve host' while using macOS?",
    "What is the solution for Mac users facing the CURL 'output.csv' error?",
    "Which command should a Mac user execute to fix the 'Could not resolve host' error in CURL?",
    "For Mac, how do I correct the CURL 'Could not resolve host: output.csv' issue?"
]
Questions for document 2b7a8512:
[
  "Where should my SSH config file be located to resolve hostname issues in Module 1?",
  "How can I fix the 'Could not resolve hostname linux' error in the Docker and Terraform module?",
  "In which directory should I place my SSH config file to avoid SSH errors in Module 1?",
  "What is the solution for the 'Name or service not known' error in the first module?",
  "What steps should I take if my SSH cannot resolve the hostname in the Docker and Terraform section?"
]
Questions for document 1cd746c4:
["Why is 'pip' not recognized as an internal or external command?", "How do I add Anaconda to my PATH on Linux?", "What steps should I follow to add Anaconda to my PATH on Windows using Git Bash?", "How do I make the Anaconda PATH change permanent on MacOS?", "How can I configure PATH for Anaconda on Windows without Git Bash?"]
Questions for document 6d367222:
[
    "How can I resolve the error 'error starting userland proxy: listen tcp4 0.0.0.0:8080: bind: address already in use'?",
    "What steps should I take if I get a 'permission denied' error while trying to stop a Docker container?",
    "How do I fix the error 'cannot import module psycopg2' on Linux?",
    "What does the error 'docker build Error checking context: can't stat <path-to-file>' mean and how can I solve it?",
    "What should I do if Docker doesn't have permission to access a file during the build process?"
]
Questions for document 84e601e1:
[
  "How can I create a pip-compatible requirements.txt file from Anaconda?",
  "What command should I avoid using to generate a requirements.txt file in Anaconda?",
  "Why might `pip freeze > requirements.txt` not be ideal in Anaconda?",
  "What is the correct sequence of commands to generate a pip-friendly requirements.txt file in Anaconda?",
  "Is `conda list -d > requirements.txt` a good method for creating a pip-compatible requirements file?"
]
Questions for document 4cf83cc2:
[
    "Where can I find the FAQ questions from previous cohorts for Module 2 on Prefect?",
    "Is there a document with FAQ questions for the Prefect orchestration module from past cohorts?",
    "Can you point me to the link for FAQ questions on Airflow from previous classes?",
    "Do you have a resource with previous cohorts' FAQ for the Airflow portion of Module 2?",
    "Where are the previous cohorts' FAQ for the orchestration module using Prefect?"
]
Questions for document 5adc5188:
[
  "Why do Docker containers exit instantly with code 132 during docker compose up?",
  "What does the Mage documentation list as the cause for Docker containers exiting with code 132?",
  "Can the issue of Docker containers exiting with code 132 be resolved without purchasing new hardware?",
  "What is a recommended solution if Docker containers exit with code 132 and I can't buy new hardware?",
  "What additional information would be helpful to diagnose the Docker containers exiting issue on a VirtualBox VM?"
]
Questions for document 3ef0bb96:
[
  "How can I fix unexpected kernel restarts or memory issues with WSL 2 in Module 2?",
  "What might cause WSL 2 to run out of memory while using Docker?",
  "How many CPU cores should I dedicate to Docker in WSL 2 to prevent kernel restarts?",
  "How do I edit the .wslconfig file to allocate more resources to Docker in WSL 2?",
  "What steps should I follow to restart Docker Desktop after modifying WSL 2 settings?"
]
Questions for document a41ce360:
[
  "Where can I find information on configuring Postgres in Module 2?",
  "What is the solution to the configuration issue of Postgres?",
  "Where is the discussion on Postgres configuration issue?",
  "Can I get the link to the Postgres solution?",
  "Where should I look on Slack for the Postgres solution thread?"
]
Questions for document b1cf59e5:
[
  "How do I resolve the psycopg2.OperationalError related to connection refusal in Module 2?",
  "What should the POSTGRES_PORT variable be set to in the io_config.yml file?",
  "Is it necessary to change the POSTGRES_PORT if there's already a conflicting postgres installation on my host machine?",
  "What is the default postgres port for the mage container?",
  "Does the POSTGRES_PORT variable refer to the mage container port or the host port?"
]
Questions for document f9d6f8bd:
[
  "Why does executing SELECT 1; in MAGE - 2.2.4 cause a KeyError?",
  "I'm getting a KeyError when running SELECT 1; in MAGE - 2.2.4. What might be the issue?",
  "How can I fix the KeyError when executing SELECT 1; in the MAGE - 2.2.4 section?",
  "What should I check if I encounter a KeyError with SELECT 1; in MAGE - 2.2.4?",
  "Which profile should be selected to avoid KeyError when running SELECT 1; in MAGE - 2.2.4?"
]
Questions for document f3adb937:
[
  "How do I resolve the MAGE -2.2.4 ConnectionError related to a timeout error?",
  "What should I do if I encounter a TimeoutError while working with MAGE -2.2.4?",
  "What is the solution for the NotFound: 404 error while testing a BigQuery connection in MAGE -2.2.4?",
  "How can I fix the issue of a not found dataset error in BigQuery for MAGE -2.2.4?",
  "What are the steps to ensure the service account has the correct roles/permissions for BigQuery in MAGE -2.2.4?"
]
Questions for document eb3d6d36:
["What is a common error related to invalid JWT tokens in Module 2?,",
"Where can I find a solution for the RefreshError involving invalid JWT tokens in Module 2?",
"Can you provide a solution link for the 'invalid_grant: Invalid JWT' RefreshError?",
"What does the 'invalid_grant: Invalid JWT' error message suggest checking in JWT claims?",
"Which section discusses the RefreshError related to invalid JWT tokens?"]
Questions for document a76e1f4d:
[
  "What causes the IndexError: list index out of range in Mage 0.9.61?",
  "Where can I find the origin of the solution for the Mage 2.2.4 IndexError?",
  "What version of Mage addresses the IndexError issue mentioned in 2.2.4?",
  "How can I update to the fixed docker container version in Mage?",
  "What modification is needed in the docker-compose.yaml to fix the Mage error?"
]
Questions for document 934facf8:
["How can I resolve the OSError when trying to save a file into a non-existent directory in Module 2?", "What code should I add to create a non-existent directory in Module 2 workflow?", "Where can I find more details about fixing OSError in Module 2's workflow?", "Which section of Module 2 addresses issues with saving files into directories?", "What should I do if the directory does not exist in Module 2 workflow orchestration?"]
Questions for document a2c7b59f:
["What are the missing steps for deploying Mage using Terraform to GCP in DE Zoomcamp 2.2.7?", "Can you guide me on what to set in variables.tf for deploying Mage?", "How do I enable the Cloud Filestore API in Google Cloud Console?", "What are the Terraform commands for initiating and applying the deployment for Mage?", "What should I do when prompted by Terraform to enter the PostgreSQL password during the deployment?"]
Questions for document 997d4aaa:
[
"How do I change port mappings to run multiple Mage instances in Docker from different directories?",
"What should I do if I encounter the error 'Request had insufficient authentication scopes' while deploying Mage to GCP?",
"What is the host port where Mage will run on my local machine according to the given example?",
"Can you provide instructions on how to change VM permissions in GCP to fix the insufficient authentication scope error?",
"Where can I find additional help on resolving the '403: Request had insufficient authentication scopes' error?"
]
Questions for document bc269b95:
[
  "What issue might I face with security policies quota on GCP during the free trial?",
  "How can I resolve the Load Balancer Problem in module 2.2.7d on GCP?",
  "What steps should I take to address deployment issues with terraform on a free trial GCP account?",
  "Which file should I delete when facing the load balancer problem on a free trial GCP account?",
  "Which lines in the main.tf file need to be commented or deleted to fix the load balancer issue on GCP?"
]
Questions for document 10ea342e:
[
  "What should I do if I encounter an error when running terraform apply in Module 2?",
  "Where can I find the correct regions and zones for the GCP deployment?",
  "How long does it take to deploy MAGE Terraform files on a GCP Virtual Machine?",
  "What should I check if my free GCP credits are depleting rapidly?",
  "Is it normal for terraform destroy to not delete all resources, and what should I do if it happens?"
]
Questions for document 4bd23594:
[
    "What should I do if I get a 403 error creating a VPC access connector in Module 2?",
    "How can I resolve the 'permission vpcaccess.connectors.create denied' error in Module 2?",
    "What permission is required to create a VPC access connector in the given resource?",
    "Why am I getting an IAM_PERMISSION_DENIED error when creating a VPC access connector?",
    "Which role do I need to add to my Service Account to avoid the vpcaccess.connectors.create error in Module 2?"
]
Questions for document b0d48cd7:
[
  "How can I save a file in a directory that doesn’t exist in Module 2: Workflow Orchestration?",
  "Why won't Git push my empty 'data/green' folder to GitHub?",
  "What should I do if GitHub storage causes issues when writing files locally?",
  "How can I ensure a folder is created if it doesn’t exist using Pathlib?",
  "Why does the relative path for writing files locally not work when using GitHub storage?"
]
Questions for document 70a37f2c:
[
  "Is there a difference between the column names lpep_pickup_datetime and tpep_pickup_datetime in the datasets?",
  "How should I handle scripts if I encounter lpep_pickup_datetime in the green dataset?",
  "What should I do if my dataset has tpep_pickup_datetime while my script expects lpep_pickup_datetime?",
  "Do I need to modify my script differently depending on whether I'm using the green or yellow dataset?",
  "Why does the yellow dataset use a different column name for pickup datetime than the green dataset?"
]
Questions for document 8ab78bee:
["How to handle process termination when downloading VSC using Pandas?",
"What is the function to read CSV in chunks to avoid process termination?",
"How can I append data to a parquet file using fastparquet?",
"What compression method should I use when saving data to a parquet file?",
"What engine should be used to append data in a parquet file with Pandas?"]
Questions for document 54c6db2f:
[
  "What should I do if I'm denied access while pushing a Docker image?",
  "How can I ensure I am logged in properly to Docker Desktop?",
  "What username should I use when building and pushing Docker images?",
  "What might cause a 'requested access denied' error when pushing to Docker?",
  "Can using the wrong username cause issues when pushing a Docker image?"
]
Questions for document c5b998f3:
["What should I do if my flow script fails with a 'killed' message?",
"Why does my flow script get 'killed'?",
"What does the 'killed' message mean in my flow run?",
"How can I fix a flow script that was 'killed'?",
"What could be the reason behind my flow run 'killed' error and how can I resolve it?"]
'''

# Split the document content by the "Questions for document" header
sections = document_content.split("Questions for document ")

# Parse the questions from each section
questions = {}
for section in sections[1:]:
    document_id, question_text = section.split("\n", 1)
    document_id = document_id.strip()
    
    if question_text.startswith("["):
        # Questions in a list format
        questions[document_id] = json.loads(question_text)
    elif question_text.startswith("{"):
        # Questions in a dictionary format
        questions[document_id] = json.loads(question_text)
    else:
        print(f"Unsupported question format for document {document_id}")

# Print the parsed questions
for document_id, document_questions in questions.items():
    print(f"Questions for document {document_id}:")
    print(document_questions)
    print()

JSONDecodeError: Invalid \escape: line 3 column 59 (char 153)

In [58]:
import json

def parse_ground_truth_document(doc_text):
    lines = doc_text.strip().split('\n')
    parsed_data = {}
    current_doc_id = None
    current_questions = []

    for line in lines:
        line = line.strip()
        if line.startswith('Questions for document '):
            if current_doc_id and current_questions:
                parsed_data[current_doc_id] = current_questions
                current_questions = []

            current_doc_id = line.split(':')[1].strip()
        elif line.startswith('[') or line.startswith('{'):
            try:
                questions_data = json.loads(line)
                if isinstance(questions_data, list):
                    current_questions.extend(questions_data)
                elif isinstance(questions_data, dict):
                    current_questions.extend(questions_data.values())
            except json.JSONDecodeError:
                continue
    
    # Add the last document
    if current_doc_id and current_questions:
        parsed_data[current_doc_id] = current_questions
    
    return parsed_data


ground_truth_text = ('..data/result.txt')

parsed_ground_truth = parse_ground_truth_document(ground_truth_text)

# Print parsed data
for doc_id, questions in parsed_ground_truth.items():
    print(f"Questions for document {doc_id}:")
    for idx, question in enumerate(questions, start=1):
        print(f"  Question {idx}: {question}")
    print()