In [3]:
import io
import zipfile
import requests
import frontmatter

In [None]:
---
title: "Getting Started with AI"
author: "Fatimah Adeniyi"
date: "2025-09-24"
tags: ["ai", "machine-learning", "tutorial"]
difficulty: "beginner"
---


In [None]:
import frontmatter

# Parse the sample post; the YAML header and the markdown body end up
# on separate attributes of the returned object.
with open('example.md', 'r', encoding='utf-8') as md_file:
    post = frontmatter.load(md_file)

# Metadata: values from the frontmatter header
#   'title' -> "Getting Started with AI"
#   'tags'  -> ["ai", "machine-learning", "tutorial"]
for field in ('title', 'tags'):
    print(post.metadata[field])

# Content: the markdown text without the frontmatter block
print(post.content)



In [13]:
# Zip archive of the repository's main branch, served by GitHub's codeload host.
url = 'https://codeload.github.com/FatimahNgozi/AWS-S3-static-website-hostiiing/zip/refs/heads/main'
# requests has no default timeout; without one a stalled connection blocks the cell forever.
resp = requests.get(url, timeout=30)
# Fail here on a 4xx/5xx response instead of passing an error page on as zip data.
resp.raise_for_status()

In [14]:
repository_data = []
# Create a ZipFile object from the downloaded content. The with-block closes
# the archive even when reading or parsing one of the files raises.
with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
    for file_info in zf.infolist():
        filename = file_info.filename

        # Only process markdown files; lowercase just for the comparison so
        # names like README.MD still match.
        if not filename.lower().endswith('.md'):
            continue

        # Read and parse each file
        with zf.open(file_info) as f_in:
            content = f_in.read()
        post = frontmatter.loads(content)
        data = post.to_dict()
        # Store the path exactly as it appears in the archive (original case),
        # so the record can be matched back to its zip member.
        data['filename'] = filename
        repository_data.append(data)

In [17]:
# Dump the parsed records (one dict per markdown file) to inspect their structure.
print(repository_data)

[{'content': '# AWS-S3-static-website-hosting\n\nThis repository documents my learning with static website hosting on Amazon S3.\nI have my step-by-step guide on exactly how i did it\nTechnologies used includes: Amazon S3, HTML, CSS & JavaScript\n\n## Getting Started\nAfter all the necessary contents such as text, images and other media content has been gathered, we move straight to S3 bucket setup.\nAmazon S3 Bucket Setup:\nA new Amazon S3 bucket is created and configured specifically for hosting your static website(in my case MercyReads)\n\n![image](https://github.com/user-attachments/assets/ab3a4ac2-1149-4705-b0d7-13fbd5831c5f)\n\nChoose Properties and scroll down to the option Static website hosting and click on Edit button to enable the feature.\n\n![image](https://github.com/user-attachments/assets/8904e5e2-592e-47ec-905a-1dc1bd8a1703)\n\n![image](https://github.com/user-attachments/assets/4b020080-d94e-463c-ab1b-79af3df5778f)\n\n![image](https://github.com/user-attachments/asset

In [18]:
def read_repo_data(repo_owner, repo_name, branch='main', timeout=30):
    """
    Download and parse all markdown files from a GitHub repository.

    The requested branch is fetched as a single zip archive from
    codeload.github.com and read entirely in memory. Every member whose
    name ends in ``.md`` or ``.mdx`` (compared case-insensitively) is
    decoded as UTF-8 and parsed with ``frontmatter.loads``. A file that
    fails to parse is reported with ``print`` and skipped.

    Args:
        repo_owner: GitHub username or organization
        repo_name: Repository name
        branch: Name of the branch to download. Defaults to 'main'.
        timeout: Value passed to ``requests.get`` as ``timeout`` (seconds).
            Defaults to 30.

    Returns:
        List of dictionaries, one per parsed file: the result of the
        post's ``to_dict()`` plus a 'filename' key holding the member's
        path inside the archive, in its original case.

    Raises:
        Exception: If the download response status is not 200.
    """
    prefix = 'https://codeload.github.com'
    url = f'{prefix}/{repo_owner}/{repo_name}/zip/refs/heads/{branch}'
    # requests waits indefinitely unless a timeout is given.
    resp = requests.get(url, timeout=timeout)

    if resp.status_code != 200:
        raise Exception(f"Failed to download repository: {resp.status_code}")

    repository_data = []
    # The with-block closes the archive on every exit path, including errors.
    with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
        for file_info in zf.infolist():
            filename = file_info.filename

            # Lowercase only for the extension test; the original name is kept.
            if not filename.lower().endswith(('.md', '.mdx')):
                continue
            try:
                with zf.open(file_info) as f_in:
                    # utf-8-sig drops a leading byte-order mark, so a
                    # frontmatter block stays at the very start of the text.
                    content = f_in.read().decode('utf-8-sig', errors='ignore')
                post = frontmatter.loads(content)
                data = post.to_dict()
                data['filename'] = filename
                repository_data.append(data)
            except Exception as e:
                # Best effort: one bad file must not abort the whole repository.
                print(f"Error processing {filename}: {e}")
                continue

    return repository_data


In [19]:
# Load the same repository as the manual cells above, this time through the reusable helper.
myrepo_aws = read_repo_data('FatimahNgozi', 'AWS-S3-static-website-hostiiing')

In [21]:
# Report how many markdown documents the helper returned.
print(f"AWS Documents: {len(myrepo_aws)}")

AWS Documents: 1
