# In [19]:
import os
import json
import re

import frontmatter

def get_first_sentence(text):
    """Return the first sentence of ``text`` as a cleaned, single-line preview.

    The first sentence ends at the first period, question mark, or
    exclamation mark that is followed by whitespace or by the end of the
    text. When no such punctuation exists, the whole text is used.

    In both cases the result is normalized the same way: newlines become
    spaces, the right single quotation mark (U+2019) becomes an ASCII
    apostrophe, ``#`` characters (e.g. Markdown heading markers) are
    removed, runs of spaces are collapsed to one, and surrounding
    whitespace is stripped.

    Args:
        text: Text to extract the preview from.

    Returns:
        The cleaned first sentence, or the cleaned full text when no
        sentence-ending punctuation is found.
    """
    # The lookahead also accepts end-of-text, so a sentence whose terminator
    # is the very last character is still recognized.
    match = re.search(r'[.!?](?=\s|\Z)', text)
    sentence = text[:match.end()] if match else text

    cleaned = (
        sentence.replace('\n', ' ')
        .replace('\u2019', "'")
        .replace(' #', '')
        .replace('#', '')
    )
    # Collapse any run of spaces; a single str.replace('  ', ' ') pass leaves
    # doubles behind when three or more spaces are adjacent.
    return re.sub(r' {2,}', ' ', cleaned).strip()

def parse_frontmatter(directory):
    """Collect metadata and a preview for every ``.md`` file in ``directory``.

    Each file is parsed with ``frontmatter.load``. The resulting entry holds
    the front-matter ``title``, ``subTitle`` and ``category`` values (an
    empty string when a key is absent) plus a ``preview`` built from the
    first sentence of the body.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        A list of dicts with the keys ``title``, ``subTitle``, ``category``
        and ``preview``, ordered by file name.
    """
    articles = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # generated index stable from run to run.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".md"):
            continue
        # Decode explicitly as UTF-8 so non-ASCII characters such as U+2019
        # are read the same way regardless of the platform's default locale.
        with open(os.path.join(directory, filename), 'r', encoding='utf-8') as file:
            # Parse front matter and content
            post = frontmatter.load(file)
        # Get first sentence of the content
        first_sentence = get_first_sentence(post.content.strip())
        # A front-matter title may be empty (None) or a non-string scalar;
        # coerce it so the apostrophe normalization below cannot fail.
        raw_title = post.metadata.get("title", "")
        title = "" if raw_title is None else str(raw_title)
        articles.append({
            "title": title.replace('\u2019', "'"),
            "subTitle": post.metadata.get("subTitle", ""),
            "category": post.metadata.get("category", ""),
            "preview": first_sentence,
        })
    return articles

# Relative paths of the folders whose Markdown articles get indexed
directories = [
    "../general",
    "../private-equity",
]

# Gather the entries of every folder, in order, into one flat list
all_articles = []
for directory in directories:
    all_articles += parse_frontmatter(directory)

# Persist the combined list under the "articles" key
with open('meta_articles.json', 'w') as json_file:
    json.dump(
        {"articles": all_articles},
        json_file,
        indent=4,
    )
