In [1]:
from datetime import datetime
import markdown
import requests
import json
import re
import os

In [2]:
def reads_blog_post(input_file):
    """Read a blog post file and separate its header from its main content.

    The text is split on the first two ``---`` delimiters. Anything before
    the first delimiter is discarded, the text between the two delimiters is
    the header, and everything after the second delimiter is the content.

    Args:
        input_file: Path of the blog post file to read.

    Returns:
        A ``(blog_header, blog_content)`` tuple of strings. ``blog_content``
        has its leading newlines removed. Both values are empty strings when
        the file does not contain two ``---`` delimiters.
    """
    with open(input_file, 'r') as file:
        raw_text = file.read()

    # maxsplit=2 keeps any later '---' (for example a Markdown horizontal
    # rule) inside the content instead of cutting the content apart.
    blog_pieces = raw_text.split('---', 2)

    if len(blog_pieces) < 3:
        # No complete header was found. Return an empty header as well as an
        # empty body so the caller always receives two strings (previously the
        # header name was never bound on this path and the return crashed).
        return "", ""

    blog_header = blog_pieces[1]
    blog_content = blog_pieces[2].lstrip('\n')

    # TODO: add logging here.

    return blog_header, blog_content

In [3]:
def gets_metadata(blog_header):
    """Extract post metadata from a blog post's header text.

    Args:
        blog_header: Header text of the post as a string.

    Returns:
        A ``(title, authors, tags, date, img_hero)`` tuple:

        * ``title`` -- the quoted value following ``title:``, or ``""`` when
          no quoted title is found.
        * ``authors`` -- list of stripped names taken from the ``- item``
          list under ``author:``, or a one-element list holding the inline
          value of ``author:``; ``[]`` when neither form is found.
        * ``tags`` -- list of the ``- item`` entries under ``category:``;
          ``[]`` when no such list is found.
        * ``date`` -- ``datetime`` parsed from a single-quoted ``YYYY-MM-DD``
          value following ``date:``, or ``""`` when not found.
        * ``img_hero`` -- the http(s) URL following ``hero:``, or ``""`` when
          not found (a message is printed in that case).

    Raises:
        ValueError: If the ``date:`` value has the ``YYYY-MM-DD`` shape but is
            not a valid calendar date.
    """
    # --- Title -------------------------------------------------------------
    # The closing quote must be the same character as the opening quote
    # (backreference \1), so an apostrophe inside a double-quoted title no
    # longer truncates the title.
    title_match = re.search(r"title:[ \t]*(['\"])(.*?)\1", blog_header)
    title = title_match.group(2) if title_match else ""

    # --- Authors -----------------------------------------------------------
    # Preferred form: a "- name" list on the lines directly below "author:".
    # Every entry is collected, and hyphens inside a name are left intact.
    author_list_match = re.search(
        r"author:[ \t]*\n((?:[ \t]*-[ \t]*\S.*(?:\n|$))+)", blog_header
    )
    if author_list_match:
        authors = [
            name.strip()
            for name in re.findall(
                r"^[ \t]*-[ \t]*(.+)$", author_list_match.group(1), re.MULTILINE
            )
        ]
    else:
        # Fallback form: a single value on the same line as "author:".
        inline_author_match = re.search(r"author:[ \t]*(\S.*)", blog_header)
        if inline_author_match:
            authors = [inline_author_match.group(1).strip()]
        else:
            authors = []

    # --- Categories / tags -------------------------------------------------
    category_blocks = re.findall(r"category:\s*\n((?:\s+-\s+.+\n)+)", blog_header)
    if category_blocks:
        # Only the first "category:" list is used.
        tags = re.findall(r"- (.+)", category_blocks[0])
    else:
        tags = []

    # --- Publishing date ---------------------------------------------------
    date_match = re.search(r"date:\s*'(\d{4}-\d{2}-\d{2})'", blog_header)
    if date_match:
        # Parsing once is enough: formatting the result to an ISO-like string
        # and parsing it back yields the very same datetime (midnight).
        date = datetime.strptime(date_match.group(1), '%Y-%m-%d')
    else:
        date = ""

    # --- Hero image URL ----------------------------------------------------
    hero_match = re.search(r"hero:\s*(https?://[^\s]+)", blog_header)
    if hero_match:
        img_hero = hero_match.group(1)
    else:
        # Bind a fallback so the return below cannot fail on a missing name.
        img_hero = ""
        print("Hero URL not found in the Markdown content.")

    # TODO: add logging here.

    return title, authors, tags, date, img_hero

In [4]:
def converts_to_html(blog_content):
    """Convert the blog post content to HTML.

    Args:
        blog_content: Text to convert; it is passed unchanged to
            ``markdown.markdown``.

    Returns:
        The value returned by ``markdown.markdown`` for ``blog_content``.
    """
    return markdown.markdown(blog_content)