In [1]:
import os
from datetime import date
import yaml
import pandas as pd

# Function to parse YAML front matter
def parse_yaml_front_matter(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        yaml_lines = []
        in_yaml = False
        for line in lines:
            if line.strip() == '---':  # Start/End of YAML front matter
                in_yaml = not in_yaml
            elif in_yaml:
                yaml_lines.append(line)
            else:
                break
        yaml_content = ''.join(yaml_lines)
        front_matter = yaml.safe_load(yaml_content)
        return front_matter

# Function to process markdown files in a directory
def process_markdown_files(directory):
    markdowns_to_process = []
    today_date = date.today().strftime("%Y-%m-%d")
    for filename in os.listdir(directory):
        if filename.endswith(".md") or filename.endswith(".markdown"):
            file_path = os.path.join(directory, filename)
            front_matter = parse_yaml_front_matter(file_path)
            if front_matter:
                post_title = front_matter.get('title', None)
                post_date = front_matter.get('date', None)
                markdowns_to_process.append({
                    'filename': filename,
                    'today_date': today_date,
                    'post_title': post_title,
                    'post_date': post_date
                })
    return markdowns_to_process

# Directory containing markdown files
directory = "_posts"

# Process markdown files
markdowns_to_process = process_markdown_files(directory)

# Convert to DataFrame
df = pd.DataFrame(markdowns_to_process)

# Set maximum column width for display
pd.set_option('display.max_colwidth', 25)

# Display DataFrame
display(df)


Unnamed: 0,filename,today_date,post_title,post_date
0,2022-09-06-editors-pi...,2024-05-10,Editor's Pick! Embrac...,2022-09-06
1,2021-07-27-agile-in-a...,2024-05-10,Agile in Agriculture ...,2021-07-27 00:00:41-0...
2,2020-10-13-a-fun-and-...,2024-05-10,A fun and passionate ...,2020-10-13 04:00:12-0...
3,2023-05-09-using-metr...,2024-05-10,Using metrics to driv...,2023-05-09
4,2023-01-31-effective-...,2024-05-10,"Effective, not glamor...",2023-01-31
...,...,...,...,...
125,2023-10-31-bridging-a...,2024-05-10,Bridging AI data scie...,2023-10-31
126,2023-04-11-agile-inte...,2024-05-10,Agile internships: A ...,2023-04-11
127,2023-07-04-introducin...,2024-05-10,Introducing Network A...,2023-07-04
128,2022-02-15-an-agile-a...,2024-05-10,An agile approach to ...,2022-02-15


In [7]:
import pandas as pd
from dateutil.parser import parse

# Initialize a global counter for errors
error_count = 0

# Function to standardize date formats
def standardize_date(row):
    global error_count
    try:
        # Attempt to parse and format the date
        standardized_date = parse(row['post_date']).strftime('%Y-%m-%d')
        return standardized_date
    except Exception as e:
        # Increment error counter and print the filename
        error_count += 1
        print(f"Error processing file: {row['filename']}")
        return None

# Apply the function to the 'post_date' column
df['post_date'] = df.apply(standardize_date, axis=1)

# Print the total number of errors
print(f"Total number of errors: {error_count}")

# Display the updated DataFrame
display(df)



Error processing file: 2022-09-06-editors-pick-embracing-a-remote-work-culture.md
Error processing file: 2021-07-27-agile-in-agriculture-at-dropcopter.md
Error processing file: 2020-10-13-a-fun-and-passionate-conversation-with-the-authors-of-lean-software-development.md
Error processing file: 2023-05-09-using-metrics-to-drive-value-delivery.md
Error processing file: 2023-01-31-effective-not-glamorous-agile-change.md
Error processing file: 2024-01-30-learning-and-leading-in-a-digital-first-world.md
Error processing file: 2022-08-16-quality-agile-delivery-with-external-stakeholders.md
Error processing file: 2024-01-23-career-progression-working-above-the-waterline.md
Error processing file: 2020-09-02-agile-ideas-create-a-kanban-board-in-microsoft-project-desktop.md
Error processing file: 2022-11-01-november-1-2022-update.md
Error processing file: 2021-05-11-building-new-teams-one-sticky-paper-at-a-time-with-willem-jan-ageling.md
Error processing file: 2023-03-21-introducing-the-professio

Unnamed: 0,filename,today_date,post_title,post_date
0,2022-09-06-editors-pi...,2024-05-10,Editor's Pick! Embrac...,
1,2021-07-27-agile-in-a...,2024-05-10,Agile in Agriculture ...,
2,2020-10-13-a-fun-and-...,2024-05-10,A fun and passionate ...,
3,2023-05-09-using-metr...,2024-05-10,Using metrics to driv...,
4,2023-01-31-effective-...,2024-05-10,"Effective, not glamor...",
...,...,...,...,...
125,2023-10-31-bridging-a...,2024-05-10,Bridging AI data scie...,
126,2023-04-11-agile-inte...,2024-05-10,Agile internships: A ...,
127,2023-07-04-introducin...,2024-05-10,Introducing Network A...,
128,2022-02-15-an-agile-a...,2024-05-10,An agile approach to ...,


Total number of errors: 130
