In [None]:
import os
import yaml
from collections import OrderedDict
import openai
import instructor
from typing import List
from pydantic import BaseModel, Field
import os
import fileinput
import re

# Prefer the key from the OPENAI_API_KEY environment variable so the secret never
# has to be pasted into (and committed with) this notebook. The original
# placeholder is kept as the fallback so existing workflows behave as before.
openai.api_key = os.environ.get("OPENAI_API_KEY", "<YOUR_API_KEY>")

## Generate SEO for each documentation page

In [None]:

def parse_front_matter(file_path):
    """Parse the YAML front matter of a Markdown file.

    The front matter is the text between the first line consisting only of
    ``---`` and the next such line. Reading stops at the closing marker, so
    the rest of the file is never loaded.

    Args:
        file_path: Path of the Markdown file to read (decoded as UTF-8).

    Returns:
        The value produced by ``yaml.safe_load`` for the front matter
        (normally a dict). An empty dict is returned when the file has no
        front matter or the front matter is empty, so callers can safely use
        ``in`` / ``.get`` on the result.

    Raises:
        yaml.YAMLError: If the front matter is not valid YAML.
    """
    front_matter = []
    read_front_matter = False

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            if line.strip() == '---':
                if read_front_matter:
                    # Closing marker reached: nothing below it is metadata
                    break
                read_front_matter = True
            elif read_front_matter:
                # YAML does not allow tabs for indentation, so expand them
                front_matter.append(line.replace('\t', '    '))

    # Each collected line still carries its own newline; joining with "" keeps
    # the text exactly as written (joining with "\n" would insert blank lines
    # and alter multi-line scalars).
    data = yaml.safe_load("".join(front_matter))

    # safe_load returns None for an empty document
    return {} if data is None else data

def represent_ordereddict(dumper, data):
    """Represent an OrderedDict as a plain YAML mapping.

    The item view (rather than the dict itself) is handed to the dumper so
    the entries are emitted in the order they were inserted.
    """
    mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    ordered_pairs = data.items()
    return dumper.represent_mapping(mapping_tag, ordered_pairs)


# Register the representer so yaml.dump knows how to serialize OrderedDict
yaml.add_representer(OrderedDict, represent_ordereddict)

# Pydantic model for a single SEO keyword; used as the element type of the
# keyword list requested from OpenAI.
# NOTE(review): the docstring and Field description below are presumably
# exported as part of the schema sent to the model - treat them as runtime
# text and edit with care.
class Keyword(BaseModel):
    """Keyword for documentation page SEO"""
    # Required field (`...` means there is no default value)
    keyword: str = Field(..., description="Keyword for SEO")

# Structured response requested from OpenAI for one documentation page; it is
# passed as `response_model` to the chat completion call further down.
# NOTE(review): the docstring and Field description are presumably part of the
# schema sent to the model - treat them as runtime text and edit with care.
class DescriptionAndKeywordsForSEO(BaseModel):
    """Description and list of keywords for documentation page to improve SEO"""
    # Keywords returned for the page, each wrapped in a Keyword model
    keywords: List[Keyword]
    # Required field (`...` means there is no default value)
    description: str = Field(..., description="Small description to be used for SEO")

# Patch the openai module; the chat completion call below relies on this to
# accept the `response_model` and `max_retries` arguments.
instructor.patch()


def _find_front_matter_end(lines):
    """Return the index of the closing '---' line, or None if there is no front matter.

    The opening marker must be the first non-blank line of the file; otherwise
    a '---' found later is treated as ordinary content and not as metadata.
    """
    opener_seen = False
    for index, line in enumerate(lines):
        stripped = line.strip()
        if not opener_seen:
            if stripped == '---':
                opener_seen = True
            elif stripped:
                # Real content appears before any marker: no front matter
                return None
        elif stripped == '---':
            return index
    return None


def _request_seo(content):
    """Ask OpenAI for an SEO description and keywords for a page.

    Returns a ``(description, keywords)`` tuple, or None when the API call
    fails (the error is printed; processing of other files continues).
    """
    try:
        # Use OpenAI to get an optimized description and keywords for SEO
        response = openai.ChatCompletion.create(
            model="gpt-4",
            response_model=DescriptionAndKeywordsForSEO,
            max_retries=2,
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert with 20+ years in marketing and SEO. You are required to work on metadata for each docusaurus page to improve SEO based on content written."
                },
                {
                    "role": "user",
                    "content": f"Return a list of keywords and a small description for a marketing website page, taking into account the content of this page: {content}."
                },
            ]
        )
    except Exception as error:
        # Best effort: one failing page must not abort the whole run
        print("    ERROR WITH OPENAI API")
        print(f"    {error}")
        return None

    # Docusaurus expects a single-line description and a plain list of words
    description = response.description.replace('\n', ' ')
    keywords = [item.keyword for item in response.keywords]
    return description, keywords


def _build_front_matter(metadata, seo):
    """Return the metadata as an OrderedDict in the order used for the docs.

    Order: title, sidebar_position, description, keywords, then every other
    key already present in the file (so no existing metadata is lost).
    When ``seo`` is None (generation failed) the description and keywords
    already in the file are kept; empty values are used only if absent.
    """
    ordered_metadata = OrderedDict()
    for key in ("title", "sidebar_position"):
        if key in metadata:
            ordered_metadata[key] = metadata[key]

    if seo is not None:
        description, keywords = seo
    else:
        description = str(metadata.get('description') or "").replace('\n', ' ')
        keywords = metadata.get('keywords') or []
    ordered_metadata['description'] = description
    ordered_metadata['keywords'] = keywords

    for key, value in metadata.items():
        if key not in ordered_metadata:
            ordered_metadata[key] = value
    return ordered_metadata


for product in ["../content/pro"]:
    for root, dirs, files in os.walk(product):
        for file in files:
            if not file.endswith(".md"):
                continue

            filename = os.path.join(root, file)
            # Print the filename so we know which file we are processing
            print(filename)

            with open(filename, 'r', encoding='utf-8') as f:
                lines = f.readlines()

            # Locate the closing marker of the front matter
            front_matter_end_index = _find_front_matter_end(lines)
            if front_matter_end_index is None:
                print(f"    front matter not found in {filename}")
                print("    FRONT MATTER MISSING FOR THIS PAGE! SKIPPING.")
                continue

            # Get the content after the front matter
            content = ''.join(lines[front_matter_end_index + 1:])

            # Get the metadata from the front matter
            try:
                metadata = parse_front_matter(filename) or {}
            except yaml.YAMLError as error:
                print(f"    invalid front matter in {filename}: {error}")
                continue
            if not isinstance(metadata, dict):
                print(f"    front matter is not a mapping in {filename}")
                continue

            # Check if the metadata contains the required fields
            if 'title' not in metadata:
                print(f"    title not found in {filename}")
                print("    TITLE MISSING FOR THIS PAGE!")
            if 'sidebar_position' not in metadata:
                # Reference documentation is not expected to have a sidebar position
                if 'reference' not in filename:
                    print(f"    sidebar_position not found in {filename}")
                    print("    SIDEBAR MISSING FOR THIS PAGE!")

            ordered_metadata = _build_front_matter(metadata, _request_seo(content))

            # Serialize before opening the file for writing, so a serialization
            # error cannot leave the page truncated.
            front_matter_text = yaml.dump(ordered_metadata, default_flow_style=False)

            # NOTE: injecting a <HeadTitle> component after the front matter
            # used to be done here and is currently disabled.

            # Write the new file: front matter followed by the original content
            with open(filename, 'w', encoding='utf-8') as f:
                f.write('---\n')
                f.write(front_matter_text)
                f.write('---\n')
                f.write(content)

# Update HeadTitle based on product

In [None]:
## FOR SDK

# # Regular expression pattern to match the line
# pattern = r'<HeadTitle title="(.+?) - (.+?) - Reference \| OpenBB SDK Docs" />'
# #pattern = r'<HeadTitle title="(.+?) - (.+?) - (.+?) - Reference \| OpenBB SDK Docs" />'
# #pattern = r'<HeadTitle title="(.+?) - (.+?) - (.+?) - (.+?) - Reference \| OpenBB SDK Docs" />'

# # Function to generate the replacement string
# def replacement(match):
#     # Convert the captured groups to lowercase and form the replacement string
#     return '<HeadTitle title="{}.{} - Reference | OpenBB SDK Docs" />'.format(match.group(2).lower(), match.group(1).lower())
#     #return '<HeadTitle title="{}.{}.{} - Reference | OpenBB SDK Docs" />'.format(match.group(3).lower(), match.group(2).lower(), match.group(1).lower())
#     #return '<HeadTitle title="{}.{}.{}.{} - Reference | OpenBB SDK Docs" />'.format(match.group(4).lower(), match.group(3).lower(), match.group(2).lower(), match.group(1).lower())


# FOR PLATFORM

# # Regular expression pattern to match the line
# #pattern = r'<HeadTitle title="(.+?) - (.+?) - Reference \| OpenBB Platform Docs" />'
# pattern = r'<HeadTitle title="(.+?) - (.+?) - (.+?) - Reference \| OpenBB Platform Docs" />'
# #pattern = r'<HeadTitle title="(.+?) - (.+?) - (.+?) - (.+?) - Reference \| OpenBB Platform Docs" />'

# Function to generate the replacement string
# def replacement(match):
#     # Convert the captured groups to lowercase and form the replacement string
#     #return '<HeadTitle title="{}.{} - Reference | OpenBB Platform Docs" />'.format(match.group(2).lower(), match.group(1).lower())
#     return '<HeadTitle title="{}.{}.{} - Reference | OpenBB Platform Docs" />'.format(match.group(3).lower(), match.group(2).lower(), match.group(1).lower())
#     #return '<HeadTitle title="{}.{}.{}.{} - Reference | OpenBB Platform Docs" />'.format(match.group(4).lower(), match.group(3).lower(), match.group(2).lower(), match.group(1).lower())


# FOR TERMINAL

# # Regular expression pattern to match the line
# #pattern = r'<HeadTitle title="(.+?) - (.+?) - Reference \| OpenBB Terminal Docs" />'
# #pattern = r'<HeadTitle title="(.+?) - (.+?) - (.+?) - Reference \| OpenBB Terminal Docs" />'
# pattern = r'<HeadTitle title="(.+?) - (.+?) - (.+?) - (.+?) - Reference \| OpenBB Terminal Docs" />'

# # Function to generate the replacement string
# def replacement(match):
#     # Convert the captured groups to lowercase and form the replacement string
#     #return '<HeadTitle title="{}/{} - Reference | OpenBB Terminal Docs" />'.format(match.group(2).lower(), match.group(1).lower())
#     #return '<HeadTitle title="{}/{}/{} - Reference | OpenBB Terminal Docs" />'.format(match.group(3).lower(), match.group(2).lower(), match.group(1).lower())
#     return '<HeadTitle title="{}/{}/{}/{} - Reference | OpenBB Terminal Docs" />'.format(match.group(4).lower(), match.group(3).lower(), match.group(2).lower(), match.group(1).lower())

# BOT DISCORD

# # Regular expression pattern to match the line
# pattern = r'<HeadTitle title="(.+?) - (.+?) - Discord - Reference \| OpenBB Bot Docs" />'

# # Function to generate the replacement string
# def replacement(match):
#     # Convert the captured groups to lowercase and form the replacement string
#     return '<HeadTitle title="{}: {} - Discord Reference | OpenBB Bot Docs" />'.format(match.group(2).lower(), match.group(1).lower())

# BOT TELEGRAM

# Matches a Telegram reference HeadTitle line; group 1 is the first title
# segment and group 2 the second one.
pattern = r'<HeadTitle title="(.+?) - (.+?) - Telegram - Reference \| OpenBB Bot Docs" />'


def replacement(match):
    """Build the new HeadTitle tag for a line matched by ``pattern``.

    The two captured segments are lowercased and swapped, producing
    ``"<second>: <first> - Telegram Reference | OpenBB Bot Docs"``.
    """
    first_segment = match.group(1)
    second_segment = match.group(2)
    template = '<HeadTitle title="{}: {} - Telegram Reference | OpenBB Bot Docs" />'
    return template.format(second_segment.lower(), first_segment.lower())


# Rewrite the HeadTitle line of every Markdown file under the Telegram
# reference directory (path is relative to the current working directory).
for folder, _subfolders, names in os.walk('bot/reference/telegram'):
    candidate_paths = [os.path.join(folder, name) for name in names]
    for md_path in candidate_paths:
        # Only Markdown files are rewritten
        if not md_path.endswith('.md'):
            continue
        # inplace=True redirects print() into the file being read, so each
        # printed line replaces the original one.
        with fileinput.FileInput(md_path, inplace=True) as stream:
            for text in stream:
                # Lines that do not match the pattern are written back unchanged
                print(re.sub(pattern, replacement, text), end='')