# Import required modules

In [1]:
import pandas as pd # For using data frames
pd.options.mode.copy_on_write = True # See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
import re           # For using regular expressions

## Process the data

We want to turn `data/summary_documentosRNE.txt` into a clickable table of titles and URLs.
To do this, we'll need the functions below:

In [2]:
def parse_summaries(file="data/summary_documentosRNE.txt", encoding=None):
    """Parse a summary file into a DataFrame of recording metadata.

    Every line of `file` is searched for the fragment
    ``-i <digits>.mp3 ... -metadata title="..." -metadata album="..."
    -metadata date="..." -metadata genre="..." -metadata language="..."``
    (presumably ffmpeg command lines -- confirm against the data file).
    Lines that do not contain that fragment are skipped.

    Parameters
    ----------
    file : str or path-like
        Path of the summary file to read.
    encoding : str or None, optional
        Text encoding passed to ``open``. ``None`` (the default) uses the
        platform default encoding; pass e.g. ``"utf-8"`` to pin it.

    Returns
    -------
    pandas.DataFrame
        One row per matching line, with the captured strings in the columns
        ``id``, ``title``, ``album``, ``date``, ``genre`` and ``language``
        (in that order). The columns exist even when no line matches, so
        downstream column selection keeps working on empty input.
    """
    # Column names, in the same order as the capture groups of the pattern.
    columns = ['id', 'title', 'album', 'date', 'genre', 'language']

    # Compiled once instead of being re-parsed for every line.
    pattern = re.compile(r'-i (\d+)\.mp3.*-metadata title="([^"]+)" -metadata album="([^"]+)" -metadata date="([^"]+)" -metadata genre="([^"]+)" -metadata language="([^"]+)"')

    with open(file, 'r', encoding=encoding) as f:
        # Iterate over the file lazily; only matching lines produce a record.
        records = [
            dict(zip(columns, match.groups()))
            for match in map(pattern.search, f)
            if match
        ]

    # Passing `columns` fixes the schema even when `records` is empty.
    return pd.DataFrame(records, columns=columns)

def simplify(df):
    """Return a publishing-oriented view of `df`: title, date and mp3 url.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``title``, ``date`` and ``id``. The ``id``
        values must be strings, because they are concatenated into the url.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by ``id`` with the columns ``title``, ``date``
        and ``mp3``, sorted by ``date`` in descending order (for string
        dates this is lexicographic order). `df` itself is not modified.
    """
    # Built as one chain of non-mutating steps, so the result does not depend
    # on the global copy-on-write option and no slice of `df` is written to.
    return (
        df[['title', 'date', 'id']]
        .assign(mp3=lambda frame: "https://ztnr.rtve.es/ztnr/" + frame['id'] + ".mp3")  # Build the url using the id
        .set_index('id')                       # The id is expected to be unique, so it serves as the index
        .sort_values('date', ascending=False)  # Sort by date (newest first)
    )

We're ready to use the functions.
The result is `sim_df`, a `pandas.DataFrame` object.

In [3]:
# Parse the default summary file into one row per matching line.
raw_df = parse_summaries()
# Keep only title, date and the mp3 url, indexed by id and sorted newest first.
sim_df = simplify(raw_df)

## Ready for publishing

The data frame above is good for data analysis, but not yet ready for publishing.
Let's convert it to Markdown and write it to the file `table.md`.

In [None]:
# Render the table as Markdown and save it to table.md.
# The encoding is pinned to UTF-8 so that non-ASCII characters in the titles
# are written the same way on every platform instead of using the locale default.
# NOTE: DataFrame.to_markdown() requires the optional `tabulate` package.
with open("table.md", "w", encoding="utf-8") as text_file:
    text_file.write(sim_df.to_markdown())