# A Python script that takes a list of Chalmers master's thesis URLs and generates a Markdown file for each.

# In [8]:
import os
import random
import string
import time
from collections import defaultdict
from datetime import date

import pandas as pd

# In [3]:
def get_author(thesis):
    """Build a comma-separated author string from a thesis metadata mapping.

    Each entry of ``thesis["dc.contributor.author"]`` has its spaces removed
    and its commas replaced with dashes (``"Doe, John"`` becomes
    ``"Doe-John"``); the cleaned entries are then joined with commas.

    Args:
        thesis: Mapping from metadata field name to a list of values.

    Returns:
        The cleaned author names joined by ``","``, or an empty string when
        there is no usable author entry.
    """
    # Non-string entries (e.g. the NaN pandas produces for an empty table
    # cell) have no ``replace`` method and carry no name, so skip them
    # instead of crashing.
    return ','.join(
        auth.replace(' ', '').replace(',', '-')  # replace comma with dash
        for auth in thesis["dc.contributor.author"]
        if isinstance(auth, str)
    )

# In [7]:
# Base pause between two consecutive page requests, in seconds.
download_delay = 3

# Load the CSV listing the thesis pages and keep its 'url' column as a
# plain Python list.
thesisLinks_df = pd.read_csv('thesis.csv', encoding='unicode_escape')
thesisLinks = thesisLinks_df['url'].tolist()

# In [5]:
def _first_value(values, default=''):
    """Return the first non-missing entry of *values* as a string, else *default*."""
    for value in values:
        if not pd.isna(value):
            return str(value)
    return default


def _yaml_escape(text):
    """Escape backslashes and double quotes for use in a double-quoted YAML scalar."""
    return text.replace('\\', '\\\\').replace('"', '\\"')


# For every thesis URL: read the first HTML table on the page as key/value
# metadata, then write a Markdown page with front matter to
# content/project/thesis_<author>/index.md.
for URL in thesisLinks:
    print(f'Processing {URL}')
    dfs = pd.read_html(URL)                  # read all tables in the page
    df = dfs[0]                              # select the first table
    thesis = defaultdict(list)               # field name -> list of values
    print('Thesis fetched')
    for k, v in zip(df[0], df[1]):
        thesis[k].append(v)                  # a field may occur on several rows

    author = get_author(thesis)              # get the author name

    # Every metadata field is stored as a list; take the scalar value so the
    # generated file does not contain Python list reprs such as "['...']".
    # The date is kept in `thesis_date` so that the imported `datetime.date`
    # class is not shadowed (it is needed for `date.today()` below).
    thesis_date = _first_value(thesis.get("dc.date.available", []))
    uri = _first_value(thesis.get("dc.identifier.uri", []))
    title = _first_value(thesis.get("dc.title", []))

    # Keep every abstract row, separated by blank lines; fall back to a
    # placeholder when the page has no abstract.
    abstract_parts = [
        str(part)
        for part in thesis.get("dc.description.abstract", [])
        if not pd.isna(part)
    ]
    abstract = '\n\n'.join(abstract_parts) or 'Abstract not available'

    print(f'Added Author : {author}')
    today = date.today()

    # Directory name: author string without punctuation, lower-cased.
    newpath = 'content/project/thesis_' + author.translate(
        str.maketrans('', '', string.punctuation)).replace(' ', '_').lower()
    os.makedirs(newpath, exist_ok=True)

    # The context manager closes the file even if writing fails.
    with open(newpath + "/index.md", "w", encoding="utf-8") as text_file:
        text_file.write(f"""
---
# Documentation: https://wowchemy.com/docs/managing-content/
# This markdown has been automatically generated on {today}

title: "{_yaml_escape(title)}"
featured: false
summary: ""
authors: [{author}]
tags: [Teaching,Teaching_MastersThesisCompleted]
categories: [Teaching]
date: {thesis_date}

# Optional external URL for project (replaces project detail page).
external_link: ""

# Featured image
# To use, add an image named `featured.jpg/png` to your page's folder.
# Focal points: Smart, Center, TopLeft, Top, TopRight, Left, Right, BottomLeft, Bottom, BottomRight.
image:
  caption: ""
  focal_point: "Center"
  preview_only: false

# Custom links (optional).
#   Uncomment and edit lines below to show custom links.
# links:
# - name: Follow
#   url: https://twitter.com
#   icon_pack: fab
#   icon: twitter

url_code: ""
url_pdf: ""
url_slides: ""
url_video: ""

# Slides (optional).
#   Associate this project with Markdown slides.
#   Simply enter your slide deck's filename without extension.
#   E.g. `slides = "example-slides"` references `content/slides/example-slides.md`.
#   Otherwise, set `slides = ""`.
slides: ""
---
## Abstract
{abstract}
[Link to thesis]({uri})
""")

    # Random delay to avoid overloading the server.
    time.sleep(download_delay + random.uniform(0, 1))