Merge pull request #110 from NFDI4BIOIMAGE/export_csv

Export training materials as CSV
NFDI4BIOIMAGE · Jul 10, 2024 · 8e663dd · 8e663dd
2 parents 321b3f0 + 42c8dd6
commit 8e663dd
Show file tree

Hide file tree

Showing 6 changed files with 53 additions and 0 deletions.
diff --git a/.github/workflows/book.yml b/.github/workflows/book.yml
@@ -29,6 +29,7 @@ jobs:
     - name: Build the book
       run: |
         python scripts/generate_link_lists.py
+        python scripts/export_csv.py
         jupyter-book build docs/
 
     # Push the book's HTML to github-pages

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -28,4 +28,5 @@ jobs:
     - name: Build the book
       run: |
         python scripts/generate_link_lists.py
+        python scripts/export_csv.py
         jupyter-book build docs/
diff --git a/docs/_toc.yml b/docs/_toc.yml
@@ -26,4 +26,5 @@ parts:
 
   - caption: Appendix
     chapters:
+    - file: export/readme
     - file: imprint
diff --git a/docs/export/readme.md b/docs/export/readme.md
@@ -0,0 +1,8 @@
+# Open data
+
+Following FAIR data and open access principles, you can download the resources made accessible on this website, e.g. from our [GitHub](https://github.com/NFDI4BIOIMAGE/training) repository.
+
+For your convenience, we provide automatically updated CSV exports of specific data:
+* [training_materials.csv](training_materials.csv)
+
+[Get in touch](https://github.com/NFDI4BIOIMAGE/training/issues/new) if you need other data exported as CSV files.
diff --git a/readme.md b/readme.md
@@ -19,6 +19,7 @@ jupyter lab
 To build the book, you can run this from the same folder (tested on Windows only):
 ```
 python scripts/generate_link_lists.py
+python scripts/export_csv.py
 ```
 
 ```

diff --git a/scripts/export_csv.py b/scripts/export_csv.py
@@ -0,0 +1,41 @@
+# This script exports selected data as a CSV file
+source = "./resources/"
+destination = './docs/export/training_materials.csv'
+
+# We filter by specific content types
+filter_types = ['course', 'tutorial', 'video', 'blog', 'workshop', 'notebook']
+
+# We keep only selected columns
+selected_columns = ["name", "authors", "url", "tags", "license", "description"]
+
+# ------------------------------------------------------------------------------
+# Do not modify anything further down
+from generate_link_lists import load_dataframe
+
+df = load_dataframe(source)
+
+# Convert arrays to strings
+def array_to_string(arr):
+    if type(arr) != list:
+        return str(arr)
+    return ', '.join(map(str, arr))
+
+df['tags'] = df['tags'].apply(array_to_string)
+df['authors'] = df['authors'].apply(array_to_string)
+df['type'] = df['type'].apply(array_to_string)
+df['license'] = df['license'].apply(array_to_string)
+
+
+# Keep only rows whose type contains one of the filter types
+def contains_filter_word(text, words):
+    return any(word in text for word in words)
+df = df[df['type'].apply(lambda x: contains_filter_word(x, filter_types))]
+
+# select columns
+df = df[selected_columns]
+
+# save selected data
+df.to_csv(destination, index=False)
+
+num_rows = df.shape[0]
+print(f"Exported {num_rows} rows.")