In [10]:
import pandas as pd
import csv
import glob
import os
from datetime import date

today = date.today()

In [11]:
home_page = "[The Turkey-Palestine Source Collection](turkey_palestine_collection.md)"
separator = " // "

page_heading_element = "# "
page_heading_element_2 = "## "

project_folder = 'Sol-idarities\Sources Database'
upper_category = 'firstlevel_'

## Import Files

In [12]:
articles_view = pd.read_csv(rf'C:\Users\act1780\Documents\GitHub\{project_folder}\articles_view.csv', delimiter='\t')
publications_view = pd.read_csv(fr'C:\Users\act1780\Documents\GitHub\{project_folder}\publications_view.csv', delimiter='\t')
issues_table = pd.read_csv(fr'C:\Users\act1780\Documents\GitHub\{project_folder}\issues_view.csv', delimiter='\t')

## Re-usable Blocks
Sets a few blocks that can be reused throughout the website as variables.

Create a list of available views from the database

In [13]:
folder_path = fr"C:\Users\act1780\Documents\GitHub\andreacortellari.github.io\{project_folder}/*" 
files_list = glob.glob(folder_path)

list_of_files = []
list_of_views = []
for file_path in files_list:
    if '.csv' in file_path:
        view_name = file_path.split('.')[-2].split("\\")[-1]
        formatted_view_name = f"* {view_name}"
        list_of_files.append(formatted_view_name)
        list_of_views.append(view_name)

files = str(list_of_files).replace("'", "").replace("[", "").replace("]", "").replace(",", "")
files

''

## Create First Level Pages
Uses a for loop to create a main page for **articles**, **issues**, and **publication** using some metadata and data from the database views.

In [14]:
publications_metadata = f"In our dataset, we have {len(publications_view['publication_title'].unique())} unique publication names. The earliest publication started in {min(publications_view['first_year'].dropna())}, while the latest publication ended in {max(publications_view['last_year'].dropna())}. These publications are spread across various locations including {', '.join(sorted(publications_view['publication_location'].dropna().unique()))}."
column_to_drop = ['article_id', 'article_text', 'author_type', 'publication_id']
sorting_by_columns = ['issue_year', 'issue_month']

for first_level_page in list_of_views:
    filename = rf"{upper_category}{first_level_page.split('_')[0]}.md"
    view_file = pd.read_csv(rf'C:\Users\act1780\Documents\GitHub\andreacortellari.github.io\{project_folder}\{first_level_page}.csv', delimiter='\t')
    view_file = view_file\
        .sort_values(by=[col for col in sorting_by_columns if col in view_file.columns], 
                     ascending=[True for col in sorting_by_columns if col in view_file.columns])\
        .drop(columns=[col for col in column_to_drop if col in view_file.columns])

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(f"{page_heading_element}{home_page}{separator}{first_level_page.split('_')[-0].title()}\n\n")
        f. write(f"{publications_metadata} The webpage showcases data sourced from the {first_level_page} of the database. Download this view as a .csv file <a href='https://github.com/andreacortellari/andreacortellari.github.io/blob/main/{project_folder}\{first_level_page}.csv'>by clicking on the link.</a>\n\n")
        f.write((view_file).to_markdown(index=False))

## Create each Publications' Page

In [15]:
columns_publications_articles = ['article_id', 'article_text', 'author_type', 'pages', 'publication_title', 'regular_feature_title']

csv_file = fr'C:\Users\act1780\Documents\GitHub\andrea.cortellari.github.io\{project_folder}\publications_view.csv'  # Change this to the path of your CSV file
    
with open(csv_file, newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        print(reader)
        for row in reader:
            publication_name = row['publication_title']
            publication_type = row['publication_type']
            start_year = row['first_year']
            end_year = row['last_year']
            total_issues = row['total_issues']
            publication_location = row['publication_location']
            articles = articles_view[articles_view['publication_title'] == publication_name]

            metadata = f"{publication_name} was a {publication_type} publication. It published {total_issues} issues in {publication_location} between {start_year} and {end_year}."
            
            filename = rf"C:\Users\act1780\Documents\GitHub\andreacortellari.github.io\_posts\2024-04-01-{publication_name}.md"
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(f"""---
title: {publication_name}
date: 2024-04-18 21:15:00 +0100
categories: [Publications]
tags: []
---\n\n""")
                f.write(f"{metadata}\n\n")
                f.write(f"{publications_metadata}\n\n")
                f.write(f"{page_heading_element_2}Articles in Scope\n\n")
                f.write(f"{articles.drop(columns=columns_publications_articles).to_markdown(index=False)}\n\n" if len(articles) >= 1 else "No article focused on Palestine in our database.\n\n")
                f.write(f"{page_heading_element_2}Issues Summary\n\n")
                f.write(issues_table[issues_table['publication_title'] == publication_name].drop(columns=['publication_id', 'publication_title', 'printing_house_name']).to_markdown(index=False))

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\act1780\\Documents\\GitHub\\andrea.cortellari.github.io\\Sol-idarities\\Sources Database\\publications_view.csv'

# Create each articles' page

In [None]:
articles_with_text = articles_view[articles_view['article_text'].notna()]

for index, row in articles_with_text.iterrows():
    filename = rf"articles_{row['article_title'].replace('?', '')}.md"
    metadata_table = articles_with_text.loc[articles_with_text['article_title'] == row['article_title']].drop(columns={'article_text'})

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(f"{page_heading_element}{row['article_title']}\n\n")
        f.write(f"{metadata_table.to_markdown(index=False)}\n\n")
        #f.write(f"**Author:** {row['author']}\n\n")
        #f.write(f"**Published on:** [{row['publication_title']}]({row['publication_title']}.md)\n\n")
        #f.write(f"**Issue:** {row['issue_number']}, {row['issue_date']}\n\n")
        #f.write(f"**Pages:** {row['pages']}\n\n")
        f.write(f"{row['article_text']}\n")

## Data for Graph

In [None]:
years = range(1968, 1972)
months = range(1, 13)
timeframe_in_range = []

for year in years:
    for month in months:
        year_month = f"{year}-{month:02d}"
        timeframe_in_range.append(year_month)

timeframe_in_range = pd.DataFrame(timeframe_in_range, columns=['issue_date'])

In [None]:
pd.merge(articles_view[['publication_title', 'issue_date', 'article_id']], timeframe_in_range, how='outer')\
    .pivot_table(index='issue_date', columns='publication_title', values='article_id', aggfunc='count', fill_value=0, dropna=False).reset_index()\
    .to_csv('data_jobs\Articles Distribution.csv', index=False)