# Plot Number of Zenodo Links Over Time

This notebook reads the CSV files in the `download_statistics` folder, counts the records (rows) in each file, and plots those counts over time. Each file's name, without the `.csv` extension, is interpreted as the date the file represents.

In [1]:
# Import required libraries
import os
import pandas as pd
import matplotlib.pyplot as plt

## List CSV files in the `download_statistics` folder
Collect the names of all `.csv` files in the folder; each file name without its extension is treated as that file's date.

In [2]:
# Folder containing the CSV files
folder_path = "download_statistics"


def list_dated_csv_files(folder):
    """Return the CSV file names in `folder` and their date labels, index-aligned.

    A file's date label is its name without the extension. The two lists are
    sorted together by date label, so ``csv_files[i]`` always belongs to
    ``dates[i]``. (``os.listdir`` returns names in arbitrary order, so sorting
    only one of the lists would break that pairing.)

    Parameters
    ----------
    folder : str
        Directory to scan; sub-directories are not searched.

    Returns
    -------
    tuple[list[str], list[str]]
        ``(csv_files, dates)``; two empty lists if no name ends with ``.csv``.
    """
    labelled = sorted(
        (os.path.splitext(name)[0], name)
        for name in os.listdir(folder)
        if name.endswith(".csv")
    )
    names = [name for _, name in labelled]
    labels = [label for label, _ in labelled]
    return names, labels


# Start from empty lists so both names are defined even when the folder is missing
csv_files, dates = [], []

# Ensure folder exists before listing files
if not os.path.exists(folder_path):
    print(f"Folder '{folder_path}' does not exist. Please create it and add CSV files.")
else:
    # Sorted by date label; string order is chronological only for ISO-like
    # names such as 2024-01-31 -- TODO confirm the file naming scheme
    csv_files, dates = list_dated_csv_files(folder_path)
    print(dates)

Folder 'download_statistics' does not exist. Please create it and add CSV files.


## Count records in each CSV file
For every file, count how many records (rows) exist and store this information.

In [3]:
# Count records in each file and associate with dates
record_counts = []

if os.path.exists(folder_path):
    # Take each date from the very file being counted instead of zipping two
    # separately ordered lists, so a count can never land on another file's date.
    file_dates = []
    for file in csv_files:
        file_path = os.path.join(folder_path, file)
        data = pd.read_csv(file_path)
        file_dates.append(os.path.splitext(file)[0])  # file name without ".csv"
        record_counts.append(len(data))  # number of rows pandas parsed from the file

    # Combine dates and record counts into a DataFrame, ordered chronologically
    # on the parsed dates (not on the raw file-name strings)
    time_data = (
        pd.DataFrame({"Date": pd.to_datetime(file_dates), "RecordCount": record_counts})
        .sort_values("Date")
        .reset_index(drop=True)
    )
    print(time_data.head())

## Plot the number of records over time
Visualize how the number of records has changed over time.

In [4]:
# Draw the record-count time series and write it to a PNG file.
# Skipped when the input folder is absent.
if os.path.exists(folder_path):
    fig, ax = plt.subplots(figsize=(10, 6))

    line_style = {"marker": "o", "linestyle": "-", "color": "b"}
    ax.plot(time_data["Date"], time_data["RecordCount"], **line_style)

    ax.set_title("Number of Zenodo Links Over Time")
    ax.set_xlabel("Date")
    ax.set_ylabel("Number of Records")
    ax.grid()

    # Write the figure to disk and report where it went
    plot_path = "zenodo_links_over_time.png"
    fig.savefig(plot_path)
    print(f"Plot saved to {plot_path}")

## Save the processed data
Export the data used for plotting to a CSV file for further analysis.

In [5]:
# Export the table behind the plot as CSV (without the index column).
# Skipped when the input folder is absent.
folder_present = os.path.exists(folder_path)
if folder_present:
    output_csv_path = "zenodo_links_over_time.csv"
    time_data.to_csv(
        output_csv_path,
        index=False,
    )
    print(f"Data saved to {output_csv_path}")