### Step 1: Import necessary libraries
We will use `os`, `pandas`, and `matplotlib` to read the CSV files, process data, and plot the results.

In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt

### Step 2: Define the path to the `download_statistics` folder
We will collect all CSV filenames and extract the date from each filename (the text before the first `.`).

In [2]:
folder_path = '../download_statistics'

# os.listdir() returns entries in arbitrary order, so order the CSV filenames
# by their date prefix (full filename as tie-breaker for determinism).
# Later cells compute per-file values by iterating `files` and pair them
# positionally with `dates`, so the two lists must stay index-aligned.
files = sorted(
    (f for f in os.listdir(folder_path) if f.endswith('.csv')),
    key=lambda f: (f.split('.')[0], f),
)

# Date label = filename text before the first '.'. Because it is derived from
# the already ordered `files`, `dates` is sorted and dates[i] belongs to files[i].
dates = [f.split('.')[0] for f in files]

### Step 3: Count the number of records in each file
We will load each file and count its rows (excluding the header).

In [3]:
# One row count per CSV, in the same order as `files`.
# pd.read_csv treats the first line as the header by default, so len() of the
# resulting frame counts data rows only.
record_counts = [
    len(pd.read_csv(os.path.join(folder_path, csv_name)))
    for csv_name in files
]

### Step 4: Create a DataFrame for plotting
We will organize the counts and dates into a DataFrame for easier analysis and plotting.

In [4]:
# Build the plotting table in one chain: pair labels with counts, parse the
# labels into timestamps, then order the rows chronologically.
# NOTE(review): `dates` and `record_counts` are matched by position; this
# assumes both lists were built in the same file order — confirm upstream.
data = (
    pd.DataFrame({'Date': dates, 'Record Count': record_counts})
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values('Date')
)

### Step 5: Plot the record counts over time
We will plot the number of records in each file over time and save the figure as a PNG (the figure is closed after saving, so it is not displayed inline).

In [5]:
# Draw the series on an explicit Figure/Axes pair, write the image to disk,
# then close the figure so it is released and not rendered inline.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(data['Date'], data['Record Count'], marker='o', linestyle='-', color='b')
ax.set_title('Number of Records Over Time', fontsize=14)
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Record Count', fontsize=12)
ax.grid(True)
fig.savefig('../docs/statistics/number_of_zenodo_records_over_time.png')
plt.close(fig)

### Step 6: Save the data to CSV
We will save the intermediate data used for plotting to a CSV file.

In [6]:
# Persist the table behind the plot in the same directory as the figure;
# index=False keeps the DataFrame's row index out of the written file.
output_csv_path = '../docs/statistics/zenodo_links_over_time.csv'
data.to_csv(path_or_buf=output_csv_path, index=False)