# Plot Number of Zenodo Links Over Time

This notebook opens CSV files from the `download_statistics` folder and plots how many records are in these files over time.

In [1]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import os

## List CSV Files

List all CSV files in the `download_statistics` directory to find the data files.

In [2]:
# Folder holding the CSV snapshots, given relative to this notebook
directory = '../download_statistics'

# Keep only the entries whose name ends in '.csv', in ascending name order
files = sorted(entry for entry in os.listdir(directory) if entry.endswith('.csv'))
files

['20240628.csv',
 '20240711.csv',
 '20240809.csv',
 '20240826.csv',
 '20240903.csv',
 '20240910.csv',
 '20240917.csv',
 '20240924.csv',
 '20241001.csv',
 '20241008.csv']

## Extract Dates and Record Counts

Read each CSV file, count the number of records it contains, and associate that count with the date encoded in the file name (`yyyyMMdd.csv`).

In [3]:
# Build two parallel lists: the snapshot date taken from each file name and
# the number of records found in that file.
dates = []
record_counts = []

for file in files:
    # The part of the file name before the first '.' is the snapshot date
    # (files are expected to be named yyyyMMdd.csv)
    dates.append(file.split('.')[0])

    # Load the CSV and use the number of rows in the frame as the record count
    file_path = os.path.join(directory, file)
    record_counts.append(len(pd.read_csv(file_path)))

# Create a DataFrame for plotting
data = pd.DataFrame({'Date': dates, 'Records': record_counts})

# Parse with an explicit format so the yyyyMMdd assumption is enforced rather
# than left to pandas' format inference; a non-conforming file name raises
# here instead of being guessed at.
data['Date'] = pd.to_datetime(data['Date'], format='%Y%m%d')
data

Unnamed: 0,Date,Records
0,2024-06-28,34
1,2024-07-11,34
2,2024-08-09,37
3,2024-08-26,40
4,2024-09-03,43
5,2024-09-10,43
6,2024-09-17,43
7,2024-09-24,59
8,2024-10-01,59
9,2024-10-08,59


## Plot the Data

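Create a plot showing the number of records over time. The figure is saved to `zenodo_links_over_time.png` and then closed, so it is not displayed inline.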

In [4]:
# Draw the record count per snapshot date as a line with point markers
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(data['Date'], data['Records'], marker='o')
ax.set_title('Number of Zenodo Links Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Number of Records')
ax.grid(True)

# Write the figure to disk, then release it (no inline rendering)
fig.savefig('zenodo_links_over_time.png')
plt.close(fig)