# Jupyter Example Notebook

#### An example of how to do a basic visualization for a search query on the Zenodo API

In [None]:
# Install dependencies
!pip install requests seaborn matplotlib pandas

In [None]:
import requests
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Query the Zenodo records search endpoint and keep the decoded JSON in `data`.

# Define the URL of the API
url = "https://zenodo.org/api/records"

# Define the query parameters
params = {
    'q': 'reproducibility crisis',
    'size': 100,  # Adjust the size to get more results if needed
}

# Send a GET request to the API.
# requests has no default timeout; without one this cell can block forever
# if the server never answers.
response = requests.get(url, params=params, timeout=30)

# Stop here on an HTTP error (raises requests.HTTPError) instead of printing
# a message and continuing: otherwise `data` would be left undefined, or would
# silently keep the value from an earlier run of this cell.
response.raise_for_status()
data = response.json()

In [None]:
# Extract the title and publication date of every search hit into a DataFrame.
records = data['hits']['hits']

# .get() leaves a missing field as None rather than aborting the whole cell
# with a KeyError because of a single incomplete record.
record_data = [
    {
        'title': record['metadata'].get('title'),
        'publication_date': record['metadata'].get('publication_date'),
    }
    for record in records
]

# Create a DataFrame from the extracted data.
# Naming the columns explicitly keeps them present even when the search
# returned no hits, so later cells can still select df['publication_date'].
df = pd.DataFrame(record_data, columns=['title', 'publication_date'])

# Display the first few rows of the DataFrame
df.head()

In [None]:
# Derive the publication year of each record.
# Each format is tried against the whole column; values that do not match a
# format are coerced to NaT, and their year is filled in from the next,
# coarser format (full date, then year-month, then bare year).
date_formats = ('%Y-%m-%d', '%Y-%m', '%Y')
years_by_format = [
    pd.to_datetime(df['publication_date'], errors='coerce', format=fmt).dt.year
    for fmt in date_formats
]

year = years_by_format[0]
for fallback_year in years_by_format[1:]:
    year = year.fillna(fallback_year)

# Nullable integer dtype so that unparseable dates stay missing (<NA>)
df['year'] = year.astype('Int64')

# Count the number of records for each year
records_per_year = df.groupby('year').size()
df_yearly = records_per_year.reset_index(name='count')

# Display the prepared data
df_yearly.head()

In [None]:
# Draw a bar chart of the yearly record counts on an explicit Figure/Axes pair.
sns.set(style="whitegrid")
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=df_yearly, x='year', y='count', ax=ax)

# Title and axis labels
ax.set_title('Number of "Reproducibility Crisis" Records per Year on Zenodo')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Records')

# Tilt the year labels on the x axis
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()