In [1]:
# pip install pyzotero

In [2]:
from pyzotero import zotero
import os
import tweepy as tw
import pandas as pd
import datetime
import json, sys
from datetime import date, timedelta  
import datetime
import plotly.express as px

In [3]:
# Connection settings for the Zotero group library analysed in this notebook.
library_id = '2514686'
library_type = 'group'
api_key = ''  # api_key is only needed for private groups and libraries

# Pass the key through to the client so that filling it in above actually
# takes effect. An empty string is sent as None, i.e. no key at all, which is
# what the original two-argument call did.
zot = zotero.Zotero(library_id, library_type, api_key or None)


# All items in the Zotero Intelligence bibliography library

In [4]:
# Download every top-level item of the library; `everything` follows the
# API pagination so the whole result set is returned.
items = zot.everything(zot.top())

columns3 = ['Title', 'Publication type', 'Link to publication', 'Abstract', 'Zotero link', 'Date published', 'FirstName2', 'Publisher', 'Journal']

# One tuple per item, in the same order as `columns3`.
# Every data field is read with .get(): the original already did this for
# date/publisher/publicationTitle, and an item that lacks e.g. 'url' or
# 'abstractNote' would otherwise abort the whole load with a KeyError.
data3 = []
for item in items:
    fields = item['data']
    data3.append((
        fields.get('title'),
        fields.get('itemType'),
        fields.get('url'),
        fields.get('abstractNote'),
        item['links']['alternate']['href'],
        fields.get('date'),
        fields.get('creators', []),  # raw creators list; keep a list even when absent
        fields.get('publisher'),
        fields.get('publicationTitle'),
    ))

# Show full cell contents (abstracts, URLs) instead of truncating them.
pd.set_option('display.max_colwidth', None)

df = pd.DataFrame(data3, columns=columns3)

# Map Zotero's internal item type identifiers to human-readable labels.
# A single mapping replaces the original chain of one `replace` call per type
# (which also listed 'forumPost' twice). Types not listed here are left as-is.
publication_type_labels = {
    'thesis': 'Thesis',
    'journalArticle': 'Journal article',
    'book': 'Book',
    'bookSection': 'Book chapter',
    'blogPost': 'Blog post',
    'videoRecording': 'Video',
    'podcast': 'Podcast',
    'magazineArticle': 'Magazine article',
    'webpage': 'Webpage',
    'newspaperArticle': 'Newspaper article',
    'report': 'Report',
    'forumPost': 'Forum post',
    'manuscript': 'Manuscript',
    'document': 'Document',
    'conferencePaper': 'Conference paper',
    'film': 'Film',
    'presentation': 'Presentation',
}
df['Publication type'] = df['Publication type'].replace(publication_type_labels)

# Collapse known spelling variants of the same publisher into one canonical
# name so they are counted together. Names already in canonical form need no
# entry; all other publishers are left untouched.
publisher_aliases = {
    'Taylor & Francis Group': 'Taylor and Francis',
    'Routledge Handbooks Online': 'Routledge',
    'Praeger Security International': 'Praeger',
}
df['Publisher'] = df['Publisher'].replace(publisher_aliases)

In [5]:
# Today's date as an ISO-8601 string (YYYY-MM-DD).
day_allitems = date.today().isoformat()

In [14]:
# Export the full item table to the working directory. The row index is
# written as the first (unnamed) column, which is the to_csv default.
df.to_csv(path_or_buf='all_items.csv', index=True)

## Item types in the library

In [7]:
# Number of items per publication type, most frequent first.
# value_counts() already sorts by count in descending order. Naming the index
# and the value column explicitly (rename_axis + reset_index(name=...)) gives
# the same two columns on pandas 1.x and 2.x; the previous
# rename({'index': ...}) relied on pandas < 2.0 column naming.
df_types = (
    df['Publication type']
    .value_counts()
    .rename_axis('Publication type')
    .reset_index(name='Count')
)
df_types

Unnamed: 0,Publication type,Count
0,Journal article,855
1,Book,222
2,Webpage,160
3,Thesis,87
4,Blog post,84
5,Newspaper article,64
6,Book chapter,58
7,Video,29
8,Magazine article,28
9,Podcast,24


In [8]:
# Bar chart of item counts per publication type, one colour per type.
fig = px.bar(df_types, x='Publication type', y='Count', color='Publication type')
fig.update_xaxes(tickangle=-70)  # slant the type labels so they do not overlap
fig.update_layout(
    autosize=False,
    width=1200,
    height=600,
    # The chart plots publication types, not collections, so the title now
    # says so (it previously read "Top 10 collections in the library").
    title={'text': 'Item types in the library', 'y': 0.95, 'x': 0.4, 'yanchor': 'top'},
)

In [9]:
# Same per-type counts as a pie chart: one slice per publication type.
fig = px.pie(df_types, names='Publication type', values='Count')
fig.update_layout(
    title=dict(text='Publications by type', y=0.95, x=0.45, yanchor='top'),
)


## Items by publication year

In [10]:
# Parse the free-text Zotero dates. Unparseable values become NaT
# (errors='coerce'); timestamps are read as UTC and shown in UK local time.
df['Date published'] = pd.to_datetime(df['Date published'], utc=True, errors='coerce').dt.tz_convert('Europe/London')

# Four-digit year as a string, with a 'No date' marker where parsing failed.
df['Date year'] = df['Date published'].dt.strftime('%Y').fillna('No date')

# Items per year. The index and value column are named explicitly so the
# result has the columns 'Publication year' / 'Count' on pandas 1.x and 2.x
# alike (the previous rename({'index': ...}) relied on pandas < 2.0 naming).
df_year = (
    df['Date year']
    .value_counts()
    .rename_axis('Publication year')
    .reset_index(name='Count')
)
# Undated items are excluded from the chart; a boolean filter replaces the
# former in-place drop.
df_year = df_year[df_year['Publication year'] != 'No date']
df_year = df_year.sort_values(by='Publication year', ascending=True)

fig = px.bar(df_year, x='Publication year', y='Count')
fig.update_xaxes(tickangle=-70)
fig.update_layout(
    autosize=False,
    width=1400,
    height=700,
    title={'text': 'Publications by year: all items', 'y': 0.95, 'x': 0.5, 'yanchor': 'top'},
)


Parsing dates in DD/MM/YYYY format when dayfirst=False (the default) was specified. This may lead to inconsistently parsed dates! Specify a format to ensure consistent parsing.



In [11]:
# Running total of items in the row order of df_year, drawn as a line chart.
df_year['Sum'] = df_year['Count'].cumsum()

fig2 = px.line(df_year, x='Publication year', y='Sum')
fig2.update_layout(
    title=dict(text='Publications by year: cumulative sum', y=0.95, x=0.5, yanchor='top'),
    autosize=False,
    width=1000,
    height=500,
)
fig2.update_xaxes(tickangle=-70)

In [12]:
# Number of items per publisher, most frequent first (items without a
# publisher are skipped, as value_counts() ignores missing values).
# rename_axis + reset_index(name=...) names both columns explicitly, so the
# result is the same on pandas 1.x and 2.x; the previous
# rename({'index': ...}) relied on pandas < 2.0 column naming.
df_publisher = (
    df['Publisher']
    .value_counts()
    .rename_axis('Publisher')
    .reset_index(name='Count')
)
df_publisher

Unnamed: 0,Publisher,Count
0,Routledge,48
1,Oxford University Press,20
2,Taylor and Francis,13
3,Edward Elgar Publishing,10
4,Macmillan Education UK,9
...,...,...
112,University of Oklahoma Press,1
113,Merrion Press,1
114,"Berghahn Books, Incorporated",1
115,Kodansha International,1


In [13]:
# Number of items per journal, most frequent first (items without a journal
# title are skipped, as value_counts() ignores missing values).
# rename_axis + reset_index(name=...) names both columns explicitly, so the
# result is the same on pandas 1.x and 2.x; the previous
# rename({'index': ...}) relied on pandas < 2.0 column naming.
df_journal = (
    df['Journal']
    .value_counts()
    .rename_axis('Journal')
    .reset_index(name='Count')
)
df_journal

Unnamed: 0,Journal,Count
0,Intelligence and National Security,423
1,International Journal of Intelligence and CounterIntelligence,81
2,Journal of Intelligence History,80
3,Journal of Conflict Studies,13
4,Journal of Strategic Studies,12
...,...,...
208,Public Policy and Administration,1
209,Security Studies,1
210,GeoJournal,1
211,International affairs [London],1
