In [1]:
from time import sleep
from urllib.parse import quote

import requests

from guardian_key import KEY

In [45]:
def fmt(query):
    """Percent-encode ``query`` so it can be used as one URL query-string value.

    Spaces become ``%20`` and slashes become ``%2F``, exactly as before. In
    addition, every other character outside the unreserved set (letters,
    digits and ``_.-~``) is now escaped too, so values containing ``&``,
    ``=``, ``#``, ``+`` or quotes can no longer break the surrounding
    ``key=value&key=value`` string.

    Parameters
    ----------
    query : str
        Raw (not yet encoded) text.

    Returns
    -------
    str
        The percent-encoded text.
    """
    # safe='' overrides quote()'s default of leaving '/' unescaped.
    return quote(query, safe='')

def make_query(query, section='uk-news',page_num=1, page_size=50):
    """Return the full search URL on content.guardianapis.com for ``query``.

    Parameters
    ----------
    query : str
        Search text; passed through ``fmt`` before being placed in ``q``.
    section : str
        Value of the ``section`` parameter, inserted as given.
    page_num : int
        Value of the ``page`` parameter.
    page_size : int
        Value of the ``page-size`` parameter.

    Returns
    -------
    str
        The endpoint followed by ``name=value`` pairs joined with ``&``.
        The request is fixed to ``tag=type/article`` (encoded by ``fmt``),
        ``show-tags=keyword`` and ``order-by=newest``, and carries the
        imported ``KEY`` as ``api-key``.
    """
    endpoint = 'https://content.guardianapis.com/search?'

    # (name, value) pairs listed in the order they appear in the URL.
    pairs = [
        ('api-key', KEY),
        ('q', fmt(query)),
        ('page-size', page_size),
        ('show-tags', 'keyword'),
        ('tag', fmt('type/article')),
        ('order-by', 'newest'),
        ('page', page_num),
        ('section', section),
    ]

    return endpoint + '&'.join(f"{name}={value}" for name, value in pairs)


In [46]:
def extract_story_data(article):
    """Copy the core metadata fields of one article record into a new dict.

    Parameters
    ----------
    article : dict
        Must contain the keys ``id``, ``webPublicationDate``, ``webTitle``,
        ``webUrl``, ``sectionId``, ``sectionName`` and ``type``.

    Returns
    -------
    dict
        Keys ``id``, ``creation_data``, ``title``, ``url``, ``sectionid``,
        ``section`` and ``type``, in that order.

    Raises
    ------
    KeyError
        If any of the required keys is missing from ``article``.
    """
    # output key -> key to read from the article record
    field_map = {
        'id': 'id',
        'creation_data': 'webPublicationDate',
        'title': 'webTitle',
        'url': 'webUrl',
        'sectionid': 'sectionId',
        'section': 'sectionName',
        'type': 'type',
    }
    return {out_key: article[src_key] for out_key, src_key in field_map.items()}

def extract_tags(article):
    """Return the ``webTitle`` of every entry in ``article['tags']`` as a list.

    The order of the entries is preserved; an empty ``tags`` list gives an
    empty result. Raises ``KeyError`` if ``article`` has no ``tags`` key or an
    entry has no ``webTitle``.
    """
    return [entry['webTitle'] for entry in article['tags']]



In [42]:
# Fetch `url`, decode the JSON body and keep only its 'response' entry.
# NOTE(review): no cell above this one assigns `url`; the only assignment
# visible in the notebook is inside the collection loop further down. This
# cell therefore relies on leftover kernel state and would raise NameError on
# a fresh Restart & Run All unless `url` is defined somewhere not shown.
response = requests.get(url).json()['response']
# Bare expression so the notebook displays the parsed payload.
response

{'status': 'ok',
 'userTier': 'developer',
 'total': 0,
 'startIndex': 0,
 'pageSize': 50,
 'currentPage': 1,
 'pages': 0,
 'orderBy': 'newest',
 'results': []}

In [47]:
# One dict per article: the fields from extract_story_data plus a 'tags' list.
collected_data = []
n_pages = 1
query = '"royal family"'

for page in range(1,n_pages+1):
    print(f'Collecting Page {page} of {n_pages}')
    url = make_query(query, page_num=page)
    # The timeout keeps a stalled connection from hanging the notebook.
    # raise_for_status() turns an HTTP error status into an HTTPError here,
    # instead of letting it fall through to JSON decoding and key lookups.
    http_reply = requests.get(url, timeout=30)
    http_reply.raise_for_status()
    response = http_reply.json()['response']
    results = response['results']
    for article in results:
        main_data = extract_story_data(article)
        tags = extract_tags(article)
        main_data['tags'] = tags
        collected_data.append(main_data)
    # Pause between consecutive requests only; nothing follows the last page.
    if page < n_pages:
        sleep(2)


Collecting Page 1 of 1


In [48]:
import pandas as pd
# Tabulate the collected article dicts (one row per dict) for display.
pd.DataFrame(collected_data)

Unnamed: 0,id,creation_data,title,url,sectionid,section,type,tags
0,uk-news/2023/jan/24/couple-missing-with-newbor...,2023-01-24T15:20:24Z,Couple missing with newborn baby may have been...,https://www.theguardian.com/uk-news/2023/jan/2...,uk-news,UK news,article,"[UK news, London]"
1,uk-news/2023/jan/21/spend-a-day-volunteering-t...,2023-01-21T22:30:19Z,"Spend a day volunteering to mark coronation, u...",https://www.theguardian.com/uk-news/2023/jan/2...,uk-news,UK news,article,"[King Charles III, Camilla (Queen Consort), Mo..."
2,uk-news/2023/jan/19/cash-and-the-crown-estate-...,2023-01-19T16:54:26Z,Cash and the crown estate: a look at the Briti...,https://www.theguardian.com/uk-news/2023/jan/1...,uk-news,UK news,article,"[Crown estate, Land ownership, King Charles II..."
3,uk-news/2023/jan/19/constance-marten-estranged...,2023-01-19T08:57:51Z,Constance Marten: estranged father of missing ...,https://www.theguardian.com/uk-news/2023/jan/1...,uk-news,UK news,article,"[UK news, Bolton, Manchester, Greater Manchest..."
4,uk-news/2023/jan/18/medieval-institution-commo...,2023-01-19T00:01:11Z,‘Medieval institution’: Commonwealth Guardian ...,https://www.theguardian.com/uk-news/2023/jan/1...,uk-news,UK news,article,"[Prince Harry, Commonwealth of Nations, Books,..."
5,uk-news/2023/jan/13/prince-harry-i-left-out-de...,2023-01-13T23:56:18Z,Prince Harry: I left out details as I feared f...,https://www.theguardian.com/uk-news/2023/jan/1...,uk-news,UK news,article,"[Prince Harry, UK news, Telegraph Media Group]"
6,uk-news/2023/jan/13/prince-harry-book-spare-re...,2023-01-13T15:14:32Z,Prince Harry’s revelations won’t help heal chi...,https://www.theguardian.com/uk-news/2023/jan/1...,uk-news,UK news,article,"[Prince Harry, Meghan, the Duchess of Sussex, ..."
7,uk-news/2023/jan/13/prince-harry-spare-book-re...,2023-01-13T12:09:22Z,‘The monarchy’s a laughing stock’: readers rea...,https://www.theguardian.com/uk-news/2023/jan/1...,uk-news,UK news,article,"[Prince Harry, Monarchy, Meghan, the Duchess o..."
8,uk-news/commentisfree/2023/jan/13/prince-harry...,2023-01-13T11:15:13Z,Why hasn’t Harry given up his ridiculous title...,https://www.theguardian.com/uk-news/commentisf...,uk-news,UK news,article,"[Prince Harry, US news, Monarchy, Aristocracy,..."
9,uk-news/2023/jan/13/prince-harry-spare-book-th...,2023-01-13T06:00:07Z,Harry wanted men to talk about their problems....,https://www.theguardian.com/uk-news/2023/jan/1...,uk-news,UK news,article,"[Prince Harry, US news, Prince William, Mental..."


In [17]:
# Serialise the collected records to 'guardian_data.json' through a DataFrame.
pd.DataFrame(collected_data).to_json('guardian_data.json')

In [18]:
# Load the saved JSON file back into a DataFrame.
data = pd.read_json('guardian_data.json')

In [19]:
# Count how many rows fall under each value of the 'section' column.
# NOTE(review): the recorded output of this cell is KeyError: 'section', so
# the frame loaded when it last ran had no 'section' column -- presumably a
# stale guardian_data.json from an earlier run; confirm and re-run.
data['section'].value_counts()

KeyError: 'section'

In [14]:
# Expand the list-valued 'tags' column to one row per tag, then drop every
# row that has a missing value in any column (dropna() with no arguments).
# NOTE(review): this rebinds `data`, so the un-exploded frame is no longer
# reachable after the cell has run.
data = data.explode('tags').dropna()

In [15]:
import networkx as nx

In [16]:
# Independent copy of the 'id' and 'tags' columns to build the edge list from.
# NOTE(review): 'id' is selected here but never read afterwards; the later
# selection of ['source', 'target'] drops it again.
edge_list = data[['id','tags']].copy()

In [17]:
# Use each row's index label as the 'source' endpoint of its edge.
# Presumably rows exploded from the same article share one label (the
# displayed edge list shows 0, 0, 1, 1, 1, ...) -- TODO confirm the index
# identifies articles uniquely.
edge_list['source'] = edge_list.index

In [18]:
# The tag value is the 'target' endpoint of each edge.
edge_list['target'] = edge_list['tags']

In [19]:
# Keep only the two endpoint columns.
edge_list = edge_list[['source','target']]

In [20]:
# Build a graph with one edge per row; no column names are passed, so
# from_pandas_edgelist's default source/target column names are used.
G = nx.from_pandas_edgelist(edge_list)

In [21]:
# Check that G is bipartite before the bipartite projection below is applied.
nx.is_bipartite(G)

True

In [23]:
# Project G onto the distinct 'target' values, i.e. keep only the tag nodes.
# Per the networkx documentation, the default edge weight is the number of
# neighbours the two nodes share in G (here, rows/articles carrying both tags).
new_G = nx.bipartite.weighted_projected_graph(G,edge_list['target'].unique())

In [110]:
from netwulf import visualize

In [24]:
# Export the projected graph to the file 'G.gexf'.
nx.write_gexf(new_G,'G.gexf')

In [26]:
# Bare expression: display the final source/target edge list.
edge_list

Unnamed: 0,source,target
0,0,Football
0,0,Sport
1,1,Joe Biden
1,1,Gun crime
1,1,California
...,...,...
499,499,Gender
499,499,Australian politics
499,499,Australia news
499,499,Coalition
