## Get all the Sociologists and Anthropologist from Wiki

In [11]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import regex as re 
import json

In [12]:
def get_wiki(_page, get_txt = False):
    baseurl = "https://en.wikipedia.org/w/api.php?"
    action = "action=query"
    title = f"titles={_page}"
    content = "prop=revisions&rvprop=content&rvslots=*"
    dataformat ="format=json"
    
    query = "{}{}&{}&{}&{}".format(baseurl, action, content, title, dataformat)
    print(query)
    
    if get_txt == True:
        resp = requests.get(query).json()
        page_id = [i for i in resp['query']['pages'].keys()][0] # get page id
        txt = resp['query']['pages'][page_id]['revisions'][0]['slots']['main']['*']
        return txt
    
    else:
        return requests.get(query).json()

In [13]:
resp_soc = get_wiki("List_of_sociologists")
resp_ant = get_wiki("List_of_anthropologists")
resp_pol = get_wiki("List_of_political_scientists")
resp_psy = get_wiki("List_of_psychologists")
resp_eco = get_wiki("List_of_economists")

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=List_of_sociologists&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=List_of_anthropologists&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=List_of_political_scientists&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=List_of_psychologists&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=List_of_economists&format=json


In [5]:
''' Soc '''
txt = resp_soc['query']['pages']['254243']['revisions'][0]['slots']['main']['*']
txt = txt.split('==A==')[1] # drop intro tekst
txt = txt.split('==References==')[0] # drop trailing stuff
sociologists = re.findall('\[\[(.*?)(?:\|.*?)?\]\]',txt)
print(f'Sociologists: {len(sociologists)}')

''' Ant '''
txt = resp_ant['query']['pages']['728']['revisions'][0]['slots']['main']['*']
txt = txt.split('==A==')[1] # drop intro tekst
txt = txt.split('==Fictional anthropologists==')[0] # drop trailing stuff
anthropologists = re.findall('\[\[(.*?)(?:\|.*?)?\]\]',txt)
print(f'Anthropologist: {len(anthropologists)}')

''' Eco '''
txt = resp_eco["query"]['pages']["'10231'"]['revisions'][0]['slots']['main']['*']
txt = txt.split('==A==')[1] # drop intro tekst
txt = txt.split('==Fictional anthropologists==')[0] # drop trailing stuff
economists = re.findall('\[\[(.*?)(?:\|.*?)?\]\]',txt)
print(f'economists: {len(economists)}')

''' Psy '''
txt = resp_psy['query']['pages']['199877']['revisions'][0]['slots']['main']['*']
txt = txt.split('==A==')[1] # drop intro tekst
txt = txt.split('==Fictional anthropologists==')[0] # drop trailing stuff
psycholists = re.findall('\[\[(.*?)(?:\|.*?)?\]\]',txt)
print(f'Psychologists: {len(psycholists)}')

''' Pol '''
txt = resp_pol['query']['pages']['37559']['revisions'][0]['slots']['main']['*']
txt = txt.split('==A==')[1] # drop intro tekst
txt = txt.split('==Fictional anthropologists==')[0] # drop trailing stuff
political_scientists = re.findall('\[\[(.*?)(?:\|.*?)?\]\]',txt)
print(f'Political_scientists: {len(political_scientists)}')


# To dict
science_dict = {'soc':sociologists,
                'anth':anthropologists,
               'eco': economists,
               'psy': psychologists,
               'pol': political_scientists}

with open('science_name_dict.json', 'w', encoding = 'utf-8') as f:
    json.dump(science_dict,f)

Sociologists: 788
Anthropologist: 277


## Get all the content from the Socs's/Anths's wiki pages

In [20]:
''' Scrape the pages '''
for field, nodes in science_dict.items():
    for node in nodes:
        node = node.replace(' ', '_')
        try:
            txt = get_wiki(node, get_txt = True)
        except KeyError as e:
            print(e, node)
            continue
        
        with open(f'wiki_content/{node}.txt', 'w', encoding = 'utf-8') as f:
            f.write(txt)

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Peter_Abell&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Mark_Abrams&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Janet_Abu-Lughod&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Jane_Addams&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Theodor_Adorno&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Richard_Alba&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Francesco_Alberoni&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Martin_Albrow&format=json
https://en.wikipedia.org/w/api

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Krishna_Bhattachan&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Robert_Bierstedt&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Norman_Birnbaum&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Margunn_Bjørnholt&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Donald_Black_(sociologist)&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Peter_Blau&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Kathleen_M._Blee&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Gisela_Bleibtreu-Ehrenberg&form

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=R.W._Connell&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Paul_Connerton&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Charles_Cooley&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Anna_Julia_Cooper&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Lewis_A._Coser&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Carl_J._Couch&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Douglas_E._Cowan&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Maxine_Leeds_Craig&format=json
https://en.wikiped

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Paul_Fauconnet&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Joe_Feagin&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Fei_Xiaotong&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Anuška_Ferligoj&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Florestan_Fernandes&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Myra_Marx_Ferree&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Enrico_Ferri_(criminologist)&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Gary_Alan_Fine&format=json
https://

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Isacque_Graeber&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Antonio_Gramsci&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Mark_Granovetter&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Richard_Grathoff&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Andrew_M._Greeley&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Liah_Greenfeld&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Leonid_Grinin&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Ludwig_Gumplowicz&format=json
https://en.wi

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Morris_Janowitz&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=James_M._Jasper&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Gail_Jefferson&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Yasmin_Jiwani&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Hans_Joas&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Carole_Joffe&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Benton_Johnson&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Guy_Benton_Johnson&format=json
https://en.wikipedia.org/w

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Paul_F._Lazarsfeld&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Gustave_Le_Bon&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Frederic_Le_Play&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Emil_Lederer&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Henri_Lefebvre&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=French_people&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Marxism&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Charles_Lemert&format=json
https://en.wikipedia.org/w/a

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Vladimir_Martynenko&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Gary_T._Marx&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Karl_Marx&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Tomáš_Garrigue_Masaryk&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Douglas_Massey&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Brian_Massumi&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Humberto_Maturana&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Marcel_Mauss&format=json
https://en.wikiped

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Jean-Claude_Passeron&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Orlando_Patterson&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Karl_Pearson&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Willie_Pearson_Jr&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Jacqueline_Peschard&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=James_Petras&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Jean_Piaget&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Andrew_Pickering&format=json
https://en.w

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Pierre_Sansot&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Boaventura_de_Sousa_Santos&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Giovanni_Sartori&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Saskia_Sassen&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Peter_Robert_Saunders&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Ferdinand_de_Saussure&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Albert_Schäffle&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Thomas_J._Scheff&format=

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Thomas_Sugrue&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=William_Graham_Sumner&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Eilert_Sundt&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Edwin_Sutherland&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Ian_Svenonius&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Richard_Swedberg&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Ann_Swidler&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Jan_Szczepanski_(sociologist)&format=json
https

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Sidney_Webb&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Alfred_Weber&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Marianne_Weber&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Max_Weber&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Frank_Webster_(sociologist)&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Margaret_Weir&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Barry_Wellman&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Ida_B._Wells-Barnett&format=json
https://en.wikipe

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Mary_Catherine_Bateson&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Ruth_Behar&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Ruth_Benedict&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Dorothy_A._Bennett&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Carl_H._Berendt&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Lee_R._Berger&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Brent_Berlin&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Catherine_Helen_Webb_Berndt&format=json
https:

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Alfred_Gell&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Ernest_Gellner&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Herb_Di_Gioia&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Max_Gluckman&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Maurice_Godelier&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Jane_Goodall&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Marjorie_Harness_Goodwin&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Igor_Gorevich&format=json
https://en.wikipedia.

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Richard_Leakey&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Richard_Borshay_Lee&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Charles_Miller_Leslie&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Claude_Lévi-Strauss&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Ellen_Lewin&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=C._Scott_Littleton&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Albert_Buell_Lewis&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Oscar_Lewis&format=json
https

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Kathy_Reichs&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Audrey_Richards&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=W._H._R._Rivers&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Paul_Rivet&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Joel_Robbins&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Renato_Rosaldo&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Gayle_Rubin&format=json
https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Robert_A._Rubinstein&format=json
https://en.wikipedia.org/w/

## Scrape af Sociological theories (Asger rod)

In [15]:
resp_soc_t = get_wiki("Category:Sociological_theories")

https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&titles=Category:Sociological_theories&format=json


In [16]:
resp_soc_t

{'batchcomplete': '',
 'query': {'normalized': [{'from': 'Category:Sociological_theories',
    'to': 'Category:Sociological theories'}],
  'pages': {'6220146': {'pageid': 6220146,
    'ns': 14,
    'title': 'Category:Sociological theories',
    'revisions': [{'slots': {'main': {'contentmodel': 'wikitext',
        'contentformat': 'text/x-wiki',
        '*': '{{Commons category|Sociological theories}}\n{{Cat main|Sociological theory}}\nThis category contains various sociological and sometimes interdisciplinary theories and [[Sociological paradigms|paradigms]]. For the different variants of theories or paradigms, please see its individual sub-category. For philosophical theories about society see [[:Category:Social theories]].\n[[Category:Philosophy of social science]]\n[[Category:Scientific theories]]\n[[Category:Social theories|*]]\n[[Category:Sociology|theories]]\n{{CatAutoTOC}}'}}}]}}}}

In [12]:
''' soc_t '''
txt = resp_soc_t['query']['pages']['6220146']['revisions'][0]['slots']['main']['*']
txt = txt.split('==A==')[1] # drop intro tekst
txt = txt.split('==Fictional anthropologists==')[0] # drop trailing stuff
soc_theories = re.findall('\[\[(.*?)(?:\|.*?)?\]\]',txt)
print(f'soc_theories: {len(soc_theories)}')

IndexError: list index out of range

In [14]:
resp_soc_t

{'batchcomplete': '',
 'query': {'normalized': [{'from': 'Category:Sociological_theories',
    'to': 'Category:Sociological theories'}],
  'pages': {'6220146': {'pageid': 6220146,
    'ns': 14,
    'title': 'Category:Sociological theories',
    'revisions': [{'slots': {'main': {'contentmodel': 'wikitext',
        'contentformat': 'text/x-wiki',
        '*': '{{Commons category|Sociological theories}}\n{{Cat main|Sociological theory}}\nThis category contains various sociological and sometimes interdisciplinary theories and [[Sociological paradigms|paradigms]]. For the different variants of theories or paradigms, please see its individual sub-category. For philosophical theories about society see [[:Category:Social theories]].\n[[Category:Philosophy of social science]]\n[[Category:Scientific theories]]\n[[Category:Social theories|*]]\n[[Category:Sociology|theories]]\n{{CatAutoTOC}}'}}}]}}}}

In [None]:
baseurl = "https://en.wikipedia.org/w/api.php?"
action = "action=query"
title = f"titles={_page}"
content = "prop=revisions&rvprop=content&rvslots=*"
dataformat ="format=json"

query = "{}{}&{}&{}&{}".format(baseurl, action, content, title, dataformat)
print(query)

if get_txt == True:
    resp = requests.get(query).json()
    page_id = [i for i in resp['query']['pages'].keys()][0] # get page id
    txt = resp['query']['pages'][page_id]['revisions'][0]['slots']['main']['*']
    return txt

else:
    return requests.get(query).json()