In [1]:
from urllib.request import Request, urlopen # send requests to web server
from lxml import html # parse HTML
import os             # access directories
import pandas as pd   # dataframes
os.chdir('../Data')

## Scrape methods from Oxford and SAGE methods dictionaries

In [2]:
# Oxford: A Dictionary of Social Research Methods (2016)
# Mark Elliot, Ian Fairweather, Wendy Olsen, and Maria Pampaka
# DOI: 10.1093/acref/9780191816826.001.0001

# # Get all methods with respective url (22 pages)
oxford_methods = []
urls = []
for i in range(1,23):
    path = 'https://www.oxfordreference.com/view/10.1093/acref/9780191816826.001.0001/acref-9780191816826?btog=chap&hide=true&page='+str(i)+'&pageSize=20&skipEditions=true&sort=titlesort&source=%2F10.1093%2Facref%2F9780191816826.001.0001%2Facref-9780191816826'
    req = Request(path, headers={'User-Agent': 'XYZ/3.0'})
    web_byte = urlopen(req, timeout = 10).read()
    webpage = web_byte.decode('utf-8')
    tree = html.fromstring(webpage)
    tree.make_links_absolute(path)
    for method in tree.xpath('//h2[@class="itemTitle"]/a/text()'):
        oxford_methods.append(method)
    for url in tree.xpath('//h2[@class="itemTitle"]/a/@href'):
        urls.append(url)                              
oxford = pd.DataFrame(list(zip(oxford_methods, urls, ['Oxford'] * len(urls))),
                      columns = ['method', 'url', 'source'])
oxford['method'] = oxford.method.str.capitalize()
oxford['method_lower'] = oxford.method.str.lower()
print(len(urls), "methods were scraped.\n")

435 methods were scraped.



In [3]:
# SAGE: Browse by Method
# https://methods.sagepub.com/datasets/method

path = 'https://methods.sagepub.com/datasets/method'

sage_methods = [
# "Access", # Deleted as too broad a term
"Action research",
"Actor network theory",
"Analysis of covariance",
"ANCOVA", # abbreviation inserted manually
"Analysis of variance",
"ANOVA", # abbreviation inserted manually
"Archival research",
"ARIMA",
"Artificial neural networks",
"Autocorrelation",
"Autoethnography",
"Bar charts",
"Bartlett's test",
"Bayesian statistics",
"Big data",
"Biographical research",
"Bootstrapping",
"Box-and-whisker",
"Box-score method",
"Case study research",
"Categorical variables",
"Census data",
"Central limit theorem",
"Chi-square test",
"Classification",
"Cluster analysis",
"Cochran's Q test",
"Coding",
"Cohort analysis",
"Collaborative research",
"Comparative research",
"Computer-assisted qualitative data analysis",
"Confidence intervals",
"Confirmatory factor analysis",
"Consensus clustering", # Adapted from "Consensus" after looking up what the method linked to
"Constant comparison",
"Constructivism",
"Content analysis",
"Contingency tables",
"Continuous variables",
"Conversation analysis",
"Convolutional neural networks",
"Cook's distance",
"Correlation",
"Cramer's V",
"Critical discourse analysis",
"Critical realism",
"Critical resistance analysis",
"Cronbach's alpha",
"Cross-tabulation",
"Data collection",
"Data management",
"Data mining",
"Data quality and data management",
"Data sharing",
"Data visualization",
"Databases",
"Deduction",
"Degrees of freedom",
"Dependent variables",
"Diary methods",
"Discourse analysis",
"Dispersion",
"Documentary analysis",
"Documentary research",
"Doing research",
"Dummy variables",
"Elite interviews",
"Ethnographic interviews",
"Ethnographic writing",
"Ethnography",
"Ethnomethodology",
"Experimental design",
"Exploratory data analysis",
"F-tests",
"Factor analysis",
"Factor scales",
"Feminism",
"Feminist research",
"Field notes",
"Fieldwork",
"Fisher's exact test",
"Fixed effects model",
"Focus groups",
"Frequency distribution",
"General linear models",
"Geographic information systems",
"Geospatial data",
"Graphs",
"Grounded theory",
"Health data",
"Hermeneutic cycle",
"Hermeneutics",
"Heteroscedasticity",
"Histograms",
"Homogeneity of variance",
"Hypothesis testing",
"Implementation",
"In-depth interviews",
"Independent variables",
"Indigenous research",
"Induction",
"Intercoder reliability",
"Internet research",
"Interpretive phenomenological analysis",
"Intervention studies",
"Kaiser-Meyer-Olkin test",
"Kendall's rank correlation",
"Key informant interviews",
"Kolmogorov-Smirnov test",
"Kruskal-Wallis test",
"Latent variable models",
"Latent variables",
"Least squares criterion",
"Levene's test",
"Life history interviews",
"Life history research",
"Linear regression",
"Log-linear analysis",
"Logistic regression",
"Logit and probit models",
"Longitudinal analysis",
"Longitudinal research",
"Mann-Whitney U test",
"Marketing research",
"Maximum likelihood estimation",
"Measurement",
"Media analysis",
"Memos",
"Mindfulness",
"Missing data",
"Mixed methods",
"Moderators",
"Monte Carlo methods",
"Multicollinearity",
"Multilevel analysis",
"Multiple analysis of variance",
"Multiple regression",
"Narrative analysis",
"Narrative research",
"National surveys",
"Naturally occurring data",
"Network analysis",
"Network visualization",
"Nodes",
"Non-parametric statistics",
"Normal distribution",
"Null hypothesis",
"Observational research",
"Observed frequencies",
"One-way analysis of variance",
"Online interviews",
"Online surveys",
"Open coding",
"Oral history interviews",
"Oral history research",
"Ordinal scales",
"Ordinary least squares",
"Outcome measures",
"Outliers",
"Paired t-test",
"Partial correlation",
"Participant observation",
"Path analysis",
"Pearson's correlation coefficient",
"Personal narratives",
"Phenomenological analysis",
"Phenomenology",
"Phi coefficient",
"Photographs",
"Pie charts",
"Pilot studies",
"Poisson regression",
"Population variance",
"Populations",
"Pre-processing",
"Principal components analysis",
"Probability",
"Process theory",
"Programming",
"Protocols",
"Python",
"Q methodology",
"Qualitative data analysis",
"Qualitative data collection",
"Qualitative interviewing",
"Quantitative data analysis",
"Quantitative data collection",
"R packages",
"R statistical package",
"Random variables",
"Range",
"Realism",
"Reasoning",
"Regression analysis",
"Relational data",
"Reliability",
"Research funding",
"Research questions",
"Research with children",
"Response rates",
"Rhetoric",
"RStudio",
"Sampling",
"Scatterplots",
"Secondary data analysis",
"Semi-structured interviews",
"Sentiment analysis",
"Situational analysis",
"Snowball sampling",
"Social constructionism",
"Social interaction",
"Social media research",
"Social network analysis",
"Social network research",
"Social structures",
"Spearman's rank order correlation",
"SPSS",
"Stakeholders",
"Standard deviations",
"Standard error",
"Stata",
"Statistical inference",
"Statistical modelling",
"Statistical packages",
"Statistical tests",
"Stem-and-leaf",
"Structural analysis",
"Structural equation modelling",
"Structuralism",
"Structured interviews",
"Summated scales",
"Supervised learning",
"Survey research",
"Survey weighting",
"T-test",
"Tape-recording",
"Teaching research methods",
"Textual analysis",
"Thematic analysis",
"Theory",
"Thick description",
"Time-series analysis",
"Topic modelling",
"Transcription",
"Two-way analysis of variance",
"Type I errors",
"Units of analysis",
"Validity",
"Video research",
"Vignettes",
"Visual research",
"Vulnerable groups",
"Web scraping",
"Wilcoxon test",
"Wilks' lambda",
"Yates' correction"]

sage = pd.DataFrame(list(zip(sage_methods, [path] * len(sage_methods), ['SAGE'] * len(sage_methods))),
                    columns = ['method', 'url', 'source'])
sage['method_lower'] = sage.method.str.lower()
print(len(sage_methods), "methods were added.\n")

257 methods were added.



In [4]:
methods = pd.concat([oxford, sage])
methods

Unnamed: 0,method,url,source,method_lower
0,Abduction,https://www.oxfordreference.com/view/10.1093/a...,Oxford,abduction
1,A/b test,https://www.oxfordreference.com/view/10.1093/a...,Oxford,a/b test
2,Accelerated longitudinal design,https://www.oxfordreference.com/view/10.1093/a...,Oxford,accelerated longitudinal design
3,Action research,https://www.oxfordreference.com/view/10.1093/a...,Oxford,action research
4,Activity theory,https://www.oxfordreference.com/view/10.1093/a...,Oxford,activity theory
...,...,...,...,...
252,Vulnerable groups,https://methods.sagepub.com/BrowseMethods?type...,SAGE,vulnerable groups
253,Web scraping,https://methods.sagepub.com/BrowseMethods?type...,SAGE,web scraping
254,Wilcoxon test,https://methods.sagepub.com/BrowseMethods?type...,SAGE,wilcoxon test
255,Wilks' lambda,https://methods.sagepub.com/BrowseMethods?type...,SAGE,wilks' lambda


In [5]:
print(len(set(methods.method_lower)), "methods were found.\n")

633 methods were found.



In [6]:
methods.to_csv("methods.csv", index = False)
# Last saved on 2022-06-08