## The purpose of this notebook is to prep the raw data for citation network analyses

## Packages and Modules

In [1]:
# Data Manipulation
import pandas as pd 
import numpy as np 

# Data visualization
import matplotlib.pyplot as plt 
import seaborn as sns 

# NLP
import nltk 
nltk.data.path.append("../models/")
from nltk.util import ngrams  
from nltk import pos_tag
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from collections import Counter
import re 

## The dataset

In [2]:
raw_data = pd.read_csv("../data/02_intermediate/newlines_apos_removed_paragraphs_added.csv")
data = raw_data.copy()
data.sample(5)

Unnamed: 0,article_title,authors,abstract,full_text,citation_info,processed,paragraph
2873,A functional analysis of the comprehensive app...,"J E Selinske, R D Greer, and S Lodhi",This study tested the effects of a comprehensi...,JOURNAL OF APPLIED BEHAVIOR ANALYSIS\n\n199152...,J Appl Behav Anal. 1991 Spring; 24(1): 107–117.,JOURNAL OF APPLIED BEHAVIOR ANALYSIS 199152451...,"['JOURNAL OF APPLIED BEHAVIOR ANALYSIS', '1991..."
2413,Increasing Recycling In Academic Buildings: A ...,"T Ludwig, T Gray, and A Rowell",,(cid:74)(cid:79)(cid:85)(cid:82)(cid:78)(cid:6...,J Appl Behav Anal. 1998 Winter; 31(4): 683–686.,(cid:74)(cid:79)(cid:85)(cid:82)(cid:78)(cid:6...,['(cid:74)(cid:79)(cid:85)(cid:82)(cid:78)(cid...
497,They have a Voice; are we Listening?,Joseph Veneziano1 and Shannon Shea2,\nThe field of applied behavior analysis (ABA)...,Behavior Analysis in Practice (2023) 16:127–14...,Behav Anal Pract. 2023 Mar; 16(1): 127–144.,Behavior Analysis in Practice (2023) 16:127–14...,['Behavior Analysis in Practice (2023) 16:127–...
80,Using Instructive Feedback to Increase Respons...,Regina A. Carroll and Tiffany Kodak,We evaluated the effects of instructive feedba...,Analysis Verbal Behav (2015) 31:183–199\nDOI 1...,Anal Verbal Behav. 2015 Oct; 31(2): 183–199.,Analysis Verbal Behav (2015) 31:183–199 DOI 10...,['Analysis Verbal Behav (2015) 31:183–199\nDOI...
915,The Best and Worst Things Staff Report About B...,"Dennis H. Reid,1 Carolyn W. Green,1 Marsha B. ...",A variable affecting the success of staff trai...,Behavior Analysis in Practice (2019) 12:523–53...,Behav Anal Pract. 2019 Sep; 12(3): 523–535.,Behavior Analysis in Practice (2019) 12:523–53...,['Behavior Analysis in Practice (2019) 12:523–...


In [3]:
# Isolate the journal names
journal_names = []
for citation in data['citation_info'].astype(str):
    # Find the year via regex
    match = re.search(r'\d{4}', citation)
    if match:
        end_index = match.start()
        journal_name = citation[:end_index].strip()
    else:
        journal_name = citation  # If no year is found, assume the whole string is the journal name

    journal_names.append(journal_name)

data['journal'] = journal_names
data['journal'].unique()

array(['Anal Verbal Behav.', 'nan', 'VB News.', 'Behav Anal Pract.',
       'J Appl Behav Anal.', 'J Exp Anal Behav.', 'Perspect Behav Sci.',
       'Behav Anal.'], dtype=object)

In [4]:
# Swap out the journal shorthand with an easier to use abbreviation
journal_dict = {
    'Anal Verbal Behav.': 'TAVB', 
    'VB News.': 'TAVB', 
    'Behav Anal Pract.': 'BAP', 
    'J Appl Behav Anal.': ' JABA', 
    'J Exp Anal Behav.': 'JEAB', 
    'Perspect Behav Sci.': 'PoBS', 
    'Behav Anal': 'PoBS'
}
data['journal'] = data['journal'].map(journal_dict)
data['journal'].unique()

array(['TAVB', nan, 'BAP', ' JABA', 'JEAB', 'PoBS'], dtype=object)

In [5]:
data.sample(5)

Unnamed: 0,article_title,authors,abstract,full_text,citation_info,processed,paragraph,journal
8930,Stimulus control: Part I,James A. Dinsmoor,In his effort to distinguish operant from resp...,"The Behavior Analyst\n\n1995, 18, 51-68\n\nNo....",Behav Anal. 1995 Spring; 18(1): 51–68.,"The Behavior Analyst 1995, 18, 51-68 No. 1 (Sp...","['The Behavior Analyst', '1995, 18, 51-68', 'N...",
3632,"Comparison of extinction, DRO 0-sec, and DRO 6...",Steve S. Martinez,,"JOURNAL OF APPLIED BEHAVIOR ANALYSIS\n\n1977, ...",J Appl Behav Anal. 1977 Summer; 10(2): 315.,"JOURNAL OF APPLIED BEHAVIOR ANALYSIS 1977, 10,...","['JOURNAL OF APPLIED BEHAVIOR ANALYSIS', '1977...",JABA
559,Treatment Integrity Reporting in Behavior Anal...,"Galan Falakfarsa, Denys Brand, Lea Jones, Erik...",Treatment integrity is the extent to which pro...,Behavior Analysis in Practice (2022) 15:443–45...,Behav Anal Pract. 2022 Jun; 15(2): 443–453.,Behavior Analysis in Practice (2022) 15:443–45...,['Behavior Analysis in Practice (2022) 15:443–...,BAP
9230,Courseware and behavioral instruction: The des...,"B. Robert Ober, Timothy N. Trainor, and Georg...",,"The Behavior Analyst\n\n1985, 8, 273-274\n\nNo...",Behav Anal. 1985 Fall; 8(2): 273–274.,"The Behavior Analyst 1985, 8, 273-274 No. 2 (F...","['The Behavior Analyst', '1985, 8, 273-274', '...",
2660,The significance and future of functional anal...,F. Charles Mace,"Iwata, Dorsey, Slifer, Bauman, and Richman (19...",JOURNAL OF APPUED BEHAVIOR ANALYSIS\n\n1994)27...,J Appl Behav Anal. 1994 Summer; 27(2): 385–392.,JOURNAL OF APPUED BEHAVIOR ANALYSIS 1994)27.38...,"['JOURNAL OF APPUED BEHAVIOR ANALYSIS', '1994)...",JABA


In [11]:
data['processed'][0].lower().split("method")

["the analysis of verbal behavior (2021) 37:1–16 https://doi.org/10.1007/s40616-020-00138-x r e s e a r c h a r t i c l e teaching children with autism spectrum disorder to mand “why?” priya patil 1 & tina m. sidener 1 anjalee nirgudkar 2 & heather pane 1 & sharon a. reeve 1 & accepted: 18 september 2020/ # association for behavior analysis international 2021 published online: 14 january 2021 abstract for most children with autism spectrum disorder (asd), manding for information is an important skill that must be systematically taught. although previous studies have evaluated interventions for teaching other mands for information, to date no studies have demonstrated effective procedures for teaching the mand “why?” the purpose of the present study was to teach 3 children with asd to mand “why?” under relevant establishing operation conditions in 3 distinct scenarios. a trial-unique multiple-exem- plar procedure was used to promote generalization and increase the value of informa- tion