In [None]:
pip install selenium pandas

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv
import os
import time
import re

# Start the Chrome browser session shared by every scraping function in this cell.
driver = webdriver.Chrome()

# Entry point: the journal's "browse by year" page, from which year links are discovered.
START_URL = 'https://misq.umn.edu/misq/issue/browse-by-year'

# Inclusive range of years to scrape (2010 to 2025).
START_YEAR = 2010
END_YEAR = 2025

# The CSV is written to the current working directory, which is only the
# notebook's folder if the kernel was started there; both paths are printed
# so the actual location is visible.
OUT_FILE = os.path.join(os.getcwd(), 'MISQ_Issues.csv')
print(f"CSV file will be saved to: {OUT_FILE}")
print(f"Current working directory: {os.getcwd()}\n")
# NOTE(review): `data` is not referenced by any code visible in this file.
data = []

def write_to_csv(rows):
    """Append article rows to OUT_FILE.

    The header row is emitted first whenever OUT_FILE is missing or empty, so
    repeated calls build up a single well-formed CSV.

    Args:
        rows: Iterable of [title, url, volume_issue, year] lists.
    """
    needs_header = not (os.path.exists(OUT_FILE) and os.path.getsize(OUT_FILE) > 0)
    with open(OUT_FILE, mode='a', newline='', encoding='utf-8') as handle:
        out = csv.writer(handle)
        if needs_header:
            out.writerow(["Title", "URL", "Volume Issue", "Vol Issue Year"])
            print(f"Created CSV file: {OUT_FILE}")
        out.writerows(rows)
        # Push the rows out right away so progress is visible on disk.
        handle.flush()
    print(f"  ✓ Saved {len(rows)} articles to {OUT_FILE}")

# Titles shorter than this are treated as missing and trigger the fallbacks.
_ARTICLE_MIN_TITLE_LEN = 6


def _is_article_href(url):
    """Return True when *url* looks like an article link rather than an issue link."""
    if not url:
        return False
    return '/article' in url or ('/misq/vol' in url and '/issue' not in url)


def _collect_article_links(elements, links_by_url):
    """Record unseen article links from *elements*; return how many elements matched."""
    matched = 0
    for element in elements:
        try:
            # One WebDriver round-trip per element; the value is reused below.
            url = element.get_attribute('href')
        except Exception:
            continue
        if not _is_article_href(url):
            continue
        matched += 1
        links_by_url.setdefault(url, element)
    return matched


def _article_title(link):
    """Return the link text, or a nearby parent/heading text, or "N/A" if all are too short."""
    title = link.text.strip()
    if len(title) >= _ARTICLE_MIN_TITLE_LEN:
        return title
    fallbacks = (
        './..',
        './preceding-sibling::h3 | ./preceding-sibling::h4 | '
        './following-sibling::h3 | ./following-sibling::h4',
    )
    for xpath in fallbacks:
        try:
            title = link.find_element(By.XPATH, xpath).text.strip()
        except Exception:
            continue
        if len(title) >= _ARTICLE_MIN_TITLE_LEN:
            return title
    return "N/A"


def scrape_issue_page(issue_url, vol_issue, year):
    """Scrape the article links on one issue page and append them to the CSV.

    Args:
        issue_url: URL of the issue page to open.
        vol_issue: Label written to the "Volume Issue" column.
        year: Label written to the "Vol Issue Year" column.

    Returns:
        The number of article rows written; 0 when none were found or the
        page could not be scraped (errors are printed, never raised).
    """
    try:
        driver.get(issue_url)
        # With a non-zero implicit wait every selector that matches nothing
        # blocks for the whole timeout. Wait once, explicitly, for a candidate
        # link to show up and then probe the selectors with no implicit wait.
        driver.implicitly_wait(0)
        try:
            WebDriverWait(driver, 15).until(
                lambda d: d.find_elements(
                    By.CSS_SELECTOR, 'a[href*="/article"], a[href*="/misq/vol"]'
                )
            )
        except Exception:
            print("  No candidate article links appeared within 15s")
        time.sleep(2)

        print(f"  Page loaded: {driver.title}")

        # Candidate selectors for article links, tried in order.
        selectors = [
            'a[href*="/misq/vol"]',
            'a[href*="/article"]',
            '.article-title a',
            '.article a',
            'h3 a',
            'h4 a',
            '.title a',
            'article a',
            '.entry-title a',
            'li a[href*="/article"]',
            'div.article a',
            'table a[href*="/article"]'
        ]

        # href -> first element seen with that href (dicts keep insertion order).
        links_by_url = {}

        print("  Searching for articles...")
        for selector in selectors:
            try:
                found = driver.find_elements(By.CSS_SELECTOR, selector)
            except Exception:
                continue
            if not found:
                continue
            print(f"    Selector '{selector}': Found {len(found)} links")
            matched = _collect_article_links(found, links_by_url)
            if matched:
                print(f"      → {matched} are article links")

        if not links_by_url:
            # Fallback: inspect every anchor on the page.
            print("  Trying fallback: checking all links...")
            _collect_article_links(driver.find_elements(By.TAG_NAME, 'a'), links_by_url)

        print(f"  Total unique articles found: {len(links_by_url)}")

        rows = []
        for article_url, link in links_by_url.items():
            try:
                # Make sure the URL is absolute.
                if article_url.startswith('/'):
                    article_url = 'https://misq.umn.edu' + article_url
                if not article_url.startswith('http'):
                    continue

                article_title = _article_title(link)
                if article_title != "N/A":
                    rows.append([article_title, article_url, vol_issue, year])
                    print(f"    ✓ {article_title[:60]}...")
            except Exception as e:
                print(f"    Error extracting article: {e}")
                continue

        if rows:
            write_to_csv(rows)
            return len(rows)

        print(f"  ⚠ No articles found on this page")
        # Debug aid: show the start of the page text.
        try:
            page_text = driver.find_element(By.TAG_NAME, 'body').text[:300]
            print(f"  Page content preview: {page_text[:200]}...")
        except Exception:
            pass
        return 0

    except Exception as e:
        print(f"  ✗ Error scraping issue page: {e}")
        import traceback
        traceback.print_exc()
        return 0
    finally:
        # Put back the implicit wait the rest of the script uses.
        try:
            driver.implicitly_wait(15)
        except Exception:
            pass  # the browser session is gone; nothing to restore

def _issue_label(url, link_text, year):
    """Build the volume/issue label for an issue link from its URL or text."""
    match = re.search(r'vol[^\d]*(\d+)[^\d]*issue[^\d]*(\d+)', url, re.I)
    if match:
        return f"Vol {match.group(1)}, Issue {match.group(2)}"
    if link_text and len(link_text) > 3:
        return link_text
    # Last path segment; strip a trailing slash first so the label is not empty.
    return url.rstrip('/').rsplit('/', 1)[-1] or f"Vol {year}"


def _collect_issue_links(elements, links_by_url):
    """Record unseen issue links (href contains "/vol"); return how many elements matched."""
    matched = 0
    for element in elements:
        try:
            # One WebDriver round-trip per element; the value is reused below.
            url = element.get_attribute('href')
        except Exception:
            continue
        if not url or '/vol' not in url:
            continue
        matched += 1
        links_by_url.setdefault(url, element)
    return matched


def scrape_year_page(year_url, year):
    """Find every issue linked from a year page and scrape each of them.

    Args:
        year_url: URL of the page listing the issues of one year.
        year: The year being scraped; passed on (as a string) to
            scrape_issue_page and used for fallback labels.

    Returns:
        Total number of article rows written for the year; 0 when no issues
        were found or the page could not be scraped (errors are printed).
    """
    try:
        print(f"\n{'='*60}")
        print(f"Navigating to year page: {year_url}")
        driver.get(year_url)
        # With a non-zero implicit wait every selector that matches nothing
        # blocks for the whole timeout. Wait once, explicitly, for an
        # issue-like link and then probe the selectors with no implicit wait.
        driver.implicitly_wait(0)
        try:
            WebDriverWait(driver, 15).until(
                lambda d: d.find_elements(By.CSS_SELECTOR, 'a[href*="/vol"]')
            )
        except Exception:
            print("No '/vol' links appeared within 15s; continuing with what loaded")
        time.sleep(3)  # Give page more time to load

        print(f"Page title: {driver.title}")
        print(f"Current URL: {driver.current_url}")

        # Debug aid: print the start of the page text.
        try:
            page_text = driver.find_element(By.TAG_NAME, 'body').text[:500]
            print(f"Page content preview: {page_text}...")
        except Exception:
            pass

        # Candidate selectors for issue links, tried in order.
        selectors = [
            'a[href*="/misq/vol"]',
            'a[href*="/vol"]',
            '.issue-link a',
            '.issue a',
            'h2 a',
            'h3 a',
            'h4 a',
            'li a',
            '.volume a',
            'article a',
            'div a[href*="/vol"]',
            'table a[href*="/vol"]'
        ]

        # href -> first element seen with that href (dicts keep insertion order).
        links_by_url = {}

        print("\nTrying to find issue links...")
        for selector in selectors:
            try:
                links = driver.find_elements(By.CSS_SELECTOR, selector)
            except Exception as e:
                print(f"  Selector '{selector}': Error - {e}")
                continue
            if not links:
                continue
            print(f"  Selector '{selector}': Found {len(links)} links")
            matched = _collect_issue_links(links, links_by_url)
            if matched:
                print(f"    → {matched} are issue links")

        print(f"\nTotal unique issue links found: {len(links_by_url)}")

        if not links_by_url:
            # Fallback: inspect every anchor. The previous cap of 50 could be
            # used up by navigation links before any issue link was reached.
            print("Trying fallback: checking all links on page...")
            all_links = driver.find_elements(By.TAG_NAME, 'a')
            print(f"Total links on page: {len(all_links)}")

            for link in all_links:
                try:
                    url = link.get_attribute('href') or ''
                    text = link.text.strip()
                except Exception:
                    continue
                if '/vol' in url and url not in links_by_url:
                    print(f"  Found issue link: {text[:50]} -> {url}")
                    links_by_url[url] = link

        # Resolve every link to (absolute URL -> label) before navigating away,
        # because the element handles become stale once another page loads.
        unique_issues = {}
        for url, link in links_by_url.items():
            try:
                if url.startswith('/'):
                    url = 'https://misq.umn.edu' + url
                vol_issue = _issue_label(url, link.text.strip(), year)
                unique_issues[url] = vol_issue
                print(f"  Issue: {vol_issue} -> {url}")
            except Exception as e:
                print(f"  Error processing link: {e}")
                continue

        print(f"\n{'='*60}")
        print(f"Processing {len(unique_issues)} issues for year {year}...")
        print(f"{'='*60}")

        if not unique_issues:
            print(f"WARNING: No issues found for year {year}!")
            print("Page HTML snippet:")
            try:
                html_snippet = driver.page_source[:1000]
                print(html_snippet)
            except Exception:
                pass
            return 0

        total_articles = 0
        for issue_url, vol_issue in unique_issues.items():
            print(f"\n{'─'*60}")
            print(f"Scraping: {vol_issue}")
            print(f"URL: {issue_url}")
            count = scrape_issue_page(issue_url, vol_issue, str(year))
            total_articles += count
            print(f"  → Found {count} articles")
            time.sleep(1)  # Be respectful

        return total_articles

    except Exception as e:
        print(f"Error scraping year page {year_url}: {e}")
        import traceback
        traceback.print_exc()
        return 0
    finally:
        # Put back the implicit wait the rest of the script uses.
        try:
            driver.implicitly_wait(15)
        except Exception:
            pass  # the browser session is gone; nothing to restore

# Main scraping logic
def _years_mentioned(*fragments):
    """Return, in ascending order, the target years that appear as standalone
    four-digit numbers in any of *fragments*.

    Digit boundaries keep e.g. "2010" inside "120104" from counting as a year.
    """
    hits = set()
    for fragment in fragments:
        for token in re.findall(r'(?<!\d)\d{4}(?!\d)', fragment):
            if START_YEAR <= int(token) <= END_YEAR:
                hits.add(int(token))
    return sorted(hits)


try:
    driver.get(START_URL)
    driver.implicitly_wait(15)
    time.sleep(2)

    print("Starting MIS Quarterly scraper (2010-2025)...")
    print(f"Browse page: {START_URL}\n")

    # year -> URL of that year's page
    year_links = {}

    print("Searching for year links on browse-by-year page...")
    print(f"Page title: {driver.title}")
    print(f"Current URL: {driver.current_url}\n")

    all_links = driver.find_elements(By.TAG_NAME, 'a')
    print(f"Total links found on page: {len(all_links)}")

    # First pass: links that mention a target year in their URL or text.
    for link in all_links:
        try:
            url = link.get_attribute('href') or ''
            text = link.text.strip()
        except Exception:
            continue

        # Make URL absolute if relative
        if url.startswith('/'):
            url = 'https://misq.umn.edu' + url
        if not url.startswith('http'):
            continue

        for year in _years_mentioned(url, text):
            if year not in year_links:
                year_links[year] = url
                print(f"  ✓ Found year {year}: {text[:40]} -> {url}")

    print(f"\nFound {len(year_links)} year links directly from page")

    # If fewer than half of the years were found, guess the missing year URLs.
    if len(year_links) < (END_YEAR - START_YEAR + 1) / 2:
        print("\nTrying to construct year URLs...")
        # Common patterns for year pages
        base_patterns = [
            'https://misq.umn.edu/misq/issue/browse-by-year/{}',
            'https://misq.umn.edu/misq/issue/{}',
            'https://misq.umn.edu/misq/vol/{}'
        ]

        for year in range(START_YEAR, END_YEAR + 1):
            if year in year_links:
                continue  # Skip if already found

            for pattern in base_patterns:
                test_url = pattern.format(year)
                try:
                    driver.get(test_url)
                    time.sleep(1)
                    page_title = driver.title.lower()
                except Exception:
                    continue
                # A guessed URL is accepted unless the title signals an error page.
                if '404' not in page_title and 'not found' not in page_title:
                    year_links[year] = test_url
                    print(f"  ✓ Constructed year {year}: {test_url}")
                    break

    # Go back to browse page
    driver.get(START_URL)
    time.sleep(2)

    print(f"\n{'='*60}")
    print(f"Total year links found: {len(year_links)}")
    print(f"Years: {sorted(year_links.keys())}")
    print(f"{'='*60}\n")

    # Scrape each year (every key is already within START_YEAR..END_YEAR).
    total_articles_scraped = 0
    for year in sorted(year_links):
        print(f"\n{'='*60}")
        print(f"Scraping year {year}")
        print(f"{'='*60}")
        count = scrape_year_page(year_links[year], year)
        total_articles_scraped += count
        print(f"Year {year}: {count} articles scraped")
        time.sleep(2)

    print(f"\n{'='*60}")
    print(f"Scraping complete!")
    print(f"Total articles scraped: {total_articles_scraped}")
    print(f"{'='*60}")

except Exception as e:
    print(f"Exception: {e}")
    import traceback
    traceback.print_exc()

finally:
    # Always close the browser, even after a failure.
    driver.quit()

In [None]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv, time, os, random, re, json
from bs4 import BeautifulSoup

# -----------------------
# CONFIG
# -----------------------
# Row window of the input CSV handled by this run: rows START_INDEX (inclusive)
# to END_INDEX (exclusive) are taken with df.iloc in the main loop.
START_INDEX = 0
END_INDEX = 50  # set to len(df) for all
# Timeout, in seconds, given to the shared WebDriverWait created below.
WAIT_SEC = 25

# Bounds, in seconds, of the random pause taken before each page load.
SLEEP_MIN = 2.0
SLEEP_MAX = 4.5

# Defaults for wait_until_unblocked: total time to wait on a block page and
# the interval between re-checks.
BOT_MAX_WAIT_SEC = 12 * 60  # 12 minutes
BOT_POLL_SEC = 5

# Input (article links from the previous cell) and output files, both
# resolved against the current working directory.
CSV_PATH = os.path.join(os.getcwd(), "MISQ_Issues.csv")
OUT_FILE = os.path.join(os.getcwd(), "MISQ_article_data.csv")
JOURNAL_TITLE = "MIS Quarterly"


# -----------------------
# LOAD INPUT
# -----------------------
df = pd.read_csv(CSV_PATH)
n_total = len(df)
# Clamp the window so it never runs past the last row.
END_INDEX = min(END_INDEX, n_total)

print(f"Total URLs: {n_total}")
print(f"Processing: {START_INDEX} -> {END_INDEX}")
print(f"Output: {OUT_FILE}\n")


# -----------------------
# OUTPUT HEADER
# -----------------------
# Write the header only when the output file is missing or empty; an existing
# file is left alone so later runs append to it.
if not os.path.exists(OUT_FILE) or os.path.getsize(OUT_FILE) == 0:
    with open(OUT_FILE, "w", newline="", encoding="utf-8") as f:
        csv.writer(f).writerow([
            "URL","Journal_Title","Article_Title","Volume_Issue","Month_Year",
            "Abstract","Keywords","Author_name","Author_email","Author_Address"
        ])


# -----------------------
# CHROME SETUP (single driver + persistent profile)
# -----------------------
opts = Options()
# NOTE(review): the profile path is hard-coded to /tmp, which does not exist
# on Windows - confirm the target platform.
opts.add_argument("--user-data-dir=/tmp/misq_profile")
opts.add_argument("--profile-directory=Default")

# Lighter page loads: content-setting prefs for images, stylesheets and fonts.
# NOTE(review): presumably 2 means "block"; confirm Chrome honors all three keys.
prefs = {
    "profile.managed_default_content_settings.images": 2,
    "profile.managed_default_content_settings.stylesheets": 2,
    "profile.managed_default_content_settings.fonts": 2,
}
opts.add_experimental_option("prefs", prefs)

# One browser session and one explicit-wait helper shared by the main loop.
driver = webdriver.Chrome(options=opts)
wait = WebDriverWait(driver, WAIT_SEC)


# -----------------------
# HELPERS
# -----------------------
def is_block_page(driver) -> bool:
    """Report whether the current page looks like a bot-check or block page.

    The page counts as blocked when its title or source contains one of the
    known challenge phrases, or its URL contains one of the challenge markers.
    All comparisons are case-insensitive; missing values are treated as empty.
    """
    block_phrases = (
        "captcha",
        "checking your browser",
        "access denied",
        "attention required",
        "cloudflare",
        "unusual traffic",
        "verify you are human",
        "robot check",
        "ddos",
    )
    url_markers = ("crawlprevention", "challenge", "captcha")

    page_title = (driver.title or "").lower()
    page_url = (driver.current_url or "").lower()
    page_src = (driver.page_source or "").lower()

    for phrase in block_phrases:
        if phrase in page_title or phrase in page_src:
            return True
    return any(marker in page_url for marker in url_markers)

def wait_until_unblocked(driver, max_wait_sec=BOT_MAX_WAIT_SEC, poll_sec=BOT_POLL_SEC) -> bool:
    """Poll until the current page stops looking like a block page.

    Args:
        driver: Browser session to inspect with is_block_page.
        max_wait_sec: Give up once this many seconds have elapsed.
        poll_sec: Seconds to sleep between checks.

    Returns:
        True as soon as the page is no longer a block page, False when
        max_wait_sec was exceeded first.
    """
    began = time.time()
    announced = False
    while True:
        if not is_block_page(driver):
            return True
        if not announced:
            # Tell the operator once; they may need to solve the challenge by hand.
            print("⚠ Verification/block detected. If Chrome shows a challenge, solve it there.")
            print("   Waiting automatically...")
            announced = True
        if time.time() - began > max_wait_sec:
            return False
        time.sleep(poll_sec)

def clean_text(s):
    """Trim *s* and collapse each run of whitespace to a single space.

    Falsy input (None, "") yields the empty string.
    """
    trimmed = (s or "").strip()
    return re.sub(r"\s+", " ", trimmed)

def safe_join_keywords(items):
    """Join keywords with "; " after cleaning and case-insensitive de-duplication.

    The first spelling of each keyword is kept, in first-seen order.
    Returns "N/A" when no non-empty keyword remains.
    """
    first_spelling = {}
    for raw in items:
        keyword = clean_text(raw)
        if keyword:
            # setdefault keeps the earliest spelling for each lower-cased key.
            first_spelling.setdefault(keyword.lower(), keyword)
    return "; ".join(first_spelling.values()) or "N/A"

def meta_all(soup, name=None, prop=None):
    """Return the cleaned, non-empty content values of matching <meta> tags.

    Tags matching ``name=<name>`` come first, followed by tags matching
    ``property=<prop>``; either filter is skipped when not given.
    """
    lookups = []
    if name:
        lookups.append({"name": name})
    if prop:
        lookups.append({"property": prop})

    raw_values = [
        tag.get("content")
        for attrs in lookups
        for tag in soup.find_all("meta", attrs=attrs)
    ]
    cleaned = (clean_text(value) for value in raw_values if value)
    return [value for value in cleaned if value]

def meta_first(soup, name=None, prop=None):
    """Return the first value meta_all finds for the given filters, or None."""
    matches = meta_all(soup, name=name, prop=prop)
    if not matches:
        return None
    return matches[0]

def parse_jsonld(soup):
    """Return every JSON-LD object (dict) embedded in the page.

    Each ``<script type="application/ld+json">`` is parsed; top-level dicts
    and dict members of top-level lists are returned. Dict nodes listed under
    an object's ``@graph`` key are returned as well (after the object itself),
    so an article nested in a graph can still be found. Scripts that are empty
    or not valid JSON are skipped.
    """
    objs = []
    for script in soup.find_all("script", attrs={"type": "application/ld+json"}):
        raw = script.get_text(strip=True)
        if not raw:
            continue
        try:
            data = json.loads(raw)
        except (ValueError, RecursionError):
            # Malformed JSON-LD is common; skip it. JSONDecodeError is a ValueError.
            continue
        nodes = data if isinstance(data, list) else [data]
        for node in nodes:
            if not isinstance(node, dict):
                continue
            objs.append(node)
            graph = node.get("@graph")
            if isinstance(graph, list):
                objs.extend(member for member in graph if isinstance(member, dict))
    return objs

def pick_article_jsonld(jsonlds):
    """Return the first JSON-LD object typed as an article, or None.

    ``@type`` may be a string or a list; matching is case-insensitive against
    ScholarlyArticle, Article and NewsArticle. Non-string type entries are ignored.
    """
    wanted = {"scholarlyarticle", "article", "newsarticle"}
    for candidate in jsonlds:
        declared = candidate.get("@type")
        if isinstance(declared, str):
            declared = [declared]
        elif not isinstance(declared, list):
            continue
        kinds = {entry.lower() for entry in declared if isinstance(entry, str)}
        if kinds & wanted:
            return candidate
    return None

def extract_authors_from_jsonld(article_obj):
    """Return the author names found in a JSON-LD article object.

    ``author`` may be a dict with a ``name``, a plain string, or a list mixing
    both. Names are cleaned and de-duplicated case-insensitively in first-seen
    order. An empty list is returned when there is no object or no author.
    """
    if not article_obj:
        return []

    field = article_obj.get("author")
    if isinstance(field, (dict, str)):
        entries = [field]
    elif isinstance(field, list):
        entries = field
    else:
        entries = []

    collected = []
    for entry in entries:
        if isinstance(entry, dict):
            name = entry.get("name")
            if name:
                collected.append(clean_text(name))
        elif isinstance(entry, str):
            collected.append(clean_text(entry))

    # De-duplicate, dropping names that cleaned down to nothing.
    first_spelling = {}
    for name in collected:
        if name:
            first_spelling.setdefault(name.lower(), name)
    return list(first_spelling.values())

def looks_like_name(s):
    """Heuristically decide whether *s* could be a person's name.

    Accepts cleaned text of 3-80 characters and at most 8 words that is not
    one of a few page-label words (author, abstract, keywords, ...).
    """
    if not s:
        return False
    text = clean_text(s)
    if not 3 <= len(text) <= 80:
        return False
    if text.lower() in {"author", "authors", "abstract", "keywords", "pdf", "n/a"}:
        return False
    return len(text.split()) <= 8

def extract_authors_fallback(soup):
    """Find author names without JSON-LD.

    Order of preference:
    1. ``citation_author`` meta tags - returned as-is (no de-duplication)
       when at least one plausible name is present.
    2. Visible author/byline elements - the first selector that yields any
       plausible name wins; its names are de-duplicated case-insensitively.
    """
    # 1) citation_author meta tags
    meta_names = []
    for tag in soup.find_all("meta"):
        if (tag.get("name") or "").lower() != "citation_author":
            continue
        content = tag.get("content")
        if content:
            meta_names.append(clean_text(content))
    meta_names = [candidate for candidate in meta_names if looks_like_name(candidate)]
    if meta_names:
        return meta_names

    # 2) generic visible author selectors
    author_selectors = (
        ".author-name", ".authors a", ".byline a", ".author a",
        ".author", ".byline span",
    )
    first_spelling = {}
    for selector in author_selectors:
        for element in soup.select(selector):
            block = clean_text(element.get_text(" ", strip=True))
            # One element may hold several names separated by commas or semicolons.
            for piece in re.split(r"[\n,;]+", block):
                piece = clean_text(piece)
                if looks_like_name(piece):
                    first_spelling.setdefault(piece.lower(), piece)
        if first_spelling:
            break
    return list(first_spelling.values())

def extract_title(soup, article_obj=None):
    """Return the article title, or "N/A" when none can be found.

    Sources, in order: JSON-LD ``headline``/``name``, the ``citation_title``
    meta tag, ``og:title``, ``twitter:title``, and finally the first <h1>.
    """
    # 1) JSON-LD
    if article_obj:
        for key in ("headline", "name"):
            value = article_obj.get(key)
            if isinstance(value, str):
                cleaned = clean_text(value)
                if cleaned:
                    return cleaned

    # 2) citation_title meta
    for tag in soup.find_all("meta"):
        if (tag.get("name") or "").lower() == "citation_title":
            cleaned = clean_text(tag.get("content"))
            if cleaned:
                return cleaned

    # 3) social-card titles
    social_title = meta_first(soup, prop="og:title") or meta_first(soup, name="twitter:title")
    if social_title:
        return social_title

    # 4) first heading on the page
    heading = soup.select_one("h1")
    if heading:
        cleaned = clean_text(heading.get_text(" ", strip=True))
        if cleaned:
            return cleaned

    return "N/A"

def extract_abstract(soup, article_obj=None):
    """Return the article abstract, or "N/A" when none can be found.

    A candidate is accepted only if its cleaned text has at least 20
    characters. Sources, in order: JSON-LD ``description``/``abstract``,
    the citation_abstract / dc.description / description meta tags, and
    visible abstract containers.
    """
    min_len = 20

    # 1) JSON-LD
    if article_obj:
        for key in ("description", "abstract"):
            value = article_obj.get(key)
            if isinstance(value, str):
                text = clean_text(value)
                if len(text) >= min_len:
                    return text

    # 2) meta tags
    meta_names = ("citation_abstract", "dc.description", "description")
    for tag in soup.find_all("meta"):
        if (tag.get("name") or "").lower() in meta_names:
            text = clean_text(tag.get("content"))
            if len(text) >= min_len:
                return text

    # 3) visible blocks
    for selector in ("#abstract", ".abstract", "section.abstract", "[class*='abstract']"):
        element = soup.select_one(selector)
        if not element:
            continue
        text = clean_text(element.get_text(" ", strip=True))
        if len(text) >= min_len:
            return text

    return "N/A"

def extract_keywords(soup, article_obj=None):
    """Return the article keywords joined with "; ", or "N/A" when none are found.

    Sources, in order: JSON-LD ``keywords`` (string or list of strings), the
    ``citation_keywords`` meta tag, and visible keyword/tag elements. A source
    that is present but yields no usable keyword (for example an empty string
    or empty list) no longer ends the search; the next source is tried.
    """
    def split_keywords(text):
        # Keyword strings are separated by commas and/or semicolons.
        pieces = (clean_text(piece) for piece in re.split(r"[;,]+", text))
        return [piece for piece in pieces if piece]

    # 1) JSON-LD
    if article_obj:
        kw = article_obj.get("keywords")
        if isinstance(kw, str):
            joined = safe_join_keywords(split_keywords(kw))
        elif isinstance(kw, list):
            joined = safe_join_keywords([x for x in kw if isinstance(x, str)])
        else:
            joined = "N/A"
        if joined != "N/A":
            return joined

    # 2) citation_keywords meta
    for m in soup.find_all("meta"):
        if (m.get("name") or "").lower() != "citation_keywords":
            continue
        joined = safe_join_keywords(split_keywords(clean_text(m.get("content"))))
        if joined != "N/A":
            return joined

    # 3) visible selectors
    kws = []
    for sel in [".keyword", ".keywords a", ".keywords span", ".tag", "[class*='keyword'] a", "[class*='keyword'] span"]:
        for el in soup.select(sel):
            t = clean_text(el.get_text(" ", strip=True))
            if t:
                kws.append(t)
    return safe_join_keywords(kws)

# -----------------------
# MAIN LOOP
# -----------------------
def _append_rows(rows):
    """Append *rows* to OUT_FILE right away so finished work survives a crash."""
    with open(OUT_FILE, "a", newline="", encoding="utf-8") as f:
        csv.writer(f).writerows(rows)
        f.flush()


try:
    batch = df.iloc[START_INDEX:END_INDEX]
    for pos, (_, row) in enumerate(batch.iterrows(), start=1):

        url = clean_text(str(row.get("URL", "")))
        month_year = row.get("Vol Issue Year", "N/A")
        volume_issue = row.get("Volume Issue", "N/A")

        # Skip rows without a usable absolute URL.
        if not url.startswith("http"):
            continue

        # NOTE: if the link-collection step captured non-article links, filter
        # them here, e.g. on "/article" or "/misq/vol" being part of the URL.

        # Randomized pause between requests.
        time.sleep(random.uniform(SLEEP_MIN, SLEEP_MAX))

        try:
            driver.get(url)
            try:
                wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
            except Exception:
                # Best effort: carry on with whatever loaded. Not a bare
                # `except`, so a keyboard interrupt still stops the run.
                pass

            if is_block_page(driver):
                ok = wait_until_unblocked(driver)
                if not ok:
                    print(f"[{pos}] Timed out blocked. Skipping: {url}")
                    continue
                # reload after unblock
                driver.get(url)
                time.sleep(2)

            html = driver.page_source
            soup = BeautifulSoup(html, "html.parser")

            jsonlds = parse_jsonld(soup)
            article_obj = pick_article_jsonld(jsonlds)

            title = extract_title(soup, article_obj)
            abstract = extract_abstract(soup, article_obj)
            keywords = extract_keywords(soup, article_obj)

            # Prefer structured author data; fall back to meta tags / page markup.
            authors = extract_authors_from_jsonld(article_obj)
            if not authors:
                authors = extract_authors_fallback(soup)
            if not authors:
                authors = ["N/A"]

            final_data = [url, JOURNAL_TITLE, title, volume_issue, month_year, abstract, keywords]

            # One output row per author; email and address are not extracted.
            _append_rows([final_data + [a, "N/A", "N/A"] for a in authors])

            print(f"[{pos}/{END_INDEX-START_INDEX}] ✓ {title[:60]} | authors: {len(authors)}")

        except Exception as e:
            print(f"[{pos}] ✗ Error on {url}: {e}")
            # Record the failed URL with placeholders so it can be retried later.
            _append_rows([[url, JOURNAL_TITLE, "N/A", volume_issue, month_year, "N/A", "N/A", "N/A", "N/A", "N/A"]])

finally:
    # Always close the browser, even after a failure or interrupt.
    driver.quit()
    print(f"\nDONE. Saved to: {OUT_FILE}")

Total URLs: 1273
Processing: 0 -> 50
Output: /Users/keerthisagi/Documents/Journals/MIS_Quarterly/MISQ_article_data.csv

