# PROQUEST URL Text Extraction
This notebook can be used to login to ProQuest utilizing your rice NetID and extract text data from a .csv file which was previously saved from a custom search. 

## Run the cell below and Log in Utilizing your Rice NetID

In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from getpass import getpass

# ProQuest login URL
login_url = 'https://www.proquest.com/login'

# Prompt the user to enter their ProQuest login credentials
username = input("Enter your username: ")
password = getpass("Enter your password: ")

# URL of the page to scrape
target_url = 'http://ezproxy.rice.edu/login?url=https://www.proquest.com/search/2623374?accountid=7064'

# Create a session object to persist the login session
session = requests.Session()

# Login payload with your credentials
login_payload = {
    'username': username,
    'password': password
}

# Send a POST request to the login URL with your credentials
response = session.post(login_url, data=login_payload)

# Check if the login was successful
if response.status_code == 200:
    print('Login successful')
    # Send a GET request to the target URL
    response = session.get(target_url)
    if response.status_code == 200:
        print('Successfully accessed the target URL')
    else:
        print('Failed to access the target URL')
else:
    print('Login failed')

Enter your username: jl351
Enter your password: ········
Login successful
Successfully accessed the target URL


## Combine all .csv files in referenced directory and format for text extraction

Set the directory where the ProQeust .csv search files are stored

Note: .csv files have a defined naming convention (ProQuest_Articles-****_1) astricks represent company ticker

Ensure to re-name the .csv export of the dataframe prior to text extraction with the correct ticker. This will be a reference file. 

In [8]:
import pandas as pd
import os

# Set the directory containing your CSV files #
directory = 'ProQuestURLs_CVX'

# List all CSV files in the directory
csv_files = [f for f in os.listdir(directory) if f.endswith('.csv')]

# Initialize an empty list to store DataFrames
dataframes = []

# Increase the maximum width in characters of a column
pd.set_option('display.max_colwidth', None)

# Loop over the CSV files
for file in csv_files:
    # Extract the ticker from the filename
    ticker = file.split('-')[1].split('_')[0]
    
    # Load the CSV file into a DataFrame
    df = pd.read_csv(os.path.join(directory, file))
    
    # Ensure the 'DocumentURL' and 'pubdate' columns are treated as strings
    df['DocumentURL'] = df['DocumentURL'].astype(str)
    df['pubdate'] = df['pubdate'].astype(str)
    df['Title'] = df['Title'].astype(str)
    
    # Split the URL at 'url=' and keep the part after it
    df['DocumentURL'] = df['DocumentURL'].apply(
        lambda x: x.split('url=', 1)[1] if 'url=' in x else x
    )
    
    # Further refine to stop at the first space if it exists
    df['DocumentURL'] = df['DocumentURL'].apply(
        lambda x: x.split(' ')[0] if ' ' in x else x)
    
    # Rename 'pubdate' to 'Date'
    df.rename(columns={'pubdate': 'Date'}, inplace=True)
    
    # Rename 'StoreId' to 'UniqueID'
    df.rename(columns={'StoreId': 'UniqueID'}, inplace=True)
    
    # Add the ticker column
    df['Ticker'] = ticker
    
    # Append the DataFrame to the list
    dataframes.append(df)

# Concatenate all DataFrames into one
PQ_df1 = pd.concat(dataframes, ignore_index=True)

#Change to save data prior to text extraction
PQ_df1.to_csv('ProQuest_Articles_DF_CVX.csv', index=False)

# Display the head of the final DataFrame to check the results
print(PQ_df1[['UniqueID', 'Date', 'Title', 'DocumentURL', 'Ticker']].head())

     UniqueID          Date  \
0  2703147472  Aug 17, 2022   
1  2702197510  Aug 15, 2022   
2  2697049245   Aug 2, 2022   
3  2696371267  Jul 30, 2022   
4  2695835797  Jul 29, 2022   

                                                                                                                                                                                Title  \
0                                     Oil Giants Must Face Climate-Liability Suits in States, Appeals Court Rules; Decision is setback for companies such as Exxon, Chevron and Shell   
1                          Warren Buffett's Berkshire Hathaway Keeps Spending Through Volatile Markets; Apple, Bank of America, Coca-Cola, Chevron are among Berkshire's top holdings   
2  Investors Put Forward More Proposals, Dialing Up Pressure on Companies; Apple, Activision Blizzard and Chevron saw proposals gain considerable support in this year's proxy season   
3                                                                        

## Extract text from each URL and create new DF/.csv with final format 
### WARNING: THIS WILL TAKE SUBSTANTIAL AMOUNT OF TIME AND LIMIT USE OF COMPUTER

Ensure to change the name of the .csv that will be saved to the desired name.

In [4]:
import pandas as pd
import re
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def fetch_article_text_with_selenium(url):
    # Set up Safari WebDriver
    driver = webdriver.Safari()
    driver.get(url)
    time.sleep(5)  # Allow some time for the page to load

    try:
        # Handling the cookie acceptance
        print("Page loaded, looking for the cookie button...")
        cookie_button_selector = '#onetrust-button-group > #onetrust-accept-btn-handler'
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, cookie_button_selector))
        )
        cookie_button = driver.find_element(By.CSS_SELECTOR, cookie_button_selector)
        print("Cookie button found, attempting to click...")

        # Scroll into view and click using JavaScript
        driver.execute_script("arguments[0].scrollIntoView(true);", cookie_button)
        driver.execute_script("arguments[0].click();", cookie_button)
        time.sleep(10)  # Wait for any overlays to disappear

        print("Cookie button clicked.")
        
        # Handling the additional pop-up
        print("Looking for the additional pop-up button...")
        popup_button_selector = '#pendo-button-6be79a6f'
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, popup_button_selector))
        )
        popup_button = driver.find_element(By.CSS_SELECTOR, popup_button_selector)
        print("Pop-up button found, attempting to click...")

        # Scroll into view and click using JavaScript
        driver.execute_script("arguments[0].scrollIntoView(true);", popup_button)
        driver.execute_script("arguments[0].click();", popup_button)
        time.sleep(5)  # Wait for any overlays to disappear

        print("Pop-up button clicked.")
   
        # Fetching the article content
        print("Looking for the article content...")
        content = driver.find_element(By.TAG_NAME, 'body').text
        print("Content found.")

        # Primary keywords
        start_keyword = "OverlayEnd"
        end_keyword = "Word Count:"

        # Alternative start keyword
        alternative_start_keyword = "| Jump to"

        # Use regular expressions for case-insensitive search
        start_pattern = re.compile(re.escape(start_keyword), re.IGNORECASE)
        end_pattern = re.compile(re.escape(end_keyword), re.IGNORECASE)
        alternative_start_pattern = re.compile(re.escape(alternative_start_keyword), re.IGNORECASE)

        # Find the start and end indices using the patterns
        start_match = start_pattern.search(content)
        end_match = end_pattern.search(content)

        if start_match and end_match:
            start_index = start_match.end()
            end_index = end_match.start()
            content = content[start_index:end_index].strip()
        else:
            # Try with the alternative start keyword
            alternative_start_match = alternative_start_pattern.search(content)
            if alternative_start_match and end_match:
                start_index = alternative_start_match.end()
                end_index = end_match.start()
                content = content[start_index:end_index].strip()
            else:
                content = "Relevant content not found within the specified range."

    except Exception as e:
        content = f"Failed to load content: {str(e)}"
        print(content)
    finally:
        driver.quit()
    
    return content

results = []
for index, row in PQ_df1.iterrows():
    url = row['DocumentURL']
    article_text = fetch_article_text_with_selenium(url)
    article_title = row['Title']
    unique_id = row['UniqueID']
    ticker = row['Ticker']
    date = row['Date']
    
    results.append({'Unique_ID': unique_id, 'Date': date, 'Title': article_title, 'URL': url, 'Ticker': ticker, 'Full Article Text': article_text})    
    
# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Save to CSV
results_df.to_csv('ProQuest_Articles_FINAL.csv', index=False)
print('All articles have been saved in ProQuest_Articles_FINAL.csv.')

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Failed to load conten

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button c

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Failed to load content: Message: 

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
C

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button c

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button c

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button c

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, 

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Failed to load content: Message: 

Page loaded, looking for the cookie button...
Failed to load content: Message: 

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button cl

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, 

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, 

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, 

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button c

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button c

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button c

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button c

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button c

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Failed to load content: Message: 

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attemptin

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Failed to load content: Message: 

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attemptin

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button c

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, 

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Failed to load content: Message: 

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attemptin

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button c

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button c

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button c

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Failed to load content: Message: 

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Failed to load content: Messag

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button c

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, 

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Failed to load content: Message: 

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
C

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, 

Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content f

Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button c

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, 

Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, attempting to click...
Pop-up button clicked.
Looking for the article content...
Content found.
Page loaded, looking for the cookie button...
Cookie button found, attempting to click...
Cookie button clicked.
Looking for the additional pop-up button...
Pop-up button found, 

## Additional Cleaning of data if needed

In [None]:
import pandas as pd
import re

# Load the CSV file
df = pd.read_csv('ProQuest_Articles_FINAL.csv')

# Assuming the maximum limit for a cell in a CSV is typically around 32767 characters
max_limit = 32767

# Function to truncate and remove text after "Credit:" and "Word count:"
def process_text(text):
    if isinstance(text, str):
        # Truncate to the maximum limit
        text = text[:max_limit]
        # Remove text following "Credit:" (case insensitive)
        text = re.sub(r'(?i)\bCredit:.*', '', text)
        # Remove text following "Word count:" (case insensitive)
        text = re.sub(r'(?i)\bWord count:.*', '', text)
    return text

# Apply the function to each row in the 'full article text' column
df['Full Article Text'] = df['Full Article Text'].apply(process_text)

# Save the modified DataFrame to a new CSV file
df.to_csv('ProQuest_Articles_FINAL.csv', index=False)

print('CSV file has been updated and saved as ProQuest_Articles_FINAL.csv.csv.')