# Setup

## Import libraries & settings

In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import sys, os

In [2]:
sys.path.append("../")

In [3]:
from scripts.data_scraper import get_all_financial_data

In [4]:
import nltk

Load the company metadata

In [4]:
raw_companies = pd.read_csv("../data/raw/companies.csv")

In [5]:
raw_companies.head()

Unnamed: 0,company_name,region,market_cap,ticker,sector
0,Exxon Mobil Corporation,United States,Large-Cap,XOM,Energy
1,Royal Dutch Shell plc,UK/Netherlands,Large-Cap,RDS.A,Energy
2,PetroChina Company Limited,China,Large-Cap,PTR,Energy
3,BP p.l.c.,United Kingdom,Large-Cap,BP,Energy
4,TotalEnergies SE,France,Large-Cap,TTE,Energy


In [6]:
# check missing data
raw_companies.isna().sum()

company_name    0
region          0
market_cap      0
ticker          0
sector          0
dtype: int64

In [7]:
print(f"Shape: {raw_companies.shape}\nUnique Tickers: {raw_companies['ticker'].nunique()}")

Shape: (455, 5)
Unique Tickers: 451


In [8]:
raw_companies['ticker'].duplicated().sum()

4

In [9]:
# removing duplicate tickers
raw_companies = raw_companies.drop_duplicates(subset='ticker')
print(f"Shape: {raw_companies.shape}\nUnique Tickers: {raw_companies['ticker'].nunique()}")

Shape: (451, 5)
Unique Tickers: 451


# Financial Data

Scrape the financial data for each company

In [10]:
fin_df = get_all_financial_data(raw_companies['ticker'].tolist())

In [11]:
fin_df.head()

Unnamed: 0,index,Tax Effect Of Unusual Items,Tax Rate For Calcs,Normalized EBITDA,Net Income From Continuing Operation Net Minority Interest,Reconciled Depreciation,Reconciled Cost Of Revenue,EBITDA,EBIT,Net Interest Income,...,Fixed Assets Revaluation Reserve,Net Policyholder Benefits And Claims,Purchase Of Investment Properties,Net Income From Tax Loss Carryforward,Current Deferred Taxes Assets,Excess Tax Benefit From Stock Based Compensation,Change In Interest Payable,Receiptsfrom Government Grants,Securities Amortization,Dividends Paid Direct
0,2023-12-31,0.0,0.33,74273000000.0,36010000000.0,20641000000.0,250555000000.0,74273000000.0,53632000000.0,-849000000.0,...,,,,,,,,,,
1,2022-12-31,0.0,0.33,102591000000.0,55740000000.0,24040000000.0,295608000000.0,102591000000.0,78551000000.0,-798000000.0,...,,,,,,,,,,
2,2021-12-31,0.0,0.31,52788000000.0,23040000000.0,20607000000.0,211806000000.0,52788000000.0,32181000000.0,-947000000.0,...,,,,,,,,,,
3,2020-12-31,0.0,0.17,18284000000.0,-22440000000.0,46009000000.0,170447000000.0,18284000000.0,-27725000000.0,-1158000000.0,...,,,,,,,,,,
0,2023-12-31,-2073390000.0,0.33,49786000000.0,15239000000.0,15928000000.0,162058000000.0,43503000000.0,27575000000.0,-1915000000.0,...,,,,,,,,,,


In [12]:
fin_df['Ticker'].nunique()

402

In [13]:
# compare the available companies after scraping financial data
lost_companies = list(set(raw_companies['ticker'].unique()).difference(set(fin_df['Ticker'].unique())))

In [14]:
raw_companies.loc[raw_companies['ticker'].isin(lost_companies)]

Unnamed: 0,company_name,region,market_cap,ticker,sector
1,Royal Dutch Shell plc,UK/Netherlands,Large-Cap,RDS.A,Energy
2,PetroChina Company Limited,China,Large-Cap,PTR,Energy
10,Gazprom PJSC,Russia,Large-Cap,OGZPY,Energy
12,Woodside Petroleum Ltd,Australia,Large-Cap,WPL.AX,Energy
15,Marathon Oil Corporation,United States,Mid-Cap,MRO,Energy
17,Oil Search Limited,Papua New Guinea,Mid-Cap,OSH.AX,Energy
22,CNOOC Limited,China,Mid-Cap,CEO,Energy
23,Husky Energy Inc.,Canada,Mid-Cap,HSE.TO,Energy
32,African Petroleum Corporation Ltd,Australia,Small-Cap,APCL,Energy
34,Wentworth Resources plc,Tanzania,Small-Cap,WEN.L,Energy


# ESG Data

Scrape the ESG Scores for each company

In [15]:
raw_companies.sample()

Unnamed: 0,company_name,region,market_cap,ticker,sector
416,Telkom SA SOC Ltd.,South Africa,Small-Cap,TKG.JO,Communication Services


In [16]:
esg_ticker = yf.Ticker("SU.PA")

In [17]:
sus = esg_ticker.sustainability.T

In [18]:
# add the ticker column
sus['Ticker'] = "SU.PA"

In [19]:
sus

Unnamed: 0,maxAge,totalEsg,environmentScore,socialScore,governanceScore,ratingYear,ratingMonth,highestControversy,peerCount,esgPerformance,...,furLeather,gambling,gmo,militaryContract,nuclear,pesticides,palmOil,coal,tobacco,Ticker
esgScores,86400,10.04,3.6,3.95,2.5,2025,2,2.0,109,LAG_PERF,...,False,False,False,False,False,False,False,False,False,SU.PA


In [20]:
# add the rating date using the ratingYear and ratingMonth
sus['ratingDate'] = pd.to_datetime(sus['ratingYear'].astype(str) + '-' + sus['ratingMonth'].astype(str), format='%Y-%m')

In [21]:
sus

Unnamed: 0,maxAge,totalEsg,environmentScore,socialScore,governanceScore,ratingYear,ratingMonth,highestControversy,peerCount,esgPerformance,...,gambling,gmo,militaryContract,nuclear,pesticides,palmOil,coal,tobacco,Ticker,ratingDate
esgScores,86400,10.04,3.6,3.95,2.5,2025,2,2.0,109,LAG_PERF,...,False,False,False,False,False,False,False,False,SU.PA,2025-02-01


In [22]:
def scrape_esg(tickers):
    """Collect the Yahoo Finance sustainability (ESG) record for each ticker.

    Args:
        tickers: Iterable of ticker symbols understood by ``yf.Ticker``.

    Returns:
        A DataFrame made by stacking each ticker's transposed
        ``sustainability`` frame, with a ``Ticker`` column added to identify
        the company. Tickers for which no sustainability data comes back
        are skipped. When nothing at all could be collected (including an
        empty ``tickers``), an empty DataFrame with only a ``Ticker`` column
        is returned so that downstream ``esg_df['Ticker']`` lookups still work.
    """
    company_data = []  # one single-company frame per ticker that has data

    for ticker_str in tickers:
        sustainability = yf.Ticker(ticker_str).sustainability

        # Guard against both "no data" shapes (None or an empty frame) so
        # that one missing company cannot break or pollute the whole scrape.
        if sustainability is None or sustainability.empty:
            continue

        df_temp = sustainability.T
        df_temp['Ticker'] = ticker_str
        company_data.append(df_temp)

    # pd.concat raises ValueError on an empty list, so handle it explicitly.
    if not company_data:
        return pd.DataFrame(columns=['Ticker'])

    return pd.concat(company_data)

In [23]:
esg_df = scrape_esg(raw_companies['ticker'].tolist())

404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/RDS.A?modules=esgScores&corsDomain=finance.yahoo.com&formatted=false&symbol=RDS.A&crumb=N07q.Qb24kq
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/PTR?modules=esgScores&corsDomain=finance.yahoo.com&formatted=false&symbol=PTR&crumb=N07q.Qb24kq
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/BP?modules=esgScores&corsDomain=finance.yahoo.com&formatted=false&symbol=BP&crumb=N07q.Qb24kq
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/TTE?modules=esgScores&corsDomain=finance.yahoo.com&formatted=false&symbol=TTE&crumb=N07q.Qb24kq
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/2222.SR?modules=esgScores&corsDomain=finance.yahoo.com&formatted=false&symbol=2222.SR&crumb=N07q.Qb24kq
404 Client Error: Not Found for url

In [24]:
esg_df

Unnamed: 0,maxAge,totalEsg,environmentScore,socialScore,governanceScore,ratingYear,ratingMonth,highestControversy,peerCount,esgPerformance,...,furLeather,gambling,gmo,militaryContract,nuclear,pesticides,palmOil,coal,tobacco,Ticker
esgScores,86400,43.66,25.09,11.58,6.99,2025,2,3.0,165,LEAD_PERF,...,False,False,False,False,False,False,False,False,False,XOM
esgScores,86400,38.36,19.91,10.61,7.84,2025,2,3.0,165,LEAD_PERF,...,False,False,False,False,False,False,False,False,False,CVX
esgScores,86400,41.04,24.19,10.65,6.2,2023,9,2.0,4,LEAD_PERF,...,False,False,False,False,False,False,False,False,False,RELIANCE.NS
esgScores,86400,32.57,17.96,7.32,7.29,2025,2,2.0,165,LEAD_PERF,...,False,False,False,False,False,False,False,False,False,CNQ
esgScores,86400,36.81,23.92,8.48,4.41,2025,2,3.0,165,LEAD_PERF,...,False,False,False,False,False,False,False,False,False,SU
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
esgScores,86400,8.15,2.44,2.91,2.8,2025,2,1.0,512,LAG_PERF,...,False,False,False,False,False,False,False,False,False,C09.SI
esgScores,86400,13.82,2.3,4.33,7.19,2025,2,1.0,512,LAG_PERF,...,False,False,False,False,False,False,False,False,False,VNO
esgScores,86400,13.66,2.67,4.57,6.42,2025,2,0.0,512,LAG_PERF,...,False,False,False,False,False,False,False,False,False,0688.HK
esgScores,86400,20.23,6.31,8.27,5.65,2025,2,0.0,512,AVG_PERF,...,False,False,False,False,False,False,False,False,False,U14.SI


In [25]:
esg_df.shape

(360, 37)

In [26]:
lost_companies_2 = list(set(raw_companies['ticker'].unique()).difference(set(esg_df['Ticker'].unique())))

In [27]:
raw_companies.loc[raw_companies['ticker'].isin(lost_companies_2)]

Unnamed: 0,company_name,region,market_cap,ticker,sector
1,Royal Dutch Shell plc,UK/Netherlands,Large-Cap,RDS.A,Energy
2,PetroChina Company Limited,China,Large-Cap,PTR,Energy
3,BP p.l.c.,United Kingdom,Large-Cap,BP,Energy
4,TotalEnergies SE,France,Large-Cap,TTE,Energy
6,Saudi Arabian Oil Company (Aramco),Saudi Arabia,Large-Cap,2222.SR,Energy
...,...,...,...,...,...
428,Unibail-Rodamco-Westfield SE,France,Large-Cap,URW.AS,Real Estate
435,CapitaLand Limited,Singapore,Mid-Cap,C31.SI,Real Estate
445,Growthpoint Properties Limited,South Africa,Small-Cap,GRTJ.J,Real Estate
449,"Ayala Land, Inc.",Philippines,Small-Cap,ALI.PS,Real Estate


In [1]:
raw_companies.loc[~raw_companies['ticker'].isin(lost_companies_2), 'market_cap'].value_counts()

NameError: name 'raw_companies' is not defined

Merge the financial data with the ESG scores

In [29]:
# Yahoo's statement date arrives in a column literally called "index"; give it a meaningful name.
fin_df = fin_df.rename(columns={"index": "Date"})

In [30]:
foo = fin_df.merge(esg_df, how='left', on='Ticker')

In [31]:
foo.insert(0, "Ticker", foo.pop("Ticker"))

  foo.insert(0, "Ticker", foo.pop("Ticker"))


In [32]:
foo.insert(2, "totalEsg", foo.pop("totalEsg"))

  foo.insert(2, "totalEsg", foo.pop("totalEsg"))


In [33]:
# drop rows with missing esgScore
foo = foo.dropna(subset="totalEsg")
foo.shape

(1530, 377)

In [34]:
# Build the rating date (first day of the rating month) from ratingYear / ratingMonth.
# `assign` creates the column on a fresh copy instead of inserting it in place into
# the filtered, very wide frame, which is what made pandas emit a warning here.
foo = foo.assign(
    ratingDate=pd.to_datetime(
        foo['ratingYear'].astype(str) + '-' + foo['ratingMonth'].astype(str),
        format='%Y-%m',
    )
)

  foo['ratingDate'] = pd.to_datetime(foo['ratingYear'].astype(str) + '-' + foo['ratingMonth'].astype(str), format='%Y-%m')


In [35]:
foo

Unnamed: 0,Ticker,Date,totalEsg,Tax Effect Of Unusual Items,Tax Rate For Calcs,Normalized EBITDA,Net Income From Continuing Operation Net Minority Interest,Reconciled Depreciation,Reconciled Cost Of Revenue,EBITDA,...,furLeather,gambling,gmo,militaryContract,nuclear,pesticides,palmOil,coal,tobacco,ratingDate
0,XOM,2023-12-31,43.66,0.0,0.33,74273000000.0,36010000000.0,20641000000.0,250555000000.0,74273000000.0,...,False,False,False,False,False,False,False,False,False,2025-02-01
1,XOM,2022-12-31,43.66,0.0,0.33,102591000000.0,55740000000.0,24040000000.0,295608000000.0,102591000000.0,...,False,False,False,False,False,False,False,False,False,2025-02-01
2,XOM,2021-12-31,43.66,0.0,0.31,52788000000.0,23040000000.0,20607000000.0,211806000000.0,52788000000.0,...,False,False,False,False,False,False,False,False,False,2025-02-01
3,XOM,2020-12-31,43.66,0.0,0.17,18284000000.0,-22440000000.0,46009000000.0,170447000000.0,18284000000.0,...,False,False,False,False,False,False,False,False,False,2025-02-01
12,CVX,2023-12-31,38.36,0.0,0.276,45042000000.0,21369000000.0,14989000000.0,136522000000.0,45042000000.0,...,False,False,False,False,False,False,False,False,False,2025-02-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1699,U14.SI,2020-12-31,20.23,-13214490.295817,0.214936,359302000.0,13141000.0,117880000.0,1249563000.0,297821000.0,...,False,False,False,False,False,False,False,False,False,2025-02-01
1700,8804.T,2023-12-31,16.92,-349140000.0,0.33,97417000000.0,45084000000.0,20686000000.0,266829000000.0,96359000000.0,...,False,False,False,False,False,False,False,False,False,2025-02-01
1701,8804.T,2022-12-31,16.92,-232023585.657371,0.297466,88737000000.0,43062000000.0,19113000000.0,248452000000.0,87957000000.0,...,False,False,False,False,False,False,False,False,False,2025-02-01
1702,8804.T,2021-12-31,16.92,3595673169.257033,0.360903,71800000000.0,34965000000.0,18862000000.0,247933000000.0,81763000000.0,...,False,False,False,False,False,False,False,False,False,2025-02-01


In [36]:
foo['Ticker'].nunique()

360

In [37]:
# checking market cap dist. of remaining companies
raw_companies.loc[raw_companies['ticker'].isin(foo['Ticker'].unique()), 'market_cap'].value_counts()

market_cap
Large-Cap    132
Mid-Cap      126
Small-Cap    102
Name: count, dtype: int64

In [39]:
foo.to_csv("../data/raw/financial_data.csv", index=False)

# Text Data

Collect news articles for each company

In [None]:
tickers = raw_companies['ticker'].tolist()

In [None]:
raw_companies.sample()

In [None]:
sample = yf.Ticker("XOM")

In [None]:
len(sample.news)

In [None]:
sample.news[0]['content']

In [None]:
sample.news

In [None]:
from datetime import datetime

In [None]:
def get_stock_news(ticker_symbol: str, cutoff_date: str, max_articles: int = 100) -> pd.DataFrame:
    """Fetch the news items Yahoo Finance lists for one ticker as a DataFrame.

    Args:
        ticker_symbol: Symbol passed to ``yf.Ticker``.
        cutoff_date: Date string parseable by ``pd.to_datetime``. It is
            currently only validated; the cutoff filter itself is disabled
            (see the note in the body), so it does not restrict the result.
        max_articles: Maximum number of rows to return, most recent first.

    Returns:
        One row per article with the columns ``Title``, ``Summary``, ``Date``,
        ``Publisher``, ``Publisher_URL``, ``Content_Type``, ``Is_Premium``,
        ``Article_URL`` and ``Thumbnail_URL``, sorted by ``Date`` descending.
        An empty DataFrame is returned when the ticker has no news or when
        any error occurs (the error is printed, not raised).
    """
    try:
        # Parsed so that an invalid cutoff is still reported, even though the
        # filter that would use it is switched off below.
        cutoff_dt = pd.to_datetime(cutoff_date)

        news = yf.Ticker(ticker_symbol).news
        if not news:
            return pd.DataFrame()

        processed_news = []
        for article in news:
            # Nested objects may be absent OR explicitly null. `dict.get(key, {})`
            # only covers the first case; a null value would raise AttributeError
            # and the broad except below would then discard every article.
            content = article.get('content') or {}
            provider = content.get('provider') or {}
            thumbnail = content.get('thumbnail') or {}
            premium_info = (content.get('finance') or {}).get('premiumFinance') or {}

            processed_news.append({
                'Title': content.get('title'),
                'Summary': content.get('summary'),
                'Date': pd.to_datetime(content.get('pubDate')),
                'Publisher': provider.get('displayName'),
                'Publisher_URL': provider.get('url'),
                'Content_Type': content.get('contentType'),
                'Is_Premium': premium_info.get('isPremiumNews', False),
                'Article_URL': content.get('previewUrl'),
                'Thumbnail_URL': thumbnail.get('originalUrl'),
            })

        news_df = pd.DataFrame(processed_news)

        # NOTE(review): the cutoff filter was disabled by the author and is kept
        # disabled to preserve current results. If it is re-enabled, check time
        # zones first: presumably `pubDate` carries a UTC offset, in which case
        # `Date` is tz-aware and cannot be compared with a tz-naive cutoff.
        # news_df = news_df[news_df['Date'] <= cutoff_dt]

        # Most recent first, capped at max_articles rows.
        return news_df.sort_values('Date', ascending=False).head(max_articles)

    except Exception as e:
        # Deliberate best-effort: one failing ticker must not stop a batch run.
        print(f"Error fetching news for {ticker_symbol}: {str(e)}")
        return pd.DataFrame()

In [None]:
news_df = get_stock_news("XOM", "2023-12-31")

In [None]:
news_df

In [None]:
len(sample.get_news(count=5))

---

In [5]:
import requests
from bs4 import BeautifulSoup
import re
import nltk
from datetime import datetime, timedelta

# Uncomment if you have not downloaded the punkt tokenizer
# nltk.download('punkt')
from nltk.tokenize import sent_tokenize


In [6]:

# ----------------------------
# Configuration and Constants
# ----------------------------

# Time period to scrape. run_scraper() walks every calendar month of every
# year from START_YEAR through END_YEAR, both inclusive.
START_YEAR = 2018
END_YEAR = 2020

# Companies to search for. Both names are OR'd together in the search query
# (build_query) and both are counted when run_scraper() checks how often an
# article mentions the company.
companies = [
    {"long_name": "Visa Corporation", "short_name": "Visa"},
    # Add more companies as needed
]

# ESG keywords; build_query() OR's them together as quoted phrases.
# Adjust these to the 10 main themes mentioned in the paper.
esg_keywords = ["environment", "social", "governance", "sustainability", "climate", 
                "diversity", "ethics", "responsibility", "transparency", "risk"]

# GDELT API endpoint used by fetch_article_urls().
# NOTE(review): check the GDELT API documentation for the correct URL and parameters.
GDELT_API_URL = "https://api.gdeltproject.org/api/v2/doc/doc"  # adjust if needed

# Maximum articles requested per month per company (sent as `maxrecords`).
MAX_ARTICLES_PER_MONTH = 250

# Minimum length, in characters, of the cleaned article text; shorter
# articles are discarded by extract_and_clean_text().
MIN_ARTICLE_LENGTH = 200


In [7]:

# ----------------------------
# Helper Functions
# ----------------------------

def build_query(company, start_date, end_date):
    """
    Build the query parameters for one GDELT API request.

    - company: dictionary with 'long_name' and 'short_name'
    - start_date and end_date: timestamps as YYYYMMDDHHMMSS strings

    Returns a dict suitable for ``requests.get(..., params=...)``. The search
    matches either company name together with at least one of the
    module-level ``esg_keywords``, and asks for at most
    ``MAX_ARTICLES_PER_MONTH`` records in JSON format.
    """
    # Either spelling of the company name is accepted.
    company_query = f'("{company["long_name"]}" OR "{company["short_name"]}")'
    # At least one ESG keyword must be present.
    esg_query = " OR ".join([f'"{kw}"' for kw in esg_keywords])

    # The "company mentioned at least twice" requirement is not expressed in
    # the query; run_scraper() enforces it on the downloaded text instead.
    # NOTE(review): the GDELT DOC 2.0 documentation describes parenthesised
    # OR-groups and an implicit AND between space-separated terms; confirm
    # that the explicit AND and the outer parentheses below are accepted.
    combined_query = f'({company_query} AND ({esg_query}))'

    params = {
        "query": combined_query,
        "mode": "ArtList",           # article list: URLs plus metadata
        # fetch_article_urls() parses the body with response.json(), so the
        # JSON output format has to be requested explicitly.
        "format": "json",
        "startdatetime": start_date,
        "enddatetime": end_date,
        "maxrecords": MAX_ARTICLES_PER_MONTH,
        # include additional parameters as required by the API (e.g., language, source country, etc.)
    }
    return params

def fetch_article_urls(query_params):
    """
    Send a request to the GDELT API and return a list of article URLs.

    - query_params: dict of request parameters (see build_query)

    Returns the 'url' of every entry in the response's 'articles' list.
    Returns an empty list, after printing the reason, when the request
    fails or when the body is not valid JSON; also returns an empty list
    when the JSON has no usable 'articles' entries.
    """
    try:
        response = requests.get(GDELT_API_URL, params=query_params, timeout=10)
        response.raise_for_status()
    except Exception as e:
        print(f"Error fetching from GDELT API: {e}")
        return []

    # A successful HTTP status does not guarantee a JSON body (for example an
    # HTML page or a plain-text message), so parsing is guarded separately
    # instead of letting the decode error abort the whole scraping run.
    try:
        data = response.json()
    except ValueError as e:
        print(f"Error parsing GDELT API response as JSON: {e}")
        return []

    if not isinstance(data, dict):
        return []

    return [
        article['url']
        for article in (data.get('articles') or [])
        if isinstance(article, dict) and 'url' in article
    ]

def crawl_article(url):
    """
    Fetch `url` (10-second timeout) and hand back the page body as text.

    Any failure while requesting the page, including an HTTP error status,
    is printed and reported to the caller as None.
    """
    try:
        page = requests.get(url, timeout=10)
        page.raise_for_status()
    except Exception as err:
        print(f"Error crawling URL {url}: {err}")
        return None
    else:
        return page.text

def extract_and_clean_text(html_content):
    """
    Use Beautiful Soup to extract the visible text of a page and clean it.

    Cleaning steps, in order:
      - drop <script> and <style> elements,
      - remove URLs (anything starting with "http") and e-mail addresses,
      - remove numeric dates such as 12/31/2020, 31-12-20 or 2020-12-31,
      - collapse runs of whitespace into single spaces.

    Returns the cleaned text, or None when it is shorter than
    MIN_ARTICLE_LENGTH characters.
    """
    soup = BeautifulSoup(html_content, "html.parser")

    # Remove script and style elements
    for script_or_style in soup(["script", "style"]):
        script_or_style.decompose()

    # Extract text content
    text = soup.get_text(separator=" ", strip=True)

    # Remove URLs
    text = re.sub(r'http\S+', '', text)
    # Remove email addresses
    text = re.sub(r'\S+@\S+', '', text)

    # Remove numeric dates. The separators are mandatory: the previous pattern
    # put "th|st|nd|rd" inside a character class (where "|" is a literal, not
    # alternation) and made the separator optional, so it deleted every bare
    # number of four or more digits, such as years and amounts.
    date_pattern = (
        r'\b(?:'
        r'\d{4}[-/]\d{1,2}[-/]\d{1,2}'                                      # 2020-12-31, 2020/12/31
        r'|\d{1,2}(?:st|nd|rd|th)?[-/]\d{1,2}(?:st|nd|rd|th)?[-/]\d{2,4}'   # 12/31/2020, 31-12-20
        r')\b'
    )
    text = re.sub(date_pattern, '', text)

    # Remove extra whitespace and newlines
    text = re.sub(r'\s+', ' ', text)

    # Too little text left to be a usable article
    if len(text) < MIN_ARTICLE_LENGTH:
        return None

    return text

def extract_relevant_text(full_text):
    """
    Return the opening of an article: its first five sentences joined by spaces.

    Sentences are split with `sent_tokenize`. If the text has fewer than five
    sentences, all of them are returned; if it has none, the result is None.
    No separate title handling is done here, so a title is only included if it
    is part of the leading sentences of `full_text`.
    """
    leading_sentences = sent_tokenize(full_text)[:5]
    return " ".join(leading_sentences) if leading_sentences else None


In [None]:

def run_scraper(max_articles_per_year=3000, min_articles=5):
    """
    Collect ESG-related news snippets for every entry in `companies`.

    For each company and each calendar month from START_YEAR through END_YEAR
    (inclusive), article URLs are requested from GDELT, each page is
    downloaded and cleaned, articles mentioning the company fewer than twice
    are dropped, and the leading sentences of the rest are kept.

    - max_articles_per_year: cap on kept articles per company and calendar
      year; once reached, the remaining months of that year are skipped.
    - min_articles: companies with fewer kept articles in total are left out
      of the result.

    Returns a dict mapping each company's long name to its list of snippets.

    NOTE(review): the saved output of this notebook shows GDELT answering
    HTTP 429 (Too Many Requests); consecutive monthly requests may need to
    be throttled.
    """
    final_dataset = {}

    # Loop over each company
    for company in companies:
        company_articles = []
        print(f"Processing company: {company['long_name']}")

        for year in range(START_YEAR, END_YEAR + 1):
            # Counted per year. The previous check compared the running total
            # across all years and only left the URL loop, so later months
            # kept being fetched and kept adding articles beyond the cap.
            articles_this_year = 0

            for month in range(1, 13):
                if articles_this_year >= max_articles_per_year:
                    break

                # The month spans from its first second to one second before
                # the first second of the following month.
                start_date = datetime(year, month, 1)
                if month == 12:
                    next_month_start = datetime(year + 1, 1, 1)
                else:
                    next_month_start = datetime(year, month + 1, 1)
                end_date = next_month_start - timedelta(seconds=1)

                # Convert dates to the YYYYMMDDHHMMSS strings build_query expects
                start_str = start_date.strftime("%Y%m%d%H%M%S")
                end_str = end_date.strftime("%Y%m%d%H%M%S")

                # Build query parameters
                params = build_query(company, start_str, end_str)
                urls = fetch_article_urls(params)
                print(f"Month {month}/{year}: Found {len(urls)} URLs.")

                # Crawl and process each article URL
                for url in urls:
                    html = crawl_article(url)
                    if not html:
                        continue

                    text = extract_and_clean_text(html)
                    if not text:
                        continue

                    # Require at least two mentions of the company (either name, case-sensitive)
                    if (text.count(company["long_name"]) + text.count(company["short_name"])) < 2:
                        continue

                    relevant_text = extract_relevant_text(text)
                    if not relevant_text:
                        continue

                    company_articles.append(relevant_text)
                    articles_this_year += 1

                    if articles_this_year >= max_articles_per_year:
                        break

        # Remove companies with too few articles
        if len(company_articles) < min_articles:
            print(f"Skipping company {company['long_name']} due to insufficient articles ({len(company_articles)} found).")
            continue

        final_dataset[company['long_name']] = company_articles
        print(f"Collected {len(company_articles)} articles for {company['long_name']}.")

    return final_dataset


In [None]:

import json

# Run the full scrape, then persist the {company: [snippets]} mapping as UTF-8 JSON.
dataset = run_scraper()
with open("news_articles_dataset.json", mode="w", encoding="utf-8") as out_file:
    json.dump(dataset, out_file, ensure_ascii=False, indent=2)


Processing company: Visa Corporation
Error fetching from GDELT API: 429 Client Error: Too Many Requests for url: https://api.gdeltproject.org/api/v2/doc/doc?query=%28%28%22Visa+Corporation%22+OR+%22Visa%22%29+AND+%28%22environment%22+OR+%22social%22+OR+%22governance%22+OR+%22sustainability%22+OR+%22climate%22+OR+%22diversity%22+OR+%22ethics%22+OR+%22responsibility%22+OR+%22transparency%22+OR+%22risk%22%29%29&mode=ArtList&startdatetime=20180101000000&enddatetime=20180131235959&maxrecords=250
Month 1/2018: Found 0 URLs.
Error fetching from GDELT API: 429 Client Error: Too Many Requests for url: https://api.gdeltproject.org/api/v2/doc/doc?query=%28%28%22Visa+Corporation%22+OR+%22Visa%22%29+AND+%28%22environment%22+OR+%22social%22+OR+%22governance%22+OR+%22sustainability%22+OR+%22climate%22+OR+%22diversity%22+OR+%22ethics%22+OR+%22responsibility%22+OR+%22transparency%22+OR+%22risk%22%29%29&mode=ArtList&startdatetime=20180201000000&enddatetime=20180228235959&maxrecords=250
Month 2/2018: Fou