In [1]:
# Import Libraries
import pandas as pd

In [2]:
# Companies to look up in the EDGAR-derived company table, together with the
# date of the alleged anomaly. Each entry is a 3-tuple of strings:
#   (ticker symbol, year of the alleged anomaly, free-text company description)
# The ticker (first element) is the value later matched against
# company_data['ticker']; the year and description are carried through unchanged.
# NOTE(review): by their own descriptions, some entries point at the same company
# under different tickers (e.g. "Q" / "LUMN", "PBR" / "PETR4.SA") — confirm that
# listing both is intended.
companies_with_anomalies = [
    ("AIG", "2008", "American International Group"),
    ("FNM", "2004", "Fannie Mae (now FNMA)"),
    ("XRX", "2002", "Xerox Corporation"),
    ("Q", "2002", "Qwest Communications (now part of Lumen Technologies, ticker 'LUMN')"),
    ("WFC", "2016", "Wells Fargo"),
    ("DB", "2015", "Deutsche Bank"),
    ("UBS", "2011", "UBS Group AG"),
    ("BMY", "2002", "Bristol-Myers Squibb"),
    ("C", "2008", "Citigroup"),
    ("JPM", "2012", "JPMorgan Chase"),
    ("GSK", "2012", "GlaxoSmithKline"),
    ("BARC", "2012", "Barclays"),
    ("PBR", "2014", "Petrobras"),
    ("LUK", "2005", "Leucadia National (involved in Refco scandal, now part of Jefferies Financial Group, ticker 'JEF')"),
    ("LUMN", "2002", "Lumen Technologies (formerly CenturyLink)"),
    ("MFGP", "2012", "Micro Focus (successor of Hewlett-Packard which acquired Autonomy)"),
    ("HPQ", "2012", "HP Inc. (involved in the aftermath of Autonomy scandal)"),
    ("LU", "2000", "Lucent Technologies (now part of Alcatel-Lucent, traded as ALU and now part of Nokia, ticker 'NOK')"),
    ("GRPN", "2012", "Groupon (accounting irregularities post-IPO)"),
    ("PETR4.SA", "2014", "Petrobras (Brazil)"),
    ("LKI", "2020", "Luckin Coffee (China)"),
    ("GM", "2014", "General Motors (U.S., ignition switch scandal)"),
    ("BP", "2010", "BP (British Petroleum, Deepwater Horizon oil spill)"),
    ("MSFT", "2001", "Microsoft (Accounting investigations)"),
    ("TGT", "2008", "Target Corporation (Data breach)"),
    ("JCI", "2005", "Johnson Controls (Bribery scandal)"),
    ("HD", "2010", "Home Depot (Data breach)"),
    ("LVS", "2013", "Las Vegas Sands (Money laundering)"),
    ("NKE", "1998", "Nike (Labor practices)"),
    ("AAPL", "2014", "Apple Inc. (Antitrust investigations)"),
    ("BAC", "2008", "Bank of America (Mortgage fraud)"),
    ("TSLA", "2018", "Tesla Inc. (Elon Musk's misleading tweets)"),
    ("AMZN", "2020", "Amazon (Price gouging investigations)"),
    ("GOOG", "2019", "Google (Antitrust investigations)"),
    ("INTC", "2014", "Intel (Antitrust investigations)"),
    ("IBM", "2005", "IBM (Bribery scandal)"),
    ("PEP", "2016", "PepsiCo (Bribery scandal)"),
    ("SBUX", "2018", "Starbucks (Racial bias scandal)"),
    ("GE", "2009", "General Electric (Accounting fraud)"),
    ("NFLX", "2013", "Netflix (Insider trading)"),
    ("AMD", "2009", "AMD (Antitrust lawsuit)"),
    ("CSCO", "2013", "Cisco Systems (Whistleblower case)"),
    ("ORCL", "2011", "Oracle (Bribery scandal)"),
    ("QCOM", "2018", "Qualcomm (Antitrust lawsuit)"),
    ("EBAY", "2014", "eBay (Data breach)"),
    ("YHOO", "2016", "Yahoo (Data breach)"),
    ("BABA", "2015", "Alibaba (Counterfeit goods investigation)"),
    ("SHOP", "2020", "Shopify (Data breach)"),
    ("PYPL", "2019", "PayPal (Consumer fraud)"),
    ("SQ", "2019", "Square (Data breach)"),
    ("SPOT", "2020", "Spotify (Misleading advertising)"),
    ("UBER", "2017", "Uber (Sexual harassment scandal)"),
    ("LYFT", "2019", "Lyft (Labor practices investigation)"),
    ("TSN", "2019", "Tyson Foods (Labor practices investigation)"),
    ("MKC", "2018", "McCormick & Company (Price-fixing scandal)"),
    ("SJM", "2017", "J.M. Smucker Company (Misleading labeling)"),
    ("KHC", "2019", "Kraft Heinz (Accounting scandal)"),
    ("MDLZ", "2017", "Mondelez International (Price-fixing)"),
    ("L", "2015", "Loews Corporation (Environmental violations)"),
    ("YUM", "2013", "Yum! Brands (Food safety scandal)"),
    ("DPZ", "2018", "Domino's Pizza (Wage theft)"),
    ("PZZA", "2017", "Papa John's (Racial slur controversy)"),
    ("EAT", "2015", "Brinker International (Labor practices investigation)"),
    ("CAKE", "2019", "Cheesecake Factory (Wage theft)"),
    ("BJRI", "2020", "BJ's Restaurants (Wage theft)"),
    ("RRGB", "2018", "Red Robin Gourmet Burgers (Wage theft)"),
    ("PLAY", "2019", "Dave & Buster's (Wage theft)")
]
    

In [3]:
# Read the company metadata table that the ticker lookups run against
company_data = pd.read_csv('company_data.csv')

# List the available column names as a quick sanity check of the schema
column_names = list(company_data.columns)
print("Columns in company_data:", column_names)

Columns in company_data: ['Unnamed: 0', 'cik_str', 'ticker', 'title', 'sic', 'sicDescription', 'category', 'entityType', 'exchanges', 'fiscalYearEnd', 'stateOfIncorporation']


In [4]:
# Function to retrieve all relevant tickers for a company
def get_related_tickers(company, data):
    """Return every ticker that shares a CIK with the given ticker.

    Parameters
    ----------
    company : str
        Ticker symbol to look up in ``data['ticker']``.
    data : pandas.DataFrame
        Table with at least the columns ``'ticker'`` and ``'cik_str'``.

    Returns
    -------
    list
        Tickers of all rows whose ``'cik_str'`` equals the ``'cik_str'`` of any
        row matching ``company`` (the looked-up ticker itself included), in the
        row order of ``data``. Empty list when ``company`` is not present.
    """
    # CIK value(s) of the rows registered under the requested ticker
    matching_ciks = data.loc[data['ticker'] == company, 'cik_str']
    if matching_ciks.empty:
        return []

    # Every row filed under one of those CIKs counts as related
    shares_cik = data['cik_str'].isin(matching_ciks)
    return data.loc[shares_cik, 'ticker'].tolist()

In [5]:
# One record (dict) per ticker that could be resolved in company_data
anomaly_data = []
# Listed entries whose ticker has no row in company_data. They are kept here so
# the gaps are visible instead of silently disappearing from the output table.
unmatched_companies = []

# Iterate through each company and get their details
for company, scandal_year, detail in companies_with_anomalies:
    related_tickers = get_related_tickers(company, company_data)
    if not related_tickers:
        # An empty list means the listed ticker was not found in company_data
        unmatched_companies.append((company, scandal_year, detail))
        continue
    for ticker in related_tickers:
        # The first row carrying this ticker supplies the descriptive columns
        company_row = company_data[company_data['ticker'] == ticker].iloc[0]
        anomaly_data.append({
            'Scandal Year': scandal_year,
            'Ticker': company_row['ticker'],
            'Company Name': company_row['title'],
            'Industry': company_row['sicDescription'],
            'Filer Status': company_row['category'],
            'State of Incorporation': company_row['stateOfIncorporation'],
            'Fiscal Year End': company_row['fiscalYearEnd'],
            'Exchanges': company_row['exchanges'],
            'Detail': detail
        })

# Report the lookups that produced no rows so the missing companies can be reviewed
if unmatched_companies:
    print(
        "Tickers not found in company_data:",
        [entry[0] for entry in unmatched_companies]
    )

In [7]:
# Assemble the per-ticker records gathered so far into a single table
final_df = pd.DataFrame(anomaly_data)

# Persist the table; index=False keeps the row index out of the written file
final_df.to_csv(path_or_buf='companies_with_anomalies.csv', index=False)

# Leave the table as the cell's last expression so the notebook renders it
final_df

Unnamed: 0,Scandal Year,Ticker,Company Name,Industry,Filer Status,State of Incorporation,Fiscal Year End,Exchanges,Detail
0,2008,AIG,"AMERICAN INTERNATIONAL GROUP, INC.","Fire, Marine & Casualty Insurance",Large accelerated filer,DE,31-12,"['NYSE', 'NYSE']",American International Group
1,2008,AIG-PA,"AMERICAN INTERNATIONAL GROUP, INC.","Fire, Marine & Casualty Insurance",Large accelerated filer,DE,31-12,"['NYSE', 'NYSE']",American International Group
2,2002,XRX,Xerox Holdings Corp,"Computer Peripheral Equipment, NEC",Large accelerated filer,CT,31-12,['Nasdaq'],Xerox Corporation
3,2016,WFC,WELLS FARGO & COMPANY/MN,National Commercial Banks,Large accelerated filer,DE,31-12,"['NYSE', 'NYSE', 'NYSE', 'NYSE', 'NYSE', 'OTC'...",Wells Fargo
4,2016,WFC-PY,WELLS FARGO & COMPANY/MN,National Commercial Banks,Large accelerated filer,DE,31-12,"['NYSE', 'NYSE', 'NYSE', 'NYSE', 'NYSE', 'OTC'...",Wells Fargo
...,...,...,...,...,...,...,...,...,...
104,2015,EAT,"BRINKER INTERNATIONAL, INC",Retail-Eating Places,Large accelerated filer,DE,26-06,['NYSE'],Brinker International (Labor practices investi...
105,2019,CAKE,CHEESECAKE FACTORY INC,Retail-Eating Places,Large accelerated filer,DE,03-10,['Nasdaq'],Cheesecake Factory (Wage theft)
106,2020,BJRI,BJs RESTAURANTS INC,Retail-Eating Places,Accelerated filer,CA,03-10,['Nasdaq'],BJ's Restaurants (Wage theft)
107,2018,RRGB,RED ROBIN GOURMET BURGERS INC,Retail-Eating Places,Accelerated filer,DE,31-12,['Nasdaq'],Red Robin Gourmet Burgers (Wage theft)
