In [None]:
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Step 1: Scrape the list of S&P 500 companies and their tickers
def get_sp500_companies():
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Find the table containing the S&P 500 companies
    table = soup.find('table', {'class': 'wikitable'})
    rows = table.find_all('tr')[1:]  # Skip the header row

    companies = []
    for row in rows:
        cols = row.find_all('td')
        company_name = cols[1].get_text(strip=True)
        ticker = cols[0].get_text(strip=True)
        companies.append({'Company Name': company_name, 'Ticker': ticker})
        
    return companies

# Step 2: Fetch company data using yfinance
def get_company_info(ticker):
    company = yf.Ticker(ticker)
    info = company.info
    return {
        'Ticker': ticker,
        'Company Name': info.get('longName', 'N/A'),
        'Sector': info.get('sector', 'N/A'),
        'Industry': info.get('industry', 'N/A'),
        'Market Cap': info.get('marketCap', 'N/A'),
        'PE Ratio': info.get('trailingPE', 'N/A'),
        'Dividend Yield': info.get('dividendYield', 'N/A'),
        'Current Price': info.get('currentPrice', 'N/A'),
        'CEO': info.get('ceo', 'N/A'),  # CEO Name
        '52 Week High': info.get('fiftyTwoWeekHigh', 'N/A'),
        '52 Week Low': info.get('fiftyTwoWeekLow', 'N/A')
    }

# Step 3: Fetch top 50 companies and their details, sorted by market cap
def main():
    companies = get_sp500_companies()
    company_data = []

    # Fetch data for each company and add market cap to the data
    for company in companies:
        ticker = company['Ticker']
        company_info = get_company_info(ticker)
        company_info['Market Cap'] = company_info.get('Market Cap', 0)  # If missing, set to 0
        company_data.append(company_info)

    # Step 4: Sort by Market Cap (descending order) and select top 50
    sorted_company_data = sorted(company_data, key=lambda x: x['Market Cap'], reverse=True)[:150]

    # Step 5: Save the data to CSV
    df = pd.DataFrame(sorted_company_data)
    df.to_csv('top_150_public_companies_sorted.csv', index=False)
    print("Data saved to 'top_150_public_companies_sorted.csv'.")

# Run the main function
if __name__ == '__main__':
    main()


In [None]:
import requests
import yfinance as yf
import pandas as pd

# Step 1: Get list of stock symbols using IEX Cloud API (Top companies)
def get_stock_symbols(iex_api_key):
    url = 'https://cloud.iexapis.com/stable/ref-data/symbols'
    params = {
        'token': iex_api_key
    }
    response = requests.get(url, params=params)
    data = response.json()

    # Filter the data to get only active companies (remove ETFs, mutual funds, etc.)
    active_companies = [item for item in data if item['isEnabled'] and item['type'] == 'stock']
    
    # Extract tickers of the active companies
    tickers = [item['symbol'] for item in active_companies]
    
    return tickers[:50]  # We want the top 50, feel free to adjust

# Step 2: Fetch company data using yfinance
def get_company_info(ticker):
    company = yf.Ticker(ticker)
    info = company.info
    return {
        'Ticker': ticker,
        'Company Name': info.get('longName', 'N/A'),
        'Sector': info.get('sector', 'N/A'),
        'Industry': info.get('industry', 'N/A'),
        'Market Cap': info.get('marketCap', 'N/A'),
        'PE Ratio': info.get('trailingPE', 'N/A'),
        'Dividend Yield': info.get('dividendYield', 'N/A'),
        'Current Price': info.get('currentPrice', 'N/A'),
        'CEO': info.get('ceo', 'N/A'),
        '52 Week High': info.get('fiftyTwoWeekHigh', 'N/A'),
        '52 Week Low': info.get('fiftyTwoWeekLow', 'N/A')
    }

# Step 3: Fetch top 50 companies and their details, sorted by market cap
def main():
    # Your IEX Cloud API key (replace with your own)
    iex_api_key = 'your_iex_api_key_here'

    # Get the list of stock tickers for top companies
    tickers = get_stock_symbols(iex_api_key)
    
    company_data = []

    # Fetch data for each company and add market cap to the data
    for ticker in tickers:
        company_info = get_company_info(ticker)
        company_info['Market Cap'] = company_info.get('Market Cap', 0)  # If missing, set to 0
        company_data.append(company_info)

    # Step 4: Sort by Market Cap (descending order) and select top 50
    sorted_company_data = sorted(company_data, key=lambda x: x['Market Cap'], reverse=True)[:50]

    # Step 5: Save the data to CSV
    df = pd.DataFrame(sorted_company_data)
    df.to_csv('top_50_public_companies_sorted_api.csv', index=False)
    print("Data saved to 'top_50_public_companies_sorted_api.csv'.")

# Run the main function
if __name__ == '__main__':
    main()


In [1]:
import requests

# Define the Crunchbase API URL and API Key
api_url = 'https://api.crunchbase.com/v3.1/organizations'
api_key = 'your_api_key_here'

# Example query for fetching women-led businesses (you can customize the query)
params = {
    'user_key': api_key,
    'query': 'female founders',  # This can be adjusted to reflect women-owned
    'page': 1
}

# Send GET request
response = requests.get(api_url, params=params)
data = response.json()

# Extract relevant information
businesses = data['data']['items']
women_owned_companies = []
for business in businesses:
    name = business['name']
    sector = business['category_group_list']
    funding = business['funding_total_usd']
    employees = business['number_of_employees']
    ceo = business.get('ceo', 'N/A')
    gender = "Female" if "woman" in ceo.lower() else "Unknown"
    women_owned_companies.append({
        'Name': name,
        'Sector': sector,
        'Funding': funding,
        'Employees': employees,
        'CEO': ceo,
        'Gender': gender
    })

# Convert to DataFrame and save to CSV
import pandas as pd
df_women = pd.DataFrame(women_owned_companies)
df_women.to_csv('women_Companies.csv', index=False)
print("Women-owned companies saved to 'women_Companies.csv'.")



TypeError: list indices must be integers or slices, not str

In [3]:
import requests
import json

# Define the Crunchbase API URL and API Key
api_url = 'https://api.crunchbase.com/v3.1/organizations'
api_key = '7130f22d328d4554bc00013a3aff6b6c'

# Example query for fetching women-led businesses (you can customize the query)
params = {
    'user_key': api_key,
    'query': 'female founders',  # This can be adjusted to reflect women-owned
    'page': 1
}

# Send GET request
response = requests.get(api_url, params=params)

# Check the structure of the response
data = response.json()

# Print the structure of the returned JSON data
print(json.dumps(data, indent=2))  # Pretty-print the JSON response

# Now, depending on the structure of the data, you will modify the way you access the data


[
  {
    "status": 401,
    "code": "LA401",
    "message": "Unauthorized user_key"
  }
]


In [4]:
import requests
import yfinance as yf
import pandas as pd

# Alpha Vantage API Key
alpha_vantage_api_key = "PP740KVKIVI10QZG"

# Example list of tickers (you can scrape this list from Yahoo Finance or any other source)
tickers = ["AAPL", "MSFT", "GOOG", "AMZN", "TSLA"]

# Function to get financial data from Alpha Vantage (using the Time Series API as an example)
def get_alpha_vantage_data(ticker):
    url = f'https://www.alphavantage.co/query'
    params = {
        'function': 'TIME_SERIES_DAILY',  # You can change this to any other function
        'symbol': ticker,
        'apikey': alpha_vantage_api_key
    }
    response = requests.get(url, params=params)
    data = response.json()
    
    # Extract daily close prices (example)
    if 'Time Series (Daily)' in data:
        time_series = data['Time Series (Daily)']
        daily_data = time_series[list(time_series.keys())[0]]  # Get the latest date's data
        return daily_data['4. close']  # Example: Close price
    else:
        return None

# Function to get CEO data using yfinance
def get_ceo(ticker):
    company = yf.Ticker(ticker)
    info = company.info
    ceo = info.get('ceo', 'N/A')
    return ceo

# Loop through tickers and collect data
company_data = []
for ticker in tickers:
    ceo = get_ceo(ticker)
    close_price = get_alpha_vantage_data(ticker)
    
    company_info = {
        'Ticker': ticker,
        'CEO': ceo,
        'Close Price (Alpha Vantage)': close_price
    }
    company_data.append(company_info)

# Save to CSV
df = pd.DataFrame(company_data)
df.to_csv('companies_with_ceo_and_stock_data.csv', index=False)
print("Data saved to 'companies_with_ceo_and_stock_data.csv'.")


OSError: [Errno 30] Read-only file system: 'companies_with_ceo_and_stock_data.csv'

In [None]:
import yfinance as yf
import pandas as pd

# Step 1: Fetch company data using yfinance (for Apple in this case)
def get_company_info(ticker):
    # Ensure the ticker is a string and handle any invalid or NaN values
    if not isinstance(ticker, str):
        print(f"Skipping invalid ticker: {ticker}")
        return None

    # Attempt to get data for the company
    try:
        company = yf.Ticker(ticker)
        info = company.info
        return {
            'Ticker': ticker,
            'Sector': info.get('sector', 'N/A'),
            'Industry': info.get('industry', 'N/A'),
            'Market Cap': info.get('marketCap', 0),  
            'PE Ratio': info.get('trailingPE', 'N/A'),
            'Dividend Yield': info.get('dividendYield', 'N/A'),
            'Current Price': info.get('currentPrice', 'N/A'),
            'CEO': info.get('ceo', 'N/A'),
            '52 Week High': info.get('fiftyTwoWeekHigh', 'N/A'),
            '52 Week Low': info.get('fiftyTwoWeekLow', 'N/A')
        }
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return None

# Step 2: Integrate Forbes Data with Yahoo Finance Data
def integrate_forbes_data(file_path):
    # Load Forbes data
    forbes_df = pd.read_csv(file_path)
    company_data = []

    # Fetch data for each company and append it
    for _, row in forbes_df.iterrows():
        company_name = row['Company Name']
        ticker = row['Ticker']

        # Skip rows where the ticker is invalid or NaN
        if pd.isna(ticker) or not isinstance(ticker, str):
            print(f"Skipping company with invalid or missing ticker: {company_name}")
            continue

        print(f"Fetching data for {company_name} ({ticker})...")
        company_info = get_company_info(ticker)

        # If data is valid, append to company_data
        if company_info:
            # Add the Forbes columns to the Yahoo Finance data
            company_info['Rank'] = row['Rank']
            company_info['Company Name'] = company_name
            company_info['Revenue'] = row['Revenue']
            company_info['Profit'] = row['Profit']
            company_info['Assets'] = row['Assets']
            company_info['Market Value'] = row['Market Value']

            company_data.append(company_info)

    # Convert the list of dictionaries into a DataFrame
    company_data_df = pd.DataFrame(company_data)

    # Step 3: Save the enriched data to a new CSV file
    company_data_df.to_csv('forbes_top_150_enriched_with_yahoo_data.csv', index=False)
    print("Data saved to 'forbes_top_150_enriched_with_yahoo_data.csv'.")

# Run the integration function
if __name__ == '__main__':
    integrate_forbes_data('forbes_with_ticker.csv')


Skipping company with invalid or missing ticker: JPMorganChase
Fetching data for Berkshire Hathaway (BRK-B)...
Skipping company with invalid or missing ticker: Saudi Arabian Oil Company (Saudi Aramco)
Fetching data for ICBC (1398.HK)...
Fetching data for Bank of America (BAC)...
Fetching data for Amazon (AMZN)...
Fetching data for China Construction Bank (601939.SS)...
Fetching data for Microsoft (MSFT)...
Fetching data for Agricultural Bank of China (601288.SS)...
Fetching data for Alphabet (GOOG)...
Fetching data for Toyota Motor (TM)...
Fetching data for Apple (AAPL)...
Fetching data for Bank of China (3988.HK)...
Skipping company with invalid or missing ticker: ExxonMobil
Fetching data for HSBC Holdings (HSBA.L)...
Fetching data for Wells Fargo (WFC)...
Fetching data for Shell Plc (SHEL)...
Fetching data for PetroChina (0857.HK)...
Fetching data for UnitedHealth Group (UNH)...
Fetching data for Walmart (WMT)...
Fetching data for Samsung Electronics (005930.KS)...
Fetching data for 