In [1]:
# Get the top n companies listed in the Indian, Australian and US markets
from bs4 import BeautifulSoup
import pandas as pd
import requests

# How many top-ranked companies to request from each market listing
n = 10

def _to_bse_code(code):
    """Return ``code`` with a ``.BSE`` suffix, swapping a trailing ``.NS`` for it."""
    if code.endswith(".NS"):
        # Only the trailing suffix is swapped; a ".NS" elsewhere in the code is kept.
        return code[:-len(".NS")] + ".BSE"
    if code.endswith(".BSE"):
        return code
    return code + ".BSE"


def market_companies(url, market, name, symbol, n=1, timeout=30):
    """Scrape up to ``n`` company names and ticker codes from a listing page.

    Parameters
    ----------
    url : str
        Page to download and parse.
    market : str
        One of ``'INDIA'``, ``'USA'`` or ``'AUS'`` (case-insensitive). Selects
        which page layout is parsed.
    name : str
        ``class`` attribute value of the ``<td>`` holding the company name.
        Only used for the ``'USA'``/``'AUS'`` layout; ignored for ``'INDIA'``.
    symbol : str
        ``class`` attribute value of the ``<td>`` holding the ticker symbol.
        Only used for the ``'USA'``/``'AUS'`` layout; ignored for ``'INDIA'``.
    n : int, default 1
        Maximum number of table entries to read.
    timeout : float, default 30
        Seconds to wait for the server before the request is abandoned.

    Returns
    -------
    pandas.DataFrame
        Columns ``Market``, ``Name`` and ``Code``, one row per company found.
        The frame is empty (with the same columns) when nothing matches.

    Raises
    ------
    ValueError
        If ``market`` is not one of the supported values. Raised before any
        network request is made.
    requests.HTTPError
        If the server answers with an HTTP error status.
    """
    market_key = market.upper()
    if market_key not in ("INDIA", "USA", "AUS"):
        # Fail fast: no point downloading a page we do not know how to parse.
        raise ValueError("Invalid market value. Please provide one of: 'INDIA', 'USA', 'AUS'.")

    # A timeout keeps the notebook from hanging forever on an unresponsive host,
    # and raise_for_status() stops an error page from being parsed as data.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    records = []

    if market_key == "INDIA":
        # Each company sits in a <td class="name-td"> wrapping nested divs.
        for td in soup.find_all('td', class_='name-td')[:n]:
            name_div = td.find('div', class_='name-div')
            if name_div is None:
                continue
            name_tag = name_div.find('div', class_='company-name')
            code_tag = name_div.find('div', class_='company-code')
            if name_tag is None or code_tag is None:
                # Skip malformed cells, mirroring the USA/AUS branch below.
                continue
            records.append({
                "Market": market_key,
                'Name': name_tag.text.strip(),
                'Code': _to_bse_code(code_tag.text.strip()),
            })
    else:
        # rows[0] is treated as the header row, so reading starts at index 1.
        for row in soup.find_all('tr')[1:n + 1]:
            namedata = row.find('td', {'class': name})
            symboldata = row.find('td', {'class': symbol})
            # Rows missing either cell are skipped.
            if symboldata is not None and namedata is not None:
                records.append({
                    "Market": market_key,
                    'Name': namedata.text.strip(),
                    'Code': symboldata.text.strip(),
                })

    # Built once, after the loop, so an empty page still yields a valid
    # (empty) DataFrame instead of leaving the result undefined.
    return pd.DataFrame(records, columns=["Market", "Name", "Code"])

# Scrape the top-n listing for each market (request order: India, USA, Australia)
df_India = market_companies(
    url="https://companiesmarketcap.com/india/largest-companies-in-india-by-market-cap/",
    market="INDIA",
    name='tablebluelink',
    symbol='tdcolumn',
    n=n,
)
df_USA = market_companies(
    url="https://stockanalysis.com/list/sp-500-stocks/",
    market="USA",
    name='slw svelte-eurwtr',
    symbol='sym svelte-eurwtr',
    n=n,
)
df_Aus = market_companies(
    url="https://stockanalysis.com/list/australian-securities-exchange/",
    market="AUS",
    name='slw svelte-eurwtr',
    symbol='sym svelte-eurwtr',
    n=n,
)

# Stack the three frames (India, then Australia, then USA) under a fresh 0..N-1 index
frames_in_display_order = [df_India, df_Aus, df_USA]
df_companies = pd.concat(frames_in_display_order, ignore_index=True)
df_companies

Unnamed: 0,Market,Name,Code
0,INDIA,Reliance Industries,RELIANCE.BSE
1,INDIA,Tata Consultancy Services,TCS.BSE
2,INDIA,HDFC Bank,HDB.BSE
3,INDIA,ICICI Bank,IBN.BSE
4,INDIA,Bharti Airtel,BHARTIARTL.BSE
5,INDIA,State Bank of India,SBIN.BSE
6,INDIA,Life Insurance Corporation of India (LIC),LICI.BSE
7,INDIA,Infosys,INFY.BSE
8,INDIA,ITC,ITC.BSE
9,INDIA,Hindustan Unilever,HINDUNILVR.BSE


In [3]:
# Alpha Vantage doesn't cover Australian shares, so the RapidAPI-hosted
# yh-finance endpoint is queried instead to fetch historical (end-of-day)
# stock prices.

import os

import requests
import datetime

url = "https://yh-finance.p.rapidapi.com/stock/v3/get-historical-data"

# NOTE(review): "BHP" may resolve to a non-ASX listing of the company; Yahoo-style
# tickers for ASX securities usually carry an ".AX" suffix — confirm the symbol.
querystring = {"symbol":"BHP","region":"AU"}

# The API key is a secret: read it from the environment instead of committing it
# to the notebook. Export RAPIDAPI_KEY before starting the kernel.
rapidapi_key = os.environ.get("RAPIDAPI_KEY")
if not rapidapi_key:
    raise RuntimeError(
        "RAPIDAPI_KEY environment variable is not set; "
        "it is required to call the yh-finance RapidAPI endpoint."
    )

headers = {
    "X-RapidAPI-Key": rapidapi_key,
    "X-RapidAPI-Host": "yh-finance.p.rapidapi.com"
}

# A timeout stops the cell from hanging on an unresponsive host; raise_for_status()
# surfaces quota/auth errors here instead of as a confusing KeyError on 'prices'.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()

data = response.json()
prices_data = data['prices']

# One row per entry of the 'prices' list returned by the API
prices = pd.DataFrame(prices_data)

prices

Unnamed: 0,date,open,high,low,close,volume,adjclose,amount,type,data
0,1715004258,56.860001,56.860001,56.595001,56.680000,166507.0,56.680000,,,
1,1714743000,,,,,,,,,
2,1714656600,55.759998,56.349998,55.509998,55.580002,2781900.0,55.580002,,,
3,1714570200,55.299999,55.740002,54.810001,55.000000,2307600.0,55.000000,,,
4,1714483800,56.230000,56.340000,55.160000,55.160000,2456400.0,55.160000,,,
...,...,...,...,...,...,...,...,...,...,...
248,1683898200,58.130001,58.730000,58.040001,58.310001,2182100.0,55.320564,,,
249,1683811800,57.560001,57.869999,57.119999,57.669998,3606300.0,54.713371,,,
250,1683725400,60.599998,60.660000,58.959999,59.650002,3551400.0,56.591866,,,
251,1683639000,60.130001,60.740002,59.910000,60.480000,2126000.0,57.379307,,,


In [None]:
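# TODO: clean the `date` field of `prices` (values such as 1715004258 look like Unix epoch seconds — confirm, then convert them to datetimes)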