In [1]:
# Install the third-party dependencies if needed: pip install requests beautifulsoup4 pandas


In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import re

def floatme(num):
    """Convert a scraped numeric string to a float.

    Supported forms:
      * thousands separators:  "1,234.5" -> 1234.5
      * percentages:           "50%"     -> 0.5   (returned as a fraction)
      * magnitude suffixes:    "1.5M"    -> 1500000.0
        (K, M, B, T, case-insensitive; the scraped revenue tables contain
        values such as "163.36M")

    Args:
        num: The text to convert. Leading/trailing whitespace is ignored.

    Returns:
        The parsed value as a float.

    Raises:
        ValueError: If the text is not numeric (for example "N/A" or "").
    """
    multipliers = {"K": 1e3, "M": 1e6, "B": 1e9, "T": 1e12}

    # Remove comma separators; trim whitespace first so that the "%" and
    # suffix checks below see the real last character.
    text = num.replace(",", "").strip()

    # Percentages are returned as fractions (divide by 100).
    if "%" in text:
        return float(text.strip("%")) / 100

    # Scale abbreviated figures such as "163.36M".
    suffix = text[-1:].upper()
    if suffix in multipliers:
        return float(text[:-1]) * multipliers[suffix]

    return float(text)

def fetch_stock_analysis_data(ticker, timeout=10):
    """Scrape the analyst tables from a ticker's Yahoo Finance analysis page.

    Every <section> whose <h3> title mentions one of the wanted table names
    is read row by row. Only <td> cells are collected, so a header row made
    of other cell types comes back as an empty list (callers index rows with
    that leading empty row in place).

    Args:
        ticker: Symbol inserted into the quote URL, e.g. 'SG'.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        dict mapping the section's <h3> text to a list of rows, each row a
        list of the non-empty, stripped cell strings.

    Raises:
        Exception: If the response status code is not 200.
        requests.exceptions.RequestException: On connection errors or when
            the request exceeds `timeout`.
    """
    url = f'https://finance.yahoo.com/quote/{ticker}/analysis'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

    # requests has no default timeout; without one a stalled connection
    # would block the caller indefinitely.
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"Failed to fetch data from Yahoo Finance for {ticker}. Status code: {response.status_code}")

    soup = BeautifulSoup(response.text, 'html.parser')

    wanted_titles = ("Earnings Estimate", "Earnings History", "Growth Estimates",
                     "Revenue Estimate", "EPS Trend")

    earnings_revenue_data = {}
    for section in soup.find_all('section'):
        header = section.find('h3')
        if header is None or not any(title in header.text for title in wanted_titles):
            continue

        table = section.find('table')
        if table is None:
            continue

        table_data = []
        for row in table.find_all('tr'):
            cells = (cell.text.strip() for cell in row.find_all('td'))
            # Drop empty cells but keep the row itself, even when it ends up empty.
            table_data.append([text for text in cells if text])
        earnings_revenue_data[header.text] = table_data

    return earnings_revenue_data

def save_to_csv(data, ticker, output_dir="./dataset"):
    """Write each scraped table to its own CSV file.

    One file is written per entry of `data`, named
    "<output_dir>/<ticker>_<key>.csv" with spaces in the key replaced by
    underscores and '&' by 'and'. Files are written without index or header.
    Each table and the path it was saved to are printed.

    Args:
        data: dict mapping a table name to a list of rows (lists of cells).
        ticker: Symbol used as the file-name prefix.
        output_dir: Directory that receives the files; created if missing.
    """
    import os  # local import so this function does not rely on a top-of-file change

    # to_csv does not create missing directories, so make sure it exists.
    os.makedirs(output_dir, exist_ok=True)

    for key, value in data.items():
        df = pd.DataFrame(value)
        print(df)
        filename = f"{output_dir}/{ticker}_{key.replace(' ', '_').replace('&', 'and')}.csv"
        df.to_csv(filename, index=False, header=False)
        print(f"Saved {key} to {filename}")

# Single-ticker smoke run: scrape one symbol, print the raw dict, and write
# each table to CSV. `ticker` and `data` are assigned at module level, so they
# stay available to later cells.
if __name__ == "__main__":
    ticker = 'SG'
    data = fetch_stock_analysis_data(ticker)
    # Raw dump of every scraped table, keyed by section title.
    print(data)
    save_to_csv(data, ticker)


{'Earnings Estimate': [[], ['No. of Analysts', '5', '5', '5', '5'], ['Avg. Estimate', '-0.17', '-0.14', '-0.51', '-0.36'], ['Low Estimate', '-0.22', '-0.17', '-0.75', '-0.52'], ['High Estimate', '-0.12', '-0.1', '-0.32', '-0.25'], ['Year Ago EPS', '-0.13', '-0.12', '-0.44', '-0.51']], 'Revenue Estimate': [[], ['No. of Analysts', '10', '7', '11', '11'], ['Avg. Estimate', '163.36M', '181.62M', '679.13M', '792.79M'], ['Low Estimate', '161.5M', '177.1M', '677.36M', '772.5M'], ['High Estimate', '169.65M', '188.63M', '685.57M', '811.21M'], ['Year Ago Sales', '153.03M', '157.85M', '584.04M', '679.13M'], ['Sales Growth (year/est)', '6.76%', '15.06%', '16.28%', '16.74%']], 'Earnings History': [[], ['EPS Est.', '-0.15', '-0.14', '-0.02', '-0.11'], ['EPS Actual', '-0.13', '-0.12', '-0.02', '-0.07'], ['Difference', '0.03', '0.02', '0.01', '0.04'], ['Surprise %', '16.72%', '13.24%', '33.99%', '38.04%']], 'EPS Trend': [[], ['Current Estimate', '-0.17', '-0.14', '-0.51', '-0.36'], ['7 Days Ago', '-0.

In [3]:
# Display the dict scraped for the single-ticker run above.
data

{'Earnings Estimate': [[],
  ['No. of Analysts', '5', '5', '5', '5'],
  ['Avg. Estimate', '-0.17', '-0.14', '-0.51', '-0.36'],
  ['Low Estimate', '-0.22', '-0.17', '-0.75', '-0.52'],
  ['High Estimate', '-0.12', '-0.1', '-0.32', '-0.25'],
  ['Year Ago EPS', '-0.13', '-0.12', '-0.44', '-0.51']],
 'Revenue Estimate': [[],
  ['No. of Analysts', '10', '7', '11', '11'],
  ['Avg. Estimate', '163.36M', '181.62M', '679.13M', '792.79M'],
  ['Low Estimate', '161.5M', '177.1M', '677.36M', '772.5M'],
  ['High Estimate', '169.65M', '188.63M', '685.57M', '811.21M'],
  ['Year Ago Sales', '153.03M', '157.85M', '584.04M', '679.13M'],
  ['Sales Growth (year/est)', '6.76%', '15.06%', '16.28%', '16.74%']],
 'Earnings History': [[],
  ['EPS Est.', '-0.15', '-0.14', '-0.02', '-0.11'],
  ['EPS Actual', '-0.13', '-0.12', '-0.02', '-0.07'],
  ['Difference', '0.03', '0.02', '0.01', '0.04'],
  ['Surprise %', '16.72%', '13.24%', '33.99%', '38.04%']],
 'EPS Trend': [[],
  ['Current Estimate', '-0.17', '-0.14', '-0

In [None]:
# Watchlists. NOTE: `stocks` is assigned three times below and each assignment
# replaces the previous one, so only the LAST list is what the loop iterates
# over. The earlier lists have no effect unless the later assignments are
# commented out.
stocks=['SG','MGOL', 'DOCS', 'ADMA', 'EVH', 'BLZE', 'IOVA',  'HEAR', 'PBI', 'THRD', 'RICK', 'NKLA', 'SANA', 'CARG', 'RKLB', 'PRTA', 'EMBC', 'FNKO', 'AXL', 'BE', 'TARS', 'RMAX', 'RDW', 'HRTG', 'U', 'AKAM', 'G', 'CLMT', 'SNBR', 'INOD', 'ONTO', 'TASK', 'DXC', 'EXPE', 'HYB', 'DOCN', 'BFH', 'NVRO', 'TTD', 'DIOD', 'AMBC', 'RNA', 'LLY', 'WOW']
# stocks= ["ZAPP", "GTLS", "EYE", "BGS", "SAVA", "UVXY", "PGY", "NOVA", "WOLF", "QRHC", "HIPO", "VSAT", "SRPT", "TTGT", "ENVX", "ACB", "ASTS", "AMN", "UVIX", "QTRX", "SEDG", "SGML", "INO", "ARWR", "MTW", "RUN", "VIAV", "VERV", "AAOI", "ALKT", "IIIV", "PODD", "CURV", "SEE", "RPAY", "CGC", "EBS", "MTUS", "ZVRA", "MNMD", "AQN", "BTMD", "ARLO", "SVV", "SATS", "HCAT", "ELF", "FNA", "VRM", "ARRY", "WBTN", "NFE", "FIVN", "MITK", "PUBM", "AVD"]
stocks=["PUBM", "WBTN", "FIVN", "ELF", "ARLO", "MTUS", "QTRX", "PODD", "IIIV", "AMN", "ARWR", "QNST", "REZI"]
# Active watchlist (the one actually screened).
stocks = ["CEPU", "TLN", "OKLO", "CWCO", "RNW", "GLOB", "AMAT", "COHR", "DLO", "CSCO", "ASTS", "RSKD", "LITE", "PYCR", "STNE", "CLBT", "LYTS", "NICE", "OUST", "QUIK", "BKKT", "PSFE", "BEEM", "DMRC", "IBTA", "INTA", "SRAD", "NATL", "MNDY", "DSP", "VTSI", "AILE", "RUM", "BEKE", "LPA", "SDHC", "SUNS", "IPWR", "AIT", "DE", "NPWR", "ARQ", "LOAR", "KE", "CAE", "MRCY", "SERV", "SLRN", "USPH", "ZVRA", "CAH", "MLYS", "GRAL", "URGN", "MNMD", "PRAX", "FENC", "CATX", "PLSE", "YMAB", "FTRE", "LENZ", "MDWD", "OTLK", "ARDT", "ARQT", "CELC", "MRX", "UBS", "IFS", "ABL", "CCAP", "SLF", "KINS", "HUT", "NU", "FLNG", "KGS", "NVGS", "GLNG", "GPRK", "BORR", "PFGC", "SOWG", "SPTN", "WMT", "FLO", "AGRO", "LWAY", "EWCZ", "DOLE", "EAT", "GLBE", "ARCO", "SE", "MLCO", "HD", "CYD", "DESP", "AMCR", "HRB", "TPR", "JD", "GAMB", "BABA", "MSGS", "TME", "ASTL", "FNV", "JHX", "HBM", "GOLD", "SGML"]
# Screen every ticker in `stocks`: scrape its analysis tables, parse the
# figures the filter needs, and print one tab-separated line per ticker that
# passes. Tickers with missing or non-numeric data are skipped silently.
for ticker in stocks:
    try:
        data = fetch_stock_analysis_data(ticker)
    except Exception as exc:
        # One failing ticker (HTTP error, timeout, ...) must not abort the
        # whole screen. KeyboardInterrupt is not an Exception, so Ctrl-C
        # still stops the run.
        print(f"{ticker}\tskipped: {exc}")
        continue
    finally:
        # Throttle after EVERY request, including tickers skipped below.
        time.sleep(1)

    try:
        # 'Earnings History' rows (label cell first, then four values):
        #   [1] EPS Est.   [2] EPS Actual   [4] Surprise %
        # Column order is presumably oldest -> most recent quarter — TODO confirm.
        history = data['Earnings History']
        SE1, SE2, SE3, SE4 = map(floatme, history[4][1:])  # surprise, as fractions
        AE1, AE2, AE3, AE4 = map(floatme, history[2][1:])  # actual EPS
        ES1, ES2, ES3, ES4 = map(floatme, history[1][1:])  # estimated EPS

        # 'Growth Estimates' rows 1-4, first value column. The names assume
        # current qtr / next qtr / current year / next year — TODO confirm
        # against the page layout.
        growth = data['Growth Estimates']
        GrowthCurrentQ = floatme(growth[1][1])
        GrowthNextQ = floatme(growth[2][1])
        GrowthCurrentY = floatme(growth[3][1])
        GrowthNextY = floatme(growth[4][1])

        # 'Earnings Estimate' row 2 is 'Avg. Estimate'; columns 1-4 are read
        # as current qtr, next qtr, current year, next year (by variable
        # name — TODO confirm).
        avg_estimate = data['Earnings Estimate'][2]
        currentQE = floatme(avg_estimate[1])
        nextQE = floatme(avg_estimate[2])
        currentYE = avg_estimate[3]  # kept as raw text; not used by the filter
        nextYE = avg_estimate[4]     # kept as raw text; not used by the filter
    except (KeyError, IndexError, ValueError):
        # Missing table, short row, or a non-numeric cell: skip this ticker.
        continue

    # Filter: the next-quarter estimate is at least the current-quarter
    # estimate, the current-quarter estimate is at least the last-column
    # historical EPS estimate, the last-column surprise is positive, and the
    # second growth-estimate row is positive.
    if nextQE >= currentQE and currentQE >= ES4 and SE4 > 0 and GrowthNextQ > 0:
        print(f"{ticker}\t{SE1:.2f},{SE2:.2f},{SE3:.2f},{SE4:.2f}||{nextQE}, {currentQE},{AE4:.2f},{AE3:.2f}")