In [2]:
pip install ipykernel

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Smoke test: importing pandas and yfinance confirms both packages are
# available in this kernel before the analysis cell is run.
import pandas as pd
import yfinance as yf

# Reached only if both imports above succeeded.
print("System is ready!")

System is ready!


In [4]:
from bs4 import BeautifulSoup
import pandas as pd
import yfinance as yf
import datetime

# --- PART A: EXTRACT STOCK CODES FROM HTML ---
# Input: `html_content`, a string holding the HTML of the stock listing page.
# This cell does not create it; it must already exist in the kernel.
# Fail early with an actionable message instead of a bare NameError when the
# kernel was restarted or the cells were run out of order.
try:
    html_content
except NameError:
    raise NameError(
        "name 'html_content' is not defined: assign the listing page's HTML "
        "string to `html_content` (re-run the cell that loads it) before "
        "running this cell."
    ) from None

soup = BeautifulSoup(html_content, 'html.parser')
stocks = []

# Find all rows in the table
rows = soup.find_all('tr')

for row in rows:
    # Pick the anchor whose href carries 'stock_code=', even when it is not
    # the first link in the row (the filter receives None for anchors
    # without an href, hence the truthiness check).
    link = row.find('a', href=lambda href: href and 'stock_code=' in href)
    if link is None:
        continue

    # The code is the text between 'stock_code=' and the next '&' (e.g. "0328").
    code = link['href'].split('stock_code=')[1].split('&')[0]
    if not code:
        # An empty parameter would produce the meaningless ticker ".KL".
        continue

    name = link.text.strip()
    # Format for Yahoo Finance: Code + .KL
    stocks.append({'Name': name, 'Code': f"{code}.KL"})

# Explicit columns keep 'Name'/'Code' present even when nothing was found.
df_stocks = pd.DataFrame(stocks, columns=['Name', 'Code'])

if df_stocks.empty:
    # Nothing downstream can work without tickers; say why instead of
    # failing with KeyError: 'Code' on the example lookup below.
    raise ValueError(
        "No links containing 'stock_code=' were found in html_content; "
        "check that it holds the stock listing table."
    )

print(f"Found {len(df_stocks)} stock codes. Example: {df_stocks['Code'].iloc[0]}")

# --- PART B: DOWNLOAD & FILTER (The "Robo" Logic) ---
# For every ticker in df_stocks, download daily prices and record the mean
# daily percentage return. Results are collected in `valid_stocks` as a list
# of dicts (one per stock) for the selection step.
print("\nStarting analysis... this may take 2-3 minutes.")
valid_stocks = []

# Look-back window in calendar years (365-day years, leap days ignored).
# NOTE(review): the original comments mention both "6 Years" and "5 years";
# the code has always used 6 — confirm against the mandate.
LOOKBACK_YEARS = 6
end_date = datetime.date.today()
start_date = end_date - datetime.timedelta(days=365 * LOOKBACK_YEARS)

# to_dict('records') yields nothing for an empty frame, so an empty
# df_stocks simply results in an empty valid_stocks list.
for stock in df_stocks.to_dict('records'):
    ticker = stock['Code']
    name = stock['Name']

    try:
        # Download data (quietly). auto_adjust=True is requested explicitly so
        # that 'Close' holds adjusted prices regardless of the yfinance
        # default; an 'Adj Close' column is not returned in that mode.
        data = yf.download(
            ticker,
            start=start_date,
            end=end_date,
            progress=False,
            auto_adjust=True,
        )

        # Check 1: Must have data (not empty)
        if data is None or len(data) == 0:
            continue

        close = data['Close']
        # Some yfinance versions return MultiIndex columns even for a single
        # ticker, in which case data['Close'] is a one-column DataFrame.
        if isinstance(close, pd.DataFrame):
            close = close.iloc[:, 0]

        # Drop missing prices so pct_change() compares consecutive real
        # observations instead of relying on implicit forward-filling.
        close = close.dropna()
        if len(close) < 2:
            # A return needs at least two prices.
            continue

        # Check 2: Average Daily Return
        # Formula per day: (Today - Yesterday) / Yesterday; then the mean,
        # converted to a percentage.
        avg_daily_return = float(close.pct_change().mean() * 100)

        valid_stocks.append({
            'Code': ticker,
            'Name': name,
            'Avg_Daily_Return_%': avg_daily_return,
            'Data_Points': len(data)  # number of downloaded rows
        })

    except Exception as e:
        # Best effort: one bad ticker must not stop the run, but report the
        # cause so systematic failures are not silently hidden.
        print(f"Could not process {ticker}: {e!r}")

# --- PART C: SELECT THE TOP 50 ---
# Keep stocks whose mean daily return meets the threshold, rank them from
# highest to lowest return, and save the best ones to CSV.
MIN_AVG_DAILY_RETURN_PCT = 0.25  # threshold, in percent per day
TOP_N = 50                       # maximum number of stocks to save
OUTPUT_CSV = 'selected_50_stocks.csv'

# Explicit columns: with an empty `valid_stocks` the frame would otherwise
# have no columns and the filter below would raise KeyError.
df_results = pd.DataFrame(
    valid_stocks,
    columns=['Code', 'Name', 'Avg_Daily_Return_%', 'Data_Points'],
)

# Filter: Apply the 0.25% threshold
# Note: 0.25% daily average is very high. We will sort by return to see the best ones.
qualified_df = df_results[df_results['Avg_Daily_Return_%'] >= MIN_AVG_DAILY_RETURN_PCT]

# Sort by highest return first
qualified_df = qualified_df.sort_values(by='Avg_Daily_Return_%', ascending=False)

print("\n--- RESULTS ---")
print(f"Total stocks analyzed: {len(df_results)}")
print(f"Stocks meeting {MIN_AVG_DAILY_RETURN_PCT}% requirement: {len(qualified_df)}")

# Display top 5 to check
print(qualified_df.head(5))

# Save the best 50 (or all qualified, if fewer) to CSV for the next step
final_50 = qualified_df.head(TOP_N)
final_50.to_csv(OUTPUT_CSV, index=False)
print("Saved top candidates to 'selected_50_stocks.csv'")

NameError: name 'html_content' is not defined