In [8]:
import requests
from requests.adapters import HTTPAdapter, Retry
from bs4 import BeautifulSoup 
import pandas as pd
import os
from dotenv import load_dotenv
from datetime import datetime
from alpha_vantage.timeseries import TimeSeries as TS
import yfinance as yf

In [9]:
def fetch_ngx_list():
    url = "https://www.african-markets.com/en/stock-markets/ngse/listed-companies"

    # Session with retries
    session = requests.Session()
    retries = Retry(total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
    session.mount("http://", HTTPAdapter(max_retries=retries))
    session.mount("https://", HTTPAdapter(max_retries=retries))

    # Add headers to look like a browser
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/120.0.0.0 Safari/537.36"
    }

    # Fetch page
    r = session.get(url, headers=headers, timeout=30)
    r.raise_for_status()

    soup = BeautifulSoup(r.text, "html.parser")

    # Locate the table
    table = soup.find("table")
    rows = table.find_all("tr")

    # Extract headers
    header_cells = rows[0].find_all(["th", "td"])
    headers = [cell.get_text(strip=True) for cell in header_cells]

    # Extract rows
    data = []
    for row in rows[1:]:
        cols = [td.get_text(strip=True) for td in row.find_all("td")]
        if cols:
            data.append(cols)

    # Fallback headers if missing
    if not headers:
        headers = [f"Col_{i+1}" for i in range(len(data[0]))]

    df = pd.DataFrame(data, columns=headers)
    return df

    

In [10]:
df_nigeria = fetch_ngx_list()
df_nigeria

Unnamed: 0,Company,Sector,Price,1D,YTD,M.Cap,Date
0,African Alliance Insurance,Financials,0.20,-,-,4.11,05/12
1,McNichols,Consumer Goods,2.60,-2.26%,+61.49%,2.9,05/12
2,Multi-Trex Integrated Foods,Consumer Goods,0.36,-,-,2.24,05/12
3,Livingtrust Mortgage Bank,Financials,3.38,+4.64%,-22.83%,16.89,05/12
4,Veritas Kapital Assurance,Financials,1.74,+8.07%,+27.94%,24.12,05/12
...,...,...,...,...,...,...,...
151,Transcorp Power,Utilities,307.00,-,-14.70%,2302.5,05/12
152,Aradel Holdings,Oil & Gas,680.00,-,+13.71%,2954.49,05/12
153,UPDC REIT,Financials,6.80,+1.49%,+36.00%,18.14,05/12
154,Legend Internet,Telecom,5.00,-,-,10,05/12


In [11]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_top_movers():
    url = "https://ngxgroup.com/exchange/data/equities/"
    response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
    soup = BeautifulSoup(response.text, "html.parser")

    tables = soup.find_all("table")

    movers = {}
    categories = ["Top Gainers", "Top Losers", "Top Trades"]

    for idx, category in enumerate(categories):
        table = tables[idx]
        rows = table.find("tbody").find_all("tr")
        data = []
        for row in rows:
            cols = [c.get_text(strip=True) for c in row.find_all("td")]
            if cols:
                data.append(cols)
        movers[category] = pd.DataFrame(
            data, columns=["Symbol", "Price", "Change", "Volume"]
        )

    return movers

if __name__ == "__main__":
    trending = scrape_top_movers()
    for category, df in trending.items():
        print(f"\n=== {category} ===")
        print(df.head())


IndexError: list index out of range

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pandas as pd
import time

def get_equities_price_list():
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    # set path to your driver if needed
    driver = webdriver.Chrome(options=options)

    url = "https://ngxgroup.com/exchange/data/equities-price-list/"
    driver.get(url)
    time.sleep(5)  # wait for JS to load the page; adjust if slower

    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # Now find the table(s) you want
    table = soup.find("table")  # you may need a more specific selector
    rows = table.find("tbody").find_all("tr")

    data = []
    headers = [th.get_text(strip=True) for th in table.find("thead").find_all("th")]
    for row in rows:
        cols = [td.get_text(strip=True) for td in row.find_all("td")]
        if len(cols) == len(headers):
            data.append(cols)

    df = pd.DataFrame(data, columns=headers)
    driver.quit()
    return df

if __name__ == "__main__":
    df = get_equities_price_list()
    print(df.head())


            Company PreviousClosingPrice Opening Price   High    Low  Close  \
0          ABBEYBDS                  6.8           6.8     --     --   6.80   
1          ABCTRANS                  4.3           4.3     --     --   4.30   
2           ACADEMY                 9.54          9.54   9.60   9.60   9.60   
3  ACCESSCORP [AWR]                   27         26.95  26.95  25.25  25.90   
4   AFRINSURE [MRF]                  0.2           0.2     --     --   0.20   

  Change Trades      Volume           Value Trade Date  
0            29      95,266      671,152.50  19 Sep 25  
1            47     151,140      654,553.06  19 Sep 25  
2   0.06     46     489,373    4,720,865.92  19 Sep 25  
3   -1.1   1404  29,413,174  765,239,030.25  19 Sep 25  
4             0          --              --  19 Sep 25  


In [12]:
def fetch_ngx_historical(symbol, years=5):
    """
    Fetch historical data for a specific NGX stock
    Example URL pattern: https://www.african-markets.com/en/stock-markets/ngse/[symbol]/historical-data
    """
    url = f"https://www.african-markets.com/en/stock-markets/ngse/{symbol.lower()}/historical-data"
    
    session = requests.Session()
    retries = Retry(total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
    session.mount("http://", HTTPAdapter(max_retries=retries))
    session.mount("https://", HTTPAdapter(max_retries=retries))
    
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/120.0.0.0 Safari/537.36"
    }
    
    r = session.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    
    soup = BeautifulSoup(r.text, "html.parser")
    table = soup.find("table")  # Find historical price table
    
    # Parse table similar to your fetch_ngx_list function
    rows = table.find_all("tr")
    data = []
    for row in rows[1:]:
        cols = [td.get_text(strip=True) for td in row.find_all("td")]
        if cols:
            data.append(cols)
    
    headers = [th.get_text(strip=True) for th in rows[0].find_all(["th", "td"])]
    df = pd.DataFrame(data, columns=headers)
    
    return df


In [None]:
# Test the historical data function with a major NGX stock
# Let's try Dangote Cement (DANGCEM) - one of the largest companies

try:
    df_historical = fetch_ngx_historical("DANGCEM")
    print(f"‚úÖ Successfully fetched historical data for DANGCEM")
    print(f"Shape: {df_historical.shape}")
    print("\nFirst few rows:")
    print(df_historical.head())
    print("\nColumns:", df_historical.columns.tolist())
except Exception as e:
    print(f"‚ùå Error: {e}")
    print("\nTrying alternative stocks...")
    
    # Try other major stocks
    test_symbols = ["MTNN", "ZENITHBANK", "GUARANTY", "NESTLE"]
    for symbol in test_symbols:
        try:
            df_test = fetch_ngx_historical(symbol)
            print(f"‚úÖ {symbol} worked! Shape: {df_test.shape}")
            print(df_test.head())
            break
        except Exception as e2:
            print(f"‚ùå {symbol} failed: {e2}")


‚ùå Error: 404 Client Error: Not Found for url: https://www.african-markets.com/en/stock-markets/ngse/dangcem/historical-data

Trying alternative stocks...
‚ùå MTNN failed: 404 Client Error: Not Found for url: https://www.african-markets.com/en/stock-markets/ngse/mtnn/historical-data
‚ùå MTNN failed: 404 Client Error: Not Found for url: https://www.african-markets.com/en/stock-markets/ngse/mtnn/historical-data
‚ùå ZENITHBANK failed: 404 Client Error: Not Found for url: https://www.african-markets.com/en/stock-markets/ngse/zenithbank/historical-data
‚ùå ZENITHBANK failed: 404 Client Error: Not Found for url: https://www.african-markets.com/en/stock-markets/ngse/zenithbank/historical-data
‚ùå GUARANTY failed: 404 Client Error: Not Found for url: https://www.african-markets.com/en/stock-markets/ngse/guaranty/historical-data
‚ùå GUARANTY failed: 404 Client Error: Not Found for url: https://www.african-markets.com/en/stock-markets/ngse/guaranty/historical-data
‚ùå NESTLE failed: 404 Client 

In [None]:
# Let's inspect the company names from the list to find the correct URL slug
# Check the first few rows to see the actual company name format

print("Top 10 NGX companies from african-markets.com:")
print(df_nigeria[['Company', 'Price']].head(10))
print("\n" + "="*50 + "\n")

# Try to construct URL from actual company name
# African-markets likely uses slugified company names, not ticker symbols
test_company = df_nigeria.iloc[0]['Company']
test_slug = test_company.lower().replace(' ', '-').replace('.', '')

print(f"Testing with: {test_company}")
print(f"URL slug: {test_slug}")
print(f"Full URL: https://www.african-markets.com/en/stock-markets/ngse/{test_slug}/historical-data")


Top 10 NGX companies from african-markets.com:
                       Company  Price
0   African Alliance Insurance   0.20
1                    McNichols   2.60
2  Multi-Trex Integrated Foods   0.36
3    Livingtrust Mortgage Bank   3.38
4    Veritas Kapital Assurance   1.74
5          Abbey Mortgage Bank   5.85
6                ABC Transport   3.10
7                Academy Press   7.35
8            Africa Prudential  13.00
9                    Afromedia   0.24


Testing with: African Alliance Insurance
URL slug: african-alliance-insurance
Full URL: https://www.african-markets.com/en/stock-markets/ngse/african-alliance-insurance/historical-data


In [13]:
def fetch_ngx_historical_by_name(company_name, years=5):
    """
    Fetch historical data for a specific NGX stock using company name
    """
    # Create URL slug from company name
    slug = company_name.lower().replace(' ', '-').replace('.', '').replace(',', '')
    url = f"https://www.african-markets.com/en/stock-markets/ngse/{slug}/historical-data"
    
    print(f"Fetching: {url}")
    
    session = requests.Session()
    retries = Retry(total=3, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
    session.mount("http://", HTTPAdapter(max_retries=retries))
    session.mount("https://", HTTPAdapter(max_retries=retries))
    
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/120.0.0.0 Safari/537.36"
    }
    
    r = session.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    
    soup = BeautifulSoup(r.text, "html.parser")
    table = soup.find("table")
    
    if not table:
        raise ValueError(f"No table found on page for {company_name}")
    
    rows = table.find_all("tr")
    
    # Extract headers
    header_cells = rows[0].find_all(["th", "td"])
    headers = [cell.get_text(strip=True) for cell in header_cells]
    
    # Extract data rows
    data = []
    for row in rows[1:]:
        cols = [td.get_text(strip=True) for td in row.find_all("td")]
        if cols:
            data.append(cols)
    
    df = pd.DataFrame(data, columns=headers)
    df['Company'] = company_name
    
    return df

# Test with a few companies from the list
test_companies = [
    "African Alliance Insurance",
    "Abbey Mortgage Bank", 
    "Africa Prudential"
]

for company in test_companies:
    try:
        df_hist = fetch_ngx_historical_by_name(company)
        print(f"‚úÖ {company}: {df_hist.shape[0]} rows")
        print(df_hist.head(3))
        print()
        break  # If one works, stop testing
    except Exception as e:
        print(f"‚ùå {company}: {e}\n")


Fetching: https://www.african-markets.com/en/stock-markets/ngse/african-alliance-insurance/historical-data
‚ùå African Alliance Insurance: 404 Client Error: Not Found for url: https://www.african-markets.com/en/stock-markets/ngse/african-alliance-insurance/historical-data

Fetching: https://www.african-markets.com/en/stock-markets/ngse/abbey-mortgage-bank/historical-data
‚ùå African Alliance Insurance: 404 Client Error: Not Found for url: https://www.african-markets.com/en/stock-markets/ngse/african-alliance-insurance/historical-data

Fetching: https://www.african-markets.com/en/stock-markets/ngse/abbey-mortgage-bank/historical-data
‚ùå Abbey Mortgage Bank: 404 Client Error: Not Found for url: https://www.african-markets.com/en/stock-markets/ngse/abbey-mortgage-bank/historical-data

Fetching: https://www.african-markets.com/en/stock-markets/ngse/africa-prudential/historical-data
‚ùå Abbey Mortgage Bank: 404 Client Error: Not Found for url: https://www.african-markets.com/en/stock-marke

In [14]:
# Let's inspect the actual links on the NGX company listing page
# to see what the real URL structure is

url = "https://www.african-markets.com/en/stock-markets/ngse/listed-companies"
session = requests.Session()
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/120.0.0.0 Safari/537.36"
}

r = session.get(url, headers=headers, timeout=30)
soup = BeautifulSoup(r.text, "html.parser")

# Find the first company link in the table
table = soup.find("table")
first_row = table.find("tbody").find_all("tr")[0]
company_cell = first_row.find("td")
link = company_cell.find("a")

if link:
    href = link.get('href')
    company_name = link.get_text(strip=True)
    print(f"First company: {company_name}")
    print(f"Link structure: {href}")
    print(f"Full URL: https://www.african-markets.com{href}")
    
    # Try to access this actual page and see if historical data link exists
    full_url = f"https://www.african-markets.com{href}"
    r2 = session.get(full_url, headers=headers, timeout=30)
    soup2 = BeautifulSoup(r2.text, "html.parser")
    
    # Look for historical data link on the company page
    hist_link = soup2.find("a", string=lambda text: text and "historical" in text.lower())
    if hist_link:
        print(f"\n‚úÖ Historical data link found: {hist_link.get('href')}")
    else:
        print("\n‚ùå No historical data link found on company page")
        print("Available links:")
        for a in soup2.find_all("a", href=True)[:10]:
            print(f"  - {a.get_text(strip=True)}: {a.get('href')}")
else:
    print("No link found in company cell")


First company: African Alliance Insurance
Link structure: listed-companies/company?code=AFRINSURE
Full URL: https://www.african-markets.comlisted-companies/company?code=AFRINSURE


ConnectionError: HTTPSConnectionPool(host='www.african-markets.comlisted-companies', port=443): Max retries exceeded with url: /company?code=AFRINSURE (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7f1b71e64a70>: Failed to resolve 'www.african-markets.comlisted-companies' ([Errno -2] Name or service not known)"))

## üéØ Key Finding: African Markets Uses Stock CODES

The URL structure is: `https://www.african-markets.com/en/stock-markets/ngse/listed-companies/company?code=AFRINSURE`

**Problem:** We need the stock CODE (e.g., `AFRINSURE`), not the company name.  
**Solution:** Extract codes from the company list table or scrape them from the links.


In [15]:
# Extract stock codes from the company list table
def fetch_ngx_with_codes():
    """Fetch NGX company list including stock codes from href links"""
    url = "https://www.african-markets.com/en/stock-markets/ngse/listed-companies"
    
    session = requests.Session()
    retries = Retry(total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
    session.mount("http://", HTTPAdapter(max_retries=retries))
    session.mount("https://", HTTPAdapter(max_retries=retries))
    
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/120.0.0.0 Safari/537.36"
    }
    
    r = session.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    
    soup = BeautifulSoup(r.text, "html.parser")
    table = soup.find("table")
    rows = table.find_all("tr")
    
    # Extract headers
    header_cells = rows[0].find_all(["th", "td"])
    headers = [cell.get_text(strip=True) for cell in header_cells]
    headers.append("Stock_Code")  # Add new column for stock code
    
    # Extract rows and stock codes
    data = []
    for row in rows[1:]:
        cols = [td.get_text(strip=True) for td in row.find_all("td")]
        
        # Extract stock code from link
        link = row.find("a")
        stock_code = None
        if link and 'href' in link.attrs:
            href = link['href']
            # Extract code from URL like "company?code=AFRINSURE"
            if '?code=' in href:
                stock_code = href.split('?code=')[1]
        
        if cols:
            cols.append(stock_code)
            data.append(cols)
    
    df = pd.DataFrame(data, columns=headers)
    return df

# Fetch updated data with stock codes
df_ngx_with_codes = fetch_ngx_with_codes()
print(f"‚úÖ Fetched {len(df_ngx_with_codes)} companies with stock codes")
print("\nFirst 10 companies:")
print(df_ngx_with_codes[['Company', 'Stock_Code', 'Price', 'Sector']].head(10))


‚úÖ Fetched 156 companies with stock codes

First 10 companies:
                       Company   Stock_Code  Price             Sector
0   African Alliance Insurance    AFRINSURE   0.20         Financials
1                    McNichols    MCNICHOLS   2.60     Consumer Goods
2  Multi-Trex Integrated Foods    MULTITREX   0.36     Consumer Goods
3    Livingtrust Mortgage Bank  LIVINGTRUST   3.38         Financials
4    Veritas Kapital Assurance   VERITASKAP   1.74         Financials
5          Abbey Mortgage Bank     ABBEYBDS   5.85         Financials
6                ABC Transport     ABCTRANS   3.10  Consumer Services
7                Academy Press      ACADEMY   7.35        Industrials
8            Africa Prudential     AFRIPRUD  13.00         Technology
9                    Afromedia    AFROMEDIA   0.24  Consumer Services


In [16]:
# Now try fetching historical data with the correct stock code
def fetch_ngx_historical_by_code(stock_code):
    """Fetch historical data using stock code"""
    # Try different URL patterns
    possible_urls = [
        f"https://www.african-markets.com/en/stock-markets/ngse/listed-companies/company/historical-data?code={stock_code}",
        f"https://www.african-markets.com/en/stock-markets/ngse/listed-companies/company?code={stock_code}&view=historical",
        f"https://www.african-markets.com/en/stock-markets/ngse/{stock_code.lower()}/historical-data",
    ]
    
    session = requests.Session()
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/120.0.0.0 Safari/537.36"
    }
    
    for url in possible_urls:
        try:
            print(f"Trying: {url}")
            r = session.get(url, headers=headers, timeout=30)
            
            if r.status_code == 200:
                soup = BeautifulSoup(r.text, "html.parser")
                table = soup.find("table")
                
                if table:
                    print(f"‚úÖ Found table at: {url}")
                    rows = table.find_all("tr")
                    
                    # Extract headers
                    header_cells = rows[0].find_all(["th", "td"])
                    headers = [cell.get_text(strip=True) for cell in header_cells]
                    
                    # Extract data
                    data = []
                    for row in rows[1:]:
                        cols = [td.get_text(strip=True) for td in row.find_all("td")]
                        if cols:
                            data.append(cols)
                    
                    df = pd.DataFrame(data, columns=headers)
                    df['Stock_Code'] = stock_code
                    return df
                else:
                    print(f"  No table found")
            else:
                print(f"  Status: {r.status_code}")
        except Exception as e:
            print(f"  Error: {e}")
    
    raise ValueError(f"Could not fetch historical data for {stock_code} from any URL pattern")

# Test with first few stock codes
test_codes = df_ngx_with_codes['Stock_Code'].head(5).tolist()
print(f"\nTesting with: {test_codes}\n")

for code in test_codes:
    if code:  # Skip None values
        try:
            df_hist = fetch_ngx_historical_by_code(code)
            print(f"‚úÖ SUCCESS for {code}!")
            print(df_hist.head(3))
            print()
            break
        except Exception as e:
            print(f"‚ùå {code} failed\n")



Testing with: ['AFRINSURE', 'MCNICHOLS', 'MULTITREX', 'LIVINGTRUST', 'VERITASKAP']

Trying: https://www.african-markets.com/en/stock-markets/ngse/listed-companies/company/historical-data?code=AFRINSURE
  Status: 404
Trying: https://www.african-markets.com/en/stock-markets/ngse/listed-companies/company?code=AFRINSURE&view=historical
  Status: 404
Trying: https://www.african-markets.com/en/stock-markets/ngse/listed-companies/company?code=AFRINSURE&view=historical
  Status: 404
Trying: https://www.african-markets.com/en/stock-markets/ngse/afrinsure/historical-data
  Status: 404
Trying: https://www.african-markets.com/en/stock-markets/ngse/afrinsure/historical-data
  Status: 404
‚ùå AFRINSURE failed

Trying: https://www.african-markets.com/en/stock-markets/ngse/listed-companies/company/historical-data?code=MCNICHOLS
  Status: 404
‚ùå AFRINSURE failed

Trying: https://www.african-markets.com/en/stock-markets/ngse/listed-companies/company/historical-data?code=MCNICHOLS
  Status: 404
Trying:

## üìä **Verdict: Historical Data Not Available on African-Markets.com**

After testing multiple URL patterns, **african-markets.com does NOT provide free historical data**.

---

## ‚úÖ **Your Options for NGX Historical Data:**

### **Option 1: Daily Collection (FREE & RECOMMENDED)** 
Start scraping daily and build your own history:
```python
# Save daily snapshot
timestamp = datetime.now().strftime("%Y-%m-%d")
df_ngx.to_csv(f"data/raw/ngx/{timestamp}_snapshot.csv")
```
- ‚úÖ After 30 days ‚Üí calculate moving averages
- ‚úÖ After 90 days ‚Üí volatility analysis
- ‚úÖ Full control over data quality

### **Option 2: Yahoo Finance (PARTIAL)**
Try major NGX stocks with `.LG` suffix:
```python
import yfinance as yf
yf.Ticker("DANGCEM.LG").history(period="1y")
```
- ‚ö†Ô∏è Only works for ~10-20 major stocks
- ‚úÖ Free and reliable

### **Option 3: Investing.com (COMPLEX)**
Web scrape with POST requests to their AJAX endpoint
- ‚ö†Ô∏è Requires reverse-engineering their API
- ‚ö†Ô∏è May get blocked/rate-limited

### **Option 4: Paid Data Providers**
- **NGX Data Portal** (official, expensive)
- **Bloomberg Terminal** 
- **Refinitiv/Eikon**

---

## üéØ **My Recommendation:**

**Combine Option 1 + Option 2:**
1. Use Yahoo Finance for major stocks (Dangote, MTN, Zenith, GTB)
2. For all other stocks, start daily collection TODAY
3. After 30 days, you'll have enough historical data for analysis
