In [10]:
import pandas as pd

# Load the stock listing from the CSV next to the notebook.
df = pd.read_csv(r'./StockData.csv')

# Select the ticker column and discard rows where it is missing (NaN).
ndf = df.loc[:, "TckrSymb"].dropna()

# Plain Python list of the remaining ticker values.
companies = ndf.tolist()


In [None]:
import time
import requests
import pandas as pd
import os
from datetime import datetime

class NSEAnnualReportDownloader:
    """Download the first-listed annual report of companies from nseindia.com.

    One ``requests.Session`` is shared by every request so that cookies set by
    the homepage / company-page visits are sent with the later API calls.
    Progress and failures are reported with ``print``; failures never raise
    out of the public download methods.
    """

    def __init__(self, output_dir="Data/annual_reports"):
        """Create the HTTP session and make sure the output directory exists.

        Args:
            output_dir: Directory that receives the downloaded report files
                and the summary CSV. Missing parent directories are created.
        """
        self.session = requests.Session()
        # Browser-like headers sent with every request.
        # NOTE(review): presumably NSE rejects non-browser clients - confirm.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
            "Accept": "application/json, text/plain, */*",
            "Referer": "https://www.nseindia.com/",
        }
        self.output_dir = output_dir
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_dir, exist_ok=True)

        # Initialize cookies by visiting NSE homepage
        self._initialize_session()

    def _initialize_session(self):
        """Visit the NSE homepage so the session picks up its cookies.

        Any error is printed and swallowed; the object stays usable.
        """
        try:
            self.session.get("https://www.nseindia.com", headers=self.headers, timeout=10)
            print("Session initialized successfully")
        except Exception as e:
            print(f"Error initializing session: {e}")

    @staticmethod
    def _report_extension(report_url):
        """Return the file suffix of *report_url* (with leading dot) or ''.

        Only the path component of the URL is inspected, so query strings,
        fragments and dots in the host name never leak into the suffix.
        """
        from urllib.parse import urlsplit

        return os.path.splitext(urlsplit(report_url).path)[1]

    def download_latest_annual_report(self, symbol):
        """Download the first-listed annual report for one company symbol.

        Args:
            symbol: Ticker symbol as used by NSE (e.g. ``"M&M"``). It is
                URL-encoded before being sent.

        Returns:
            A dict with keys ``symbol``, ``year`` (``"<fromYr>-<toYr>"``),
            ``file_path`` and ``report_data`` (the raw API entry) on success;
            ``None`` if the listing or the file could not be fetched, no
            report is listed, or any exception occurred (it is printed).
        """
        try:
            # Visit company page to get cookies. `params=` lets requests
            # URL-encode the symbol; interpolating it raw breaks tickers
            # that contain '&' or other reserved characters.
            self.session.get(
                "https://www.nseindia.com/get-quotes/equity",
                params={"symbol": symbol},
                headers=self.headers,
                timeout=10,
            )

            # Get annual reports data
            response = self.session.get(
                "https://www.nseindia.com/api/annual-reports",
                params={"index": "equities", "symbol": symbol},
                headers=self.headers,
                timeout=10,
            )

            if response.status_code != 200:
                print(f"Failed to fetch annual reports for {symbol}. Status Code: {response.status_code}")
                return None

            data = response.json()

            if not data or 'data' not in data or not data['data']:
                print(f"No annual reports found for {symbol}")
                return None

            # Take the first entry.
            # NOTE(review): assumes the API lists the newest report first - confirm.
            latest_report = data['data'][0]
            report_url = latest_report['fileName']
            report_year = f"{latest_report['fromYr']}-{latest_report['toYr']}"

            # Build the local file name; the suffix is taken from the URL path.
            extension = self._report_extension(report_url)
            output_filename = f"{symbol}_annual_report_{report_year}{extension}"
            output_path = os.path.join(self.output_dir, output_filename)

            # Download the report
            report_response = self.session.get(report_url, headers=self.headers, timeout=30)
            if report_response.status_code != 200:
                print(f"❌ Failed to download annual report for {symbol}. Status Code: {report_response.status_code}")
                return None

            with open(output_path, 'wb') as f:
                f.write(report_response.content)
            print(f"✅ Successfully downloaded annual report for {symbol} ({report_year})")
            return {
                "symbol": symbol,
                "year": report_year,
                "file_path": output_path,
                "report_data": latest_report
            }

        except Exception as e:
            # Best-effort by design: one bad symbol must not abort a batch run.
            print(f"❌ Error downloading annual report for {symbol}: {e}")
            return None

    def download_reports_for_companies(self, companies_list, delay=1):
        """Download the first-listed annual report for each symbol in a list.

        Args:
            companies_list: Sized sequence of ticker symbols.
            delay: Seconds to sleep between two consecutive symbols.

        Returns:
            List of the result dicts of all successful downloads. When it is
            non-empty it is also written to ``annual_reports_summary.csv``
            inside ``self.output_dir``.
        """
        results = []
        total = len(companies_list)

        for position, symbol in enumerate(companies_list, start=1):
            print(f"[{position}/{total}] Processing {symbol}...")
            result = self.download_latest_annual_report(symbol)
            if result:
                results.append(result)

            # Add delay to avoid rate limiting (not needed after the last one)
            if position < total:
                time.sleep(delay)

        # Save results to CSV
        if results:
            summary_path = os.path.join(self.output_dir, 'annual_reports_summary.csv')
            pd.DataFrame(results).to_csv(summary_path, index=False)
            print(f"✅ Summary saved to {summary_path}")

        return results

# Example usage
if __name__ == "__main__":
    # Build the ticker list: read the CSV, keep the "TckrSymb" column,
    # skip rows where it is missing.
    df = pd.read_csv('./StockData.csv')
    companies = df.loc[:, "TckrSymb"].dropna().tolist()

    # Fetch one report per ticker, pausing one second between tickers.
    downloader = NSEAnnualReportDownloader()
    results = downloader.download_reports_for_companies(companies, delay=1)