<a href="https://colab.research.google.com/github/KavyaM22/Stock-Logo-Scraper/blob/main/project_intern_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import requests
import pandas as pd
from PIL import Image
from io import BytesIO
from bs4 import BeautifulSoup
import logging
import traceback
import csv  # Import the csv module

# Route all scraper diagnostics to a log file; each record carries a
# timestamp and its severity level.
logging.basicConfig(
    filename="logo_scraping.log",
    format="%(asctime)s:%(levelname)s:%(message)s",
    level=logging.INFO,
)

# Folder that receives the saved logo images (created if it is missing).
logo_dir = "logo"
os.makedirs(logo_dir, exist_ok=True)

# CSV file recording which logo file was saved for each company/symbol.
metadata_file = "logo_metadata.csv"

# Function to validate company names
def validate_data(symbol):
    """Normalize a company name or ticker symbol into a search term.

    Args:
        symbol: Raw value taken from the company list.

    Returns:
        The value stripped of surrounding whitespace, with inner spaces
        replaced by "+" and lower-cased; ``None`` (after logging a warning)
        when the value is not a string or is blank.
    """
    cleaned = symbol.strip() if isinstance(symbol, str) else ""
    if not cleaned:
        logging.warning(f"Invalid symbol or company name: {symbol}")
        return None

    # "+" stands in for spaces so the term can be dropped into a search query.
    return cleaned.replace(" ", "+").lower()

# Function to fetch logo using Google Images search
def fetch_logo(symbol):
    """Fetch a logo image for *symbol* through a Google Images search.

    Args:
        symbol: Search term, with "+" already standing in for spaces (the
            form produced by ``validate_data``).

    Returns:
        A PIL image opened from the first usable search-result thumbnail, or
        ``None`` when the search or download fails, no usable image is found,
        or the downloaded bytes cannot be opened as an image. Failures are
        logged rather than raised so one bad symbol cannot abort a batch.
    """
    from urllib.parse import quote

    # Keep "+" as the word separator but escape characters such as "&" or "#"
    # that would otherwise cut the query string short.
    query = quote(str(symbol), safe="+")
    search_url = f"https://www.google.com/search?hl=en&tbm=isch&q={query}+logo"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    try:
        response = requests.get(search_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # The first <img> is usually a placeholder, so skip it and take the
        # first remaining tag that carries an absolute http(s) source.
        img_url = next(
            (tag.get('src') for tag in soup.find_all('img')[1:]
             if (tag.get('src') or '').startswith(('http://', 'https://'))),
            None,
        )
        if img_url is None:
            logging.warning(f"No images found for {symbol}")
            return None

        # Bound the download as well so a stalled server cannot hang the run.
        img_response = requests.get(img_url, headers=headers, timeout=10)
        img_response.raise_for_status()
        return Image.open(BytesIO(img_response.content))
    except requests.RequestException as e:
        logging.error(f"Error fetching logo for {symbol}: {e}")
        logging.error(traceback.format_exc())
        return None
    except OSError as e:
        # Image.open raises an OSError subclass when the payload is not a
        # recognizable image (for example an HTML error page).
        logging.error(f"Error decoding logo for {symbol}: {e}")
        logging.error(traceback.format_exc())
        return None

# Function to save the logo and metadata
def save_logo(symbol, image):
    """Write *image* into the logo folder and record it in the metadata CSV.

    Args:
        symbol: Validated search term; it becomes the file's base name.
        image: Object exposing ``save(path)`` (the image from ``fetch_logo``).

    The file is stored as ``<symbol>.png`` inside ``logo_dir``. If that file
    already exists nothing is written. Any error while saving or appending
    the metadata row is logged and swallowed.
    """
    file_name = f"{symbol}.png"
    file_path = os.path.join(logo_dir, file_name)

    # Never overwrite a logo that was saved earlier.
    if os.path.exists(file_path):
        logging.info(f"Logo for {symbol} already exists: {file_name}")
        return

    try:
        image.save(file_path)
        # Append mode keeps the rows written for earlier symbols.
        with open(metadata_file, 'a', newline='') as metadata_handle:
            csv.writer(metadata_handle).writerow([symbol, file_name])
        logging.info(f"Saved logo for {symbol} as {file_name}")
    except Exception as e:
        logging.error(f"Error saving logo for {symbol}: {e}")
        logging.error(traceback.format_exc())

# Function to process the company list and fetch logos
def process_companies(company_list):
    """Fetch and save a logo for every usable entry in *company_list*.

    Args:
        company_list: Iterable of raw company names or symbols.

    The metadata CSV is created with its header row if it does not exist
    yet. Entries rejected by ``validate_data`` are skipped; entries for
    which no image could be fetched are logged as warnings.
    """
    # Write the header only once, the first time the metadata file is made.
    if not os.path.exists(metadata_file):
        with open(metadata_file, 'w', newline='') as header_handle:
            csv.writer(header_handle).writerow(["Company/Symbol", "Logo File"])

    for raw_entry in company_list:
        search_term = validate_data(raw_entry)
        if not search_term:
            continue  # validate_data has already logged the rejection

        logo = fetch_logo(search_term)
        if logo:
            save_logo(search_term, logo)
        else:
            logging.warning(f"No logo found for {search_term}")

# Load company/symbol data from CSV
def load_company_data(file_path):
    """Read the list of companies to process from a CSV file.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The values of the 'SYMBOL' column when present, otherwise those of
        the 'NAME OF COMPANY' column, as a list. An empty list is returned
        (and the problem logged) when neither column exists or the file
        cannot be read.
    """
    try:
        frame = pd.read_csv(file_path)

        # Columns are tried in order of preference; the first one found wins.
        for column in ('SYMBOL', 'NAME OF COMPANY'):
            if column in frame.columns:
                return frame[column].tolist()

        logging.error("No valid 'SYMBOL' or 'NAME OF COMPANY' column found in the CSV file.")
        return []
    except Exception as e:
        logging.error(f"Error loading company data: {e}")
        logging.error(traceback.format_exc())
        return []

# Main execution
if __name__ == "__main__":
    # CSV holding the companies to process.
    csv_path = '/content/sample_data/nse_listed.csv'
    company_list = load_company_data(csv_path)

    # Logos are fetched in the same order as the rows of the CSV.
    process_companies(company_list)


In [None]:
import shutil
from google.colab import files

# Folder to archive and the name of the resulting zip file
folder_to_zip = "logo"
output_zip = "logo.zip"

# make_archive appends the ".zip" extension itself, so pass the name without it
shutil.make_archive(
    base_name=output_zip.replace('.zip', ''),
    format='zip',
    root_dir=folder_to_zip,
)

# Hand the archive to the Colab file-download helper
files.download(output_zip)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>