<a href="https://colab.research.google.com/github/Ishar786/Python/blob/main/VA_trial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==============================================================================
# 1. SETUP: Install libraries and configure the API Key
# ==============================================================================
!pip install google-generativeai pandas tqdm

import google.generativeai as genai
from google.colab import userdata
import pandas as pd
from tqdm.notebook import tqdm
import json
import time

# Read the Gemini key from Colab Secrets and register it with the SDK.
# A failure is reported but not re-raised, so the rest of the cell still runs.
try:
    # The secret is looked up under the name 'GOOGLE_API_KEY'.
    google_api_key = userdata.get('GOOGLE_API_KEY')
    genai.configure(api_key=google_api_key)
except Exception as e:
    print("Error configuring Google API Key. Please ensure it's set correctly in Colab Secrets.")
    print("Error details:", e)
else:
    print("Google API Key configured successfully.")

# ==============================================================================
# 2. INPUTS: Add your company list and review definitions
# ==============================================================================
# Companies to research; add one company name per list entry.
COMPANIES_TO_PROCESS = ["1010data, Inc."]

# ADM market segments, one "■" bullet per line. The text starts and ends with
# a newline so it drops cleanly into the prompt under its heading.
ADM_DEFINITIONS = (
    "\n"
    "■ Data integration, management, and processing\n"
    "■ Business intelligence and analytics\n"
    "■ Data storage infrastructure\n"
    "■ Data science platforms, tools, and services\n"
    "■ Data governance, privacy, and security\n"
    "■ Enterprise information and content management\n"
)
# ==============================================================================
# 3. CORE LOGIC: Final, high-quality prompt and processing function
# ==============================================================================

def generate_prompt(company_name, definitions):
    """Build the research prompt sent to the model for a single company.

    Args:
        company_name: Company to research. It is substituted into the
            instructions and into the "company_name" field of the requested
            JSON object.
        definitions: ADM market definitions text, inserted under the
            "Context" heading of the prompt.

    Returns:
        The full prompt string: analyst role, ADM context, six numbered
        instructions, and the JSON output schema.
    """
    # str.format template: "{{" and "}}" render as the literal braces of the
    # JSON schema; the named fields are filled in by the call below.
    template = """
    You are a meticulous financial research analyst. Your task is to gather fresh, verified information for the company: "{company_name}".
    **You must ignore your pre-existing knowledge and perform a new, live web search for every step.**

    **Context: Analytical and Data Management (ADM) Market Definitions**
    {definitions}

    **Instructions for "{company_name}":**

    1.  **Find Live Website:** Perform a live web search to find the active, official corporate website for "{company_name}". If a website is truly inactive or a dead link, state "Inactive".
    2.  **Analyze Current Offerings:** Go to the live website you found. Analyze its "Products," "Solutions," or "Platform" pages to understand what the company *currently* sells.
    3.  **Determine Current Status:** Based on the website and other fresh search results, determine if the company is currently Private, Public (confirm with a stock ticker), or Acquired (but still operating).
    4.  **Find Revenue from Specific Sources:** Perform a targeted search for the most recent annual total revenue (prioritize 2024). **You must prioritize these sources in this order: Growjo, ZoomInfo, PitchBook, Crunchbase, and D&B.** If you find a figure, cite the source URL. If no reliable figure is found on these sites, state "NA".
    5.  **Synthesize and Analyze:** Based ONLY on your fresh research from the live website and the specified sources, provide a summary. Determine the ADM percentage based on the company's *current* products.
    6.  **Format Output:** Return your findings ONLY in the specified JSON format. Do not include any text, markdown, or code formatting before or after the JSON object.

    **JSON Output Format:**
    {{
      "company_name": "{company_name}",
      "website_status": "Active" or "Inactive",
      "company_status": "Private", "Public", or "Acquired",
      "total_revenue_2024": <number> or "NA",
      "revenue_source_url": "URL of the specific source page (e.g., the company's ZoomInfo page)" or "Not Found",
      "adm_percentage_assumption": <number between 0 and 100>,
      "justification_and_notes": "A summary based *only on your live web search*. Describe the company's current products from its website, state its current status, and justify the ADM percentage."
    }}
    """
    return template.format(company_name=company_name, definitions=definitions)

def _extract_json_object(raw_text):
    """Parse the JSON object embedded in a model reply.

    Only the span from the first "{" to the last "}" is parsed, so Markdown
    code fences or stray prose around the object are ignored and backticks
    inside JSON string values are left untouched.

    Args:
        raw_text: Reply text returned by the model.

    Returns:
        The decoded JSON object as a dict.

    Raises:
        ValueError: If the text contains no "{...}" span or the span is not
            valid JSON (json.JSONDecodeError is a ValueError subclass).
    """
    start = raw_text.find('{')
    end = raw_text.rfind('}')
    if start == -1 or end < start:
        raise ValueError("no JSON object found in response")
    return json.loads(raw_text[start:end + 1])


def process_company(company_name, model, definitions):
    """Query the model about one company and return its parsed JSON answer.

    Args:
        company_name: Name of the company to research.
        model: Object exposing ``generate_content(prompt)`` whose result has
            a ``text`` attribute.
        definitions: ADM market definitions text passed to the prompt.

    Returns:
        The dict decoded from the model's JSON reply. On failure, a dict with
        "company_name" and "error" keys; this function never raises for API
        or parsing problems, so one bad company cannot abort the batch.
    """
    prompt = generate_prompt(company_name, definitions)

    # Step 1: obtain the reply text. Reading ``.text`` sits inside this try
    # because the accessor itself can raise (the SDK raises ValueError when
    # the response holds no usable text part, e.g. a blocked prompt).
    # Catching broadly is deliberate: this is the per-company boundary.
    try:
        response = model.generate_content(prompt)
        raw_text = response.text
    except Exception as e:
        print(f"Error processing {company_name}: {e}")
        return {"company_name": company_name, "error": f"API or other error: {e}"}

    # Step 2: decode the reply. ``raw_text`` is already bound here, so the
    # handler can safely echo it without touching ``response`` again.
    try:
        return _extract_json_object(raw_text)
    except (AttributeError, TypeError, ValueError) as e:
        print(f"Warning: Could not parse JSON for {company_name}. Response: {raw_text}")
        return {"company_name": company_name, "error": f"JSON Parse Error: {e}"}

# ==============================================================================
# 4. EXECUTION: Loop through companies with a 1-minute delay
# ==============================================================================
model = genai.GenerativeModel('gemini-1.5-flash')
results_list = []

# Seconds to wait between consecutive API calls: just over a minute, to stay
# under a 1 Request Per Minute (RPM) limit.
REQUEST_DELAY_SECONDS = 61

print(f"Starting analysis for {len(COMPANIES_TO_PROCESS)} companies...")
print(f"A {REQUEST_DELAY_SECONDS}-second delay is applied between each company to respect the 1 RPM rate limit.")

for index, company in enumerate(tqdm(COMPANIES_TO_PROCESS, desc="Processing Companies")):
    # Pause before every request except the first. Sleeping after each request
    # would also wait a full minute after the last company for no benefit.
    if index > 0:
        time.sleep(REQUEST_DELAY_SECONDS)
    result = process_company(company, model, ADM_DEFINITIONS)
    results_list.append(result)

# ==============================================================================
# 5. FINAL REPORT: Process results and save to CSV
# ==============================================================================
print("\nAll companies processed. Creating final report...")

df = pd.DataFrame(results_list)


def _is_missing(value):
    """Return True when a cell holds None/NaN rather than real content.

    Rows built from error results lack most keys, and pandas fills those
    cells with NaN. Non-scalar values (lists, dicts) are never "missing".
    """
    return pd.api.types.is_scalar(value) and pd.isna(value)


def create_assumptions_text(row):
    """Compose the "assumptions" cell of the report for one result row.

    Args:
        row: A row of the results DataFrame (anything with a dict-like
            ``get``), read for 'justification_and_notes', 'error' and
            'revenue_source_url'.

    Returns:
        The justification text, followed by " Source: <url>" when a source
        URL was reported. If there is no justification, the recorded
        processing error is shown instead; if there is neither, the text
        'No justification provided.' is used.
    """
    assumptions = row.get('justification_and_notes')
    if _is_missing(assumptions):
        # Failed companies have no justification; surface the failure reason
        # instead of letting NaN be rendered as the text "nan".
        error = row.get('error')
        if _is_missing(error):
            assumptions = 'No justification provided.'
        else:
            assumptions = f"Processing failed: {error}"
    elif not isinstance(assumptions, str):
        assumptions = str(assumptions)

    source = row.get('revenue_source_url')
    if _is_missing(source) or source == "Not Found":
        return assumptions
    return f"{assumptions} Source: {source}"


if not df.empty:
    # If every company failed, the numeric input columns do not exist at all;
    # create them empty so the conversions below work uniformly.
    for column in ('total_revenue_2024', 'adm_percentage_assumption'):
        if column not in df.columns:
            df[column] = None

    # Non-numeric answers such as "NA" become NaN and end up as 'NA' below.
    df['Total Revenue (2024)'] = pd.to_numeric(df['total_revenue_2024'], errors='coerce')
    df['ADM Percentage'] = pd.to_numeric(df['adm_percentage_assumption'], errors='coerce')
    df['ADM Revenue (2024)'] = (df['Total Revenue (2024)'] * (df['ADM Percentage'] / 100)).round(4)

    df['Final Assumptions Text'] = df.apply(create_assumptions_text, axis=1)

    # df.get returns None for columns the model never supplied; those columns
    # are filled with 'NA' together with every other missing cell.
    output_df = pd.DataFrame({
        'Private Vendors': df.get('company_name'),
        'Total Revenue (2024)': df.get('Total Revenue (2024)'),
        'ADM Revenue (2024)': df.get('ADM Revenue (2024)'),
        'Assumptions taken to arrive at ADM Revenues': df.get('Final Assumptions Text'),
        'Company Status': df.get('company_status'),
        'Website Status': df.get('website_status')
    })
    output_df = output_df.fillna('NA')

    output_filename = 'company_revenue_analysis_2024.csv'
    output_df.to_csv(output_filename, index=False)

    print(f"\nSuccess! Report saved to '{output_filename}'.")
    display(output_df)
else:
    print("\nAn error occurred during processing or the results were empty.")
    display(df)