Let's load the dataset and take a look at it.

In [None]:
import os
# List the entries of the current working directory (no path argument is given).
os.listdir()

In [None]:
import pandas as pd
# Read the "Company History.xlsx" workbook; the path is relative to the working directory.
df = pd.read_excel("Company History.xlsx")
# Preview the first rows of the loaded frame.
df.head()

Let's check how many attributes there are and what they are.

In [None]:
# Print a summary of the frame (columns, non-null counts, dtypes).
df.info()


Let's see how many wins and losses there are.

In [None]:
# Tally how often each distinct value occurs in the "Result" column.
df["Result"].value_counts()


Let's see which services have the highest win count.

In [None]:
# Select the "Service" values of the rows whose Result is "Win" and count
# how many times each service appears.
service_win_mask = df["Result"] == "Win"
df.loc[service_win_mask, "Service"].value_counts()


Let's see which domains have the highest win count.

In [None]:
# Select the "Domain" values of the rows whose Result is "Win" and count
# how many times each domain appears.
domain_win_mask = df["Result"] == "Win"
df.loc[domain_win_mask, "Domain"].value_counts()


In [None]:
import os
from getpass import getpass

import openai

# Keep the key out of the notebook: use OPENAI_API_KEY from the environment
# and, only if it is not set there, ask for it interactively (the input is
# not echoed). A key that is already configured is left untouched.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")
openai.api_key = os.environ["OPENAI_API_KEY"]


In [None]:
# Code to call the OpenAI API

In [None]:
import pandas as pd
import openai

# Step 1: Define X company's strong domains and services
# Maps each domain name to the list of service names for that domain; the
# loop further down issues one OpenAI request per (domain, services) entry.
domain_service_map = {
    "Finance": ["Fraud Detection", "Risk Modeling", "Compliance Automation"],
    "Healthcare": ["Patient Data Analytics", "Risk Modeling"],
    "Retail": ["Customer Analytics", "Demand Forecasting"]
}

# Step 2: Function to query OpenAI for companies and recommended services
def get_companies_for_domain(domain, services, n_companies=10):
    """Ask the OpenAI chat model for prospective client companies in one domain.

    Args:
        domain: Industry name interpolated into the prompt.
        services: Iterable of service-name strings; joined with ", " in the prompt.
        n_companies: Number of companies the prompt asks for (default 10).

    Returns:
        The text content of the first choice of the chat completion. The
        prompt requests CSV with the columns Company, Industry and
        Recommended Service, but the reply is returned as-is, unvalidated.
    """
    service_list = ', '.join(services)
    prompt = f"""
    List {n_companies} real US companies in the {domain} industry that could need services like {service_list}.
    Return in CSV format with columns: Company, Industry, Recommended Service.
    Include the most suitable service for each company based on industry needs.
    """

    system_message = {"role": "system", "content": "You are a business analyst helping identify potential clients."}
    user_message = {"role": "user", "content": prompt}

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
        max_tokens=500,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

# Step 3: Get companies for all domains
from io import StringIO  # pandas no longer exposes a StringIO under pd.compat

all_results = []

for domain, services in domain_service_map.items():
    text_output = get_companies_for_domain(domain, services)
    print(f"\n--- {domain} Companies ---\n{text_output}\n")

    # Chat models often wrap CSV in a Markdown code fence; drop the fence
    # lines so they are not parsed as a header or data row.
    csv_text = "\n".join(
        line
        for line in (text_output or "").splitlines()
        if not line.strip().startswith("```")
    )

    # Convert CSV text to DataFrame. A dedicated name is used on purpose:
    # `df` holds the company-history data that other cells still read.
    try:
        domain_df = pd.read_csv(StringIO(csv_text), skipinitialspace=True)
    except (pd.errors.ParserError, pd.errors.EmptyDataError) as exc:
        # Only parsing problems are treated as "could not parse"; any other
        # exception is a programming error and should surface.
        print(f"Could not parse OpenAI output for {domain} automatically. You can parse manually if needed.")
        print(f"  Parser message: {exc}")
    else:
        all_results.append(domain_df)

# Step 4: Combine all results into one DataFrame (if parsing succeeded)
if all_results:
    final_df = pd.concat(all_results, ignore_index=True)
    print("Combined Result Table:")
    print(final_df)
else:
    print("No DataFrames to combine. You may need to parse OpenAI output manually.")


Web scraping using Wikipedia

In [None]:
from io import StringIO

import requests
import pandas as pd

url = "https://en.wikipedia.org/wiki/List_of_largest_banks_in_the_United_States"
headers = {"User-Agent": "Mozilla/5.0"}
# The timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)
# Fail on a 4xx/5xx reply instead of handing an error page to the HTML parser.
response.raise_for_status()
# read_html expects a path, URL or file-like object; passing the raw HTML
# string directly is deprecated, so wrap it in StringIO.
tables = pd.read_html(StringIO(response.text))
# The first table found on the page is the one used from here on.
finance_df = tables[0]

In [None]:
# Show the column labels of the scraped table; the next cell selects columns by these exact labels.
print(finance_df.columns)


In [None]:
# Build the two-column view from the untouched scraped table (`tables[0]`)
# rather than from `finance_df` itself. Selecting from `finance_df` and then
# renaming its columns made the cell fail with a KeyError when run a second
# time, because "Bank" no longer existed after the first run.
finance_df = tables[0][["Bank", "Total assets (billions of US$)[3]"]].rename(
    columns={
        "Bank": "Company",
        "Total assets (billions of US$)[3]": "Assets",
    }
)

finance_df.head(10)


Hugging Face


In [None]:
from transformers import pipeline


# Build a text-generation pipeline and keep it in `generator` for the cells below.
generator = pipeline(
    "text-generation",
    model="tiiuae/falcon-7b-instruct",
    # Other model ids, currently disabled; swap one in for the `model=` line above to try it.
    #model="tiiuae/falcon-1b-instruct", # Example instruction-tuned model on Hugging Face
    #model="distilgpt2",
    #model="OpenAssistant/oasst-sft-1-pythia-1.3B",
    device=-1  # use CPU, or 0 if you have GPU
)


In [None]:
# Step 1: Instruction text handed to the locally loaded model
prompt = """
List 5 US finance companies and which service they most likely need from:
Fraud Detection, Risk Modeling, Compliance Automation.
Give the output in plain text only, one company per line.
"""

# Step 2: Generate output using your already loaded generator.
# Sampling settings are collected in one place so they are easy to tweak.
generation_options = {
    "max_new_tokens": 40,  # smaller output
    "do_sample": True,
    "temperature": 0.7,
}
result = generator(prompt, **generation_options)

# Step 3: Print the generated text of the first returned candidate
first_candidate = result[0]
print(first_candidate['generated_text'])

The cell above uses a local Hugging Face pipeline. It runs, but takes a very long time on CPU.

In [None]:
import os
from getpass import getpass

import requests

# Step 1: Your Hugging Face API key
# Taken from the HF_API_KEY environment variable; if that is unset you are
# prompted for it (input hidden), so the token is never stored in the notebook.
HF_API_KEY = os.environ.get("HF_API_KEY") or getpass("Hugging Face API key: ")

# Step 2: Public, small model
MODEL = "distilgpt2"

# Step 3: Router API URL (new endpoint)
# NOTE(review): confirm this path against the current Hugging Face inference
# documentation; the router URL scheme may differ from what is used here.
API_URL = f"https://router.huggingface.co/api-inference/{MODEL}"
headers = {"Authorization": f"Bearer {HF_API_KEY}"}

# Step 4: Prompt
prompt = """
List 5 US finance companies and which service they most likely need from:
Fraud Detection, Risk Modeling, Compliance Automation.
Give the output in plain text only, one company per line.
"""

# Step 5: Query function
def query_hf_router(prompt, timeout=60):
    """POST `prompt` to `API_URL` and return the generated text as a string.

    Uses the module-level `API_URL` and `headers`.

    Args:
        prompt: Text sent as the "inputs" field of the request payload.
        timeout: Seconds to wait for the HTTP request before `requests`
            raises a timeout error (default 60).

    Returns:
        - the 'generated_text' (or, failing that, 'text') value of the first
          item when the reply is a non-empty list of dicts;
        - ``str(reply)`` for any other successfully decoded reply shape,
          including an empty list;
        - ``"Error <status>: <body>"`` when the HTTP status is not 200.
    """
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": 150}}
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    if response.status_code != 200:
        return f"Error {response.status_code}: {response.text}"

    output = response.json()
    # Only index into the reply when it really is a non-empty list whose
    # first item is a dict; an empty list or a list of plain strings would
    # otherwise raise IndexError / AttributeError.
    if isinstance(output, list) and output and isinstance(output[0], dict):
        first_item = output[0]
        if 'generated_text' in first_item:
            return first_item['generated_text']
        # Sometimes the output is under 'text'
        return first_item.get('text', str(output))
    return str(output)

# Step 6: Get the output
# Send the prompt through the router helper, then show a heading, a blank
# line and the returned text.
result_text = query_hf_router(prompt)
print("Generated Output:\n", result_text, sep="\n")


In [None]:
import os
from getpass import getpass

import requests

# Read the Groq key from the GROQ_API_KEY environment variable; if it is not
# set, ask for it interactively (input hidden) instead of hardcoding it here.
API_KEY = os.environ.get("GROQ_API_KEY") or getpass("Groq API key: ")
URL = "https://api.groq.com/openai/v1/chat/completions"

headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}

prompt = """
List 5 US finance companies and which service they most likely need from:
Fraud Detection, Risk Modeling, Compliance Automation.
Give the output in plain text only, one company per line.
"""

# Chat-completions request body: a single user message with the prompt.
data = {
    "model": "llama-3.1-8b-instant",
    "messages": [
        {"role": "user", "content": prompt}
    ],
    "temperature": 0.7,
    "max_tokens": 150
}

# The timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.post(URL, headers=headers, json=data, timeout=60)

if response.status_code == 200:
    result = response.json()
    # Print the text of the first returned choice.
    print(result["choices"][0]["message"]["content"])
else:
    print("Error:", response.text)


In [None]:
import os
from getpass import getpass

import requests

# Read the Groq key from the GROQ_API_KEY environment variable; if it is not
# set, ask for it interactively (input hidden) instead of hardcoding it here.
API_KEY = os.environ.get("GROQ_API_KEY") or getpass("Groq API key: ")
URL = "https://api.groq.com/openai/v1/chat/completions"

headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}

prompt = """
List 10 healthcare, hospital network, health insurance, or health technology companies in the USA that would strongly benefit from advanced data analytics services.

For each company:
1. Mention the company name
2. Briefly describe the business area (hospital, insurance, health tech, etc.)
3. Specify one high-value analytics service they likely need, such as:
   - Patient risk prediction
   - Population health analytics
   - Clinical outcomes dashboards
   - Healthcare cost optimization
   - Readmission prediction
   - Resource utilization analysis
   - Claims data analysis
   - Fraud detection in healthcare billing
   - Predictive modeling for chronic diseases

Format:
Company Name – Business Type – Recommended Analytics Service

Output in plain text only, one company per line.

"""

# Chat-completions request body: a single user message with the prompt.
data = {
    "model": "llama-3.1-8b-instant",
    "messages": [
        {"role": "user", "content": prompt}
    ],
    "temperature": 0.7,
    # The prompt asks for ten three-part lines; a 150-token cap cuts the
    # answer off part-way through the list, so allow a longer completion.
    "max_tokens": 500
}

# The timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.post(URL, headers=headers, json=data, timeout=60)

if response.status_code == 200:
    result = response.json()
    # Print the text of the first returned choice.
    print(result["choices"][0]["message"]["content"])
else:
    print("Error:", response.text)


In [None]:
import os
from getpass import getpass

import requests

# Step 1: Extract the services your company is strong in
# Using 'Account' and 'Result' columns from your dataset
# Fail early with a clear message if `df` is not the company-history frame
# (for example because the name was reused by another cell).
missing_columns = {"Account", "Result", "Service"} - set(df.columns)
if missing_columns:
    raise KeyError(
        f"`df` is missing expected column(s) {sorted(missing_columns)}; "
        "re-run the cell that loads 'Company History.xlsx'."
    )

our_wins = (df['Account'] == 'OurCompanyX') & (df['Result'] == 'Win')
# Drop missing values and force strings so that ", ".join cannot fail on NaN.
services_strength = df.loc[our_wins, 'Service'].dropna().astype(str).unique().tolist()
# With no matching rows the prompt would otherwise end in "strong in: ."
services_str = ", ".join(services_strength) or "none recorded"

# Step 2: Define the AI prompt
prompt = f"""
You are a business analyst.

First, list top US finance companies that are most likely to need the following services: Fraud Detection, Risk Modeling, Compliance Automation.

Then, from that list, highlight which of these services OurCompanyX can provide based on our previous successes.
OurCompanyX is strong in: {services_str}.

Format the output as:

1. Company Name – Needed Service(s) – Can OurCompanyX Provide? (Yes/No) – Reason
"""

# Step 3: Groq API setup
# Read the Groq key from the GROQ_API_KEY environment variable; if it is not
# set, ask for it interactively (input hidden) instead of hardcoding it here.
API_KEY = os.environ.get("GROQ_API_KEY") or getpass("Groq API key: ")
URL = "https://api.groq.com/openai/v1/chat/completions"

headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}

data = {
    "model": "llama-3.1-8b-instant",
    "messages": [{"role": "user", "content": prompt}],
    "temperature": 0.7,
    "max_tokens": 1000
}

# Step 4: Make the request
# The timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.post(URL, headers=headers, json=data, timeout=60)

if response.status_code == 200:
    result = response.json()
    # Print the text of the first returned choice.
    print(result["choices"][0]["message"]["content"])
else:
    print("Error:", response.text)
