In [None]:
# Synchronous Example
from atoma_sdk import AtomaSDK
import os

from dotenv import load_dotenv
load_dotenv()

# Fail fast when the API token is missing instead of on the first request.
# `is not None` is the idiomatic singleton comparison; the message says what to fix.
assert os.environ.get('ATOMA_BEARER') is not None, "ATOMA_BEARER is not set (expected in the environment or a .env file)"


def query_llm(query, include_thinking_text=False, model="deepseek-ai/DeepSeek-R1"):
    """Send one user message to the Atoma chat API and return the reply text.

    Args:
        query: Text sent as the content of the single user-role message.
        include_thinking_text: When False (default), only the text after the
            last ``</think>`` tag is returned, stripped of surrounding
            whitespace. When True, the message content is returned unchanged.
        model: Model identifier forwarded to ``chat.create``. Defaults to the
            previously hard-coded ``"deepseek-ai/DeepSeek-R1"``.

    Returns:
        The reply text. If anything raises while creating the client, sending
        the request or reading the response, the error is printed and a string
        starting with ``"error query llm: "`` is returned instead of raising,
        so callers that store the result should check for that prefix.
    """
    try:
        with AtomaSDK(
            bearer_auth=os.environ.get('ATOMA_BEARER'),
        ) as atoma_sdk:
            res = atoma_sdk.chat.create(
                messages=[
                    {
                        "content": query,
                        "role": "user",
                    },
                ],
                model=model,
                frequency_penalty=0,
                max_tokens=2048,
                n=1,
                presence_penalty=0,
                seed=123,
                # NOTE(review): this stop sequence looks like a placeholder copied
                # from an SDK sample; confirm it is intended as a literal stop string.
                stop=[
                    "json([\"stop\", \"halt\"])",
                ],
                temperature=0.7,
                top_p=1,
                user="user-1234",
            )
            content = res.choices[0].message.content
            if include_thinking_text:
                return content
            # Keep only what follows the last closing think tag; if the tag is
            # absent, split() yields the whole text and nothing is dropped.
            return content.split('</think>')[-1].strip()
    except Exception as ex:
        # Deliberately best-effort: report the failure in-band so long batch runs continue.
        print(f' error query llm: {ex}')
        return f"error query llm: {ex}"


# Smoke test: send one prompt through query_llm. As the last expression in the
# cell, the returned string is what the notebook displays.
query_llm('what is the purpose of life?')

In [None]:
from atoma_sdk import AtomaSDK
import os

# List the models exposed by the API and print the raw response object.
with AtomaSDK(bearer_auth=os.environ.get('ATOMA_BEARER')) as atoma_sdk:
    res = atoma_sdk.models_.models_list()
    print(res)

'''
data=[Model(created=1686935002, id='meta-llama/Llama-3.3-70B-Instruct', object='model', owned_by='atoma'), Model(created=1686935002, id='deepseek-ai/DeepSeek-R1', object='model', owned_by='atoma'), Model(created=1686935002, id='intfloat/multilingual-e5-large-instruct', object='model', owned_by='atoma'), Model(created=1686935002, id='black-forest-labs/FLUX.1-schnell', object='model', owned_by='atoma')] object='list'
'''

In [9]:
# Basic prompt template for the first pass: asks the model to extract a16z's
# recurring evaluation questions and decision factors. The batching cell further
# down appends the article text to the end of this string, so the template is
# sent verbatim; do not reformat it casually.
prompt = '''
### **Prompt: Extracting Investor Thought Patterns from a16z Articles**  

**Objective:**  
You are an expert in analyzing investor decision-making. Given a set of articles where a16z explains their investment choices, your task is to extract **common evaluation questions** and **key decision factors** that drive their investments.  

#### **Instructions:**  

1. **Extract Key Questions:**  
   - Identify recurring questions that a16z implicitly or explicitly considers when evaluating startups.  
   - These questions should reflect their thought process about market potential, team quality, business model, competitive advantage, etc.  
   - Phrase the extracted questions in a general form, as if they could be applied to any startup.  

2. **Summarize Decision Factors:**  
   - Identify the core reasons they provide for investing in a startup.  
   - Categorize these reasons (e.g., product differentiation, founder experience, market size).  
   - If possible, quantify their priorities (e.g., "strong emphasis on network effects" or "frequent mention of founder-market fit").  

#### **Format the Output:**  

```
### 1. Key Questions Asked by A16Z  
- [Question 1]  
- [Question 2]  
- ...  

### 2. Common Investment Decision Factors  
#### Category: Market Opportunity  
- [Factor 1]  
- [Factor 2]  

#### Category: Team & Execution  
- [Factor 3]  
- [Factor 4]  
```

#### **Example Output (Hypothetical)**  

```
### 1. Key Questions Asked by A16Z  
- Does the startup have a clear and defensible competitive advantage?  
- Is the founding team uniquely positioned to execute this idea?  
- How does this product create or benefit from network effects?  
- What is the long-term market potential?  

### 2. Common Investment Decision Factors  
#### Category: Market Opportunity  
- Large addressable market with increasing demand  
- Clear pain point with potential for recurring revenue  

#### Category: Team & Execution  
- Experienced founders with previous exits or deep industry expertise  
- Clear, compelling vision with strong execution capability  
```

---
'''


In [7]:
# Add blog posts to this prompt template
import json
# The articles contain non-ASCII text (curly quotes, non-breaking spaces) and JSON
# is UTF-8 by specification, so pin the encoding rather than rely on the platform default.
with open('blog_posts.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Preview the first record to check the schema (title / date / author / content).
data[0]

{'title': 'Investing in Nash',
 'date': 'July 26, 2022',
 'author': 'Andrew Chen',
 'content': ['We’re excited to announce that a16z is leading the Series A for Nash, an API-based platform for last-mile delivery.\xa0',
  'The Nash marketplace gives businesses the “superpower” of being able to offer delivery, without having to build their own fleet or manage logistics. Nash customers get access to a network of hundreds of local and national providers and immediately receive quotes for delivery requests based on price, availability, and vehicle type. Businesses can use Nash to activate same-day (or even same-hour) delivery for their customers.\xa0',
  'We first partnered with Nash for their seed round in the fall of 2021, shortly after co-founders Mahmoud Ghulman and Aziz Alghunaim launched the company and graduated from YCombinator. At that time, Nash was primarily serving local businesses — many of whom had been hit hard by the pandemic and wanted to compete with Amazon and other mega-

In [11]:
# First pass: send the articles to the model in batches and collect one summary per batch.
summaries = []
import time

start_time = time.time()

steps = 7  # number of articles sent to the model per request
paragraph_sep = '\n\n'  # joins the paragraphs of one article
for i in range(0, len(data), steps):
    # Slice ends are exclusive, so clamp to len(data), not len(data) - 1;
    # the old bound silently dropped the last article.
    batch_end = min(len(data), i + steps)
    # Progress line: elapsed seconds, then the inclusive index range of this batch.
    print(f"{time.time() - start_time}: {i}-{batch_end - 1}")
    investment_articles = data[i:batch_end]
    # The separator is joined via a variable because a backslash inside an
    # f-string expression is a SyntaxError before Python 3.12.
    article_texts = [
        f"Title: {article['title']} \n\n content:\n{paragraph_sep.join(article['content'])}"
        for article in investment_articles
    ]
    articles_prompt = prompt + "Here are the articles:\n --- \n\n" + "\n\n\n".join(article_texts)
    # query_llm returns an "error query llm: ..." string instead of raising,
    # so a failed batch is stored as that string.
    response = query_llm(articles_prompt)
    summaries.append(response)


# Persist the first-pass summaries so later stages can reload them without re-querying.
with open('summaries1.json','w') as f:
    json.dump(summaries, f)

'''
time taken:
3172.9seconds : 266-271
'''

0.0008809566497802734: 0-5

### **Prompt: Extracting Investor Thought Patterns from a16z Articles**  

**Objective:**  
You are an expert in analyzing investor decision-making. Given a set of articles where a16z explains their investment choices, your task is to extract **common evaluation questions** and **key decision factors** that drive their investments.  

#### **Instructions:**  

1. **Extract Key Questions:**  
   - Identify recurring questions that a16z implicitly or explicitly considers when evaluating startups.  
   - These questions should reflect their thought process about market potential, team quality, business model, competitive advantage, etc.  
   - Phrase the extracted questions in a general form, as if they could be applied to any startup.  

2. **Summarize Decision Factors:**  
   - Identify the core reasons they provide for investing in a startup.  
   - Categorize these reasons (e.g., product differentiation, founder experience, market size).  
   - If possible,

# Merge summaries

In [12]:
# Prompt template for the merge passes: asks the model to deduplicate questions
# and consolidate decision factors across several earlier summaries. The summaries
# themselves are appended to the end of this string by the merge cells below.
prompt_to_summarise_summary='''

### **Prompt: Merging and Deduplicating Investment Insights**  

**Objective:**  
You have been provided with multiple summaries containing extracted investor questions and decision factors from a16z articles. Your task is to **combine similar questions** and **merge overlapping decision factors** while preserving the meaning.  

#### **Instructions:**  

1. **Deduplicate Questions:**  
   - Identify questions that are phrased differently but ask the same fundamental thing.  
   - Merge them into a single well-structured question that captures the core idea.  
   - Ensure the final list is concise and avoids redundancy.  

2. **Merge Decision Factors:**  
   - Identify decision factors that convey the same meaning but are worded differently.  
   - Consolidate them into a single, clearer statement.  
   - Group them under relevant categories (e.g., Market Opportunity, Team, Product).  

#### **Format the Output:**  

```
### 1. Consolidated Key Questions Asked by A16Z  
- [Merged Question 1]  
- [Merged Question 2]  
- ...  

### 2. Refined Investment Decision Factors  
#### Category: Market Opportunity  
- [Merged Factor 1]  
- [Merged Factor 2]  

#### Category: Team & Execution  
- [Merged Factor 3]  
- [Merged Factor 4]  
```

#### **Example Input (Before Merging)**  

```
### Key Questions (Before Merging)  
- How big is the market for this product?  
- What is the total addressable market (TAM) for this startup?  
- Does this startup have a strong competitive moat?  
- How defensible is this startup's business model?  

### Decision Factors (Before Merging)  
#### Category: Market  
- The startup operates in a growing market with high demand.  
- The total addressable market (TAM) is significant.  

#### Category: Competitive Advantage  
- The company has a strong moat through technology or brand.  
- The startup's business model is highly defensible.  
```

#### **Example Output (After Merging)**  

```
### 1. Consolidated Key Questions Asked by A16Z  
- What is the total addressable market (TAM), and how fast is it growing?  
- Does the startup have a defensible competitive advantage?  

### 2. Refined Investment Decision Factors  
#### Category: Market Opportunity  
- The startup operates in a large and growing market with strong demand.  

#### Category: Competitive Advantage  
- The business model is defensible due to technological innovation or brand strength.  
```

'''



In [None]:

# Reload the first-pass summaries from disk.
# Expected size per the original note: 271/7 ~ 39 summaries.
with open('summaries1.json') as summaries_file:
    summaries = json.load(summaries_file)


In [5]:

# Second pass: merge the first-pass summaries in batches, one merged summary per batch.
summaries2 = []
import time

start_time = time.time()

steps = 7  # number of summaries merged per request
for i in range(0, len(summaries), steps):
    # Slice ends are exclusive, so clamp to len(summaries); the old
    # len(summaries) - 1 bound dropped the last summary.
    batch_end = min(len(summaries), i + steps)
    # Progress line uses the list being iterated. It used to read len(data),
    # which is a different list and may not exist in a fresh kernel.
    print(f"{time.time() - start_time}: {i}-{batch_end - 1}")
    summaries_batch = summaries[i:batch_end]

    final_summary_prompt = (
        prompt_to_summarise_summary
        + "Here are the summaries:\n --- \n\n"
        + "\n\n\n".join(str(summary) for summary in summaries_batch)
    )
    # query_llm returns an "error query llm: ..." string instead of raising,
    # so a failed batch is stored as that string.
    response = query_llm(final_summary_prompt)
    summaries2.append(response)


# Persist the second-pass output for the next stage.
with open('summaries2.json','w') as f:
    json.dump(summaries2, f)

2.2

# Merge the resulting summaries

In [13]:
# Reload the second-pass summaries from disk.
# Expected size per the original note: 270/7/7 ~= 6.
with open('summaries2.json') as summaries_file:
    summaries = json.load(summaries_file)


In [15]:
# Third pass: merge all remaining summaries in a single request.
summaries3 = []
import time

start_time = time.time()

# Re-summarise all the summaries in one batch. max(1, ...) keeps the range()
# step non-zero when the list is empty (range() raises ValueError on step 0).
steps = max(1, len(summaries))
for i in range(0, len(summaries), steps):
    # Slice ends are exclusive, so clamp to len(summaries). With steps equal to
    # the list length, the old len(summaries) - 1 bound always dropped the last summary.
    batch_end = min(len(summaries), i + steps)
    # Progress line uses the list being iterated; it used to read len(data).
    print(f"{time.time() - start_time}: {i}-{batch_end - 1}")
    summaries_batch = summaries[i:batch_end]

    final_summary_prompt = (
        prompt_to_summarise_summary
        + "Here are the summaries:\n --- \n\n"
        + "\n\n\n".join(str(summary) for summary in summaries_batch)
    )
    # query_llm returns an "error query llm: ..." string instead of raising,
    # so a failed batch is stored as that string.
    response = query_llm(final_summary_prompt)
    summaries3.append(response)


# Persist the third-pass output for the final stage.
with open('summaries3.json','w') as f:
    json.dump(summaries3, f)

0.00023937225341796875: 0-6


NameError: name 'query_llm' is not defined

## Final pass: summarise the summaries

In [None]:
# Reload the third-pass output from disk.
# The original note here repeats the 270/7/7 ~= 6 estimate from the previous stage.
# NOTE(review): summaries3.json is written by the single-batch pass, so it
# presumably holds one entry; confirm.
with open('summaries3.json') as summaries_file:
    summaries = json.load(summaries_file)


In [None]:
# Final pass: run the merge prompt over whatever summaries remain and save the result.
summaries4 = []
import time

start_time = time.time()

steps = 7  # number of summaries merged per request
for i in range(0, len(summaries), steps):
    # Slice ends are exclusive, so clamp to len(summaries); the old
    # len(summaries) - 1 bound dropped the last summary, and a one-element
    # list produced an empty batch.
    batch_end = min(len(summaries), i + steps)
    # Progress line uses the list being iterated; it used to read len(data).
    print(f"{time.time() - start_time}: {i}-{batch_end - 1}")
    summaries_batch = summaries[i:batch_end]

    final_summary_prompt = (
        prompt_to_summarise_summary
        + "Here are the summaries:\n --- \n\n"
        + "\n\n\n".join(str(summary) for summary in summaries_batch)
    )
    # The leftover debug print/break that sat here made the request below
    # unreachable, so summaries4 was always empty.
    # query_llm returns an "error query llm: ..." string instead of raising,
    # so a failed batch is stored as that string.
    response = query_llm(final_summary_prompt)
    summaries4.append(response)


# json has no write(); json.dump is the call that serialises to a file object.
with open('summaries4.json','w') as f:
    json.dump(summaries4, f)