In [1]:
import re
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import asyncio
from tqdm import tqdm  # can wrap asyncio.as_completed
import nest_asyncio

import ollama
from ollama import chat
from ollama import ChatResponse
from ollama import AsyncClient

# Base checkpoint that every custom model below is derived from.
llm_base = 'llama3.2:1b'

# Shared ending of each judge's system prompt; only the persona name differs.
system_tail = "judging a young entrepreneur's product pitch. You must fairly evaluate if the product pitch is strong enough to raise money through crowdfunding."

# Register one Ollama model per Shark Tank judge: (model name, persona name).
for _model_name, _persona in (
    ('wonderful', 'Mr. Wonderful'),
    ('daymond', 'Daymond John'),
    ('barbara', 'Barbara Corcoran'),
    ('mark', 'Mark Cuban'),
):
    ollama.create(
        model=_model_name,
        from_=llm_base,
        system=f"You are {_persona} from Shark Tank, {system_tail}",
    )

# Separate model used to turn a judge's free-text reply into a numeric score.
ollama.create(
    model='evaluator',
    from_=llm_base,
    system="You are a machine that outputs single-digit integers based on a query.",
)

# Smoke test: send one unrelated question to the 'wonderful' persona and print
# its reply, to confirm the freshly created model responds.
response: ChatResponse = chat(
    model='wonderful',
    messages=[{'role': 'user', 'content': 'Why is the sky blue?'}],
)
print(response['message']['content'])

A simple question that sparks imagination and curiosity.

The color of the sky is a fascinating topic, and there are several scientific explanations for why it appears blue. Here's a breakdown:

1. **Scattering of light**: When sunlight enters Earth's atmosphere, it encounters tiny molecules of gases such as nitrogen (N2) and oxygen (O2). These molecules scatter the light in all directions, but they scatter shorter (blue) wavelengths more than longer (red) wavelengths.
2. **Rayleigh scattering**: This is the primary reason why the sky appears blue. The smaller particles in the atmosphere, like N2 and O2, scatter the light in a way that favors the blue region of the spectrum. This effect becomes more pronounced during the daytime when the sun is overhead.
3. **Atmospheric conditions**: The color of the sky can also be affected by atmospheric conditions, such as pollution, dust, and water vapor. For example, aerosols like smoke or pollutants can scatter light in a way that makes the sky 

In [2]:
def extract_int(s: str) -> int | None:
    """
    Extracts the first integer found in the input string.
    Returns the integer, or None if no integer is present.
    """
    match = re.search(r'-?\d+', s)
    return int(match.group()) if match else None

# Example: the only integer in this sentence is 2, so that is what gets printed.
text = "I would score it a 2."
print(extract_int(text))  # → 2


2


In [3]:
# Load the post-EDA dataset and discard the 'Unnamed: 0' column
# (presumably an index saved by an earlier export -- confirm against the producer).
df = pd.read_csv("data/post_eda.csv").drop(columns=["Unnamed: 0"])
print(df.columns)

Index(['name', 'blurb', 'disable_communication', 'country', 'deadline',
       'created_at', 'launched_at', 'staff_pick', 'backers_count',
       'usd_pledged', 'spotlight', 'name_len', 'name_len_clean', 'blurb_len',
       'blurb_len_clean', 'create_to_launch_days', 'launch_to_deadline_days',
       'launch_to_state_change_days', 'SuccessfulBool', 'USorGB', 'TOPCOUNTRY',
       'LaunchedTuesday', 'DeadlineWeekend', 'usd_goal', 'cat_Academic',
       'cat_Apps', 'cat_Blues', 'cat_Comedy', 'cat_Experimental',
       'cat_Festivals', 'cat_Flight', 'cat_Gadgets', 'cat_Hardware',
       'cat_Immersive', 'cat_Makerspaces', 'cat_Misc', 'cat_Musical',
       'cat_Places', 'cat_Plays', 'cat_Restaurants', 'cat_Robots',
       'cat_Shorts', 'cat_Software', 'cat_Sound', 'cat_Spaces',
       'cat_Thrillers', 'cat_Wearables', 'cat_Web', 'cat_Webseries',
       'usd_goal (log)'],
      dtype='object')


In [4]:
# Patch asyncio to allow nested event loops, so the asyncio.run(...) calls in
# the cells below work inside a notebook kernel that already runs its own loop.
nest_asyncio.apply()

async def judge_pitch_async(judge_model_str: str,
                            silent: bool = True,
                            max_concurrent: int = 60):
    """
    Have one judge persona review every pitch in the global ``df``.

    For each row a pitch is built from the ``name``, ``blurb`` and
    ``usd_goal`` columns and sent to the Ollama model ``judge_model_str``.
    The judge's reply is then passed to the ``evaluator`` model, which is
    asked to condense the judge's opinion into a single digit (0-9).

    Side effects:
        (Re)creates the columns ``sharktank_<judge_model_str>`` (judge reply)
        and ``sharktank_<judge_model_str>_eval`` (score) on the global ``df``
        and fills them in row by row as requests complete. Calling the
        function again for the same judge resets both columns first.

    Parameters:
        judge_model_str: name of the Ollama model that plays the judge.
        silent: when False, print each pitch, reply and score as it arrives.
        max_concurrent: maximum number of rows processed at the same time.

    Returns:
        None. Results are written into ``df``.
    """
    pitch_cols = ['name', 'blurb', 'usd_goal']
    pitch_df = df[pitch_cols]

    judge_col = f'sharktank_{judge_model_str}'
    eval_col = f'sharktank_{judge_model_str}_eval'

    # make sure our result columns exist (this also clears any earlier results)
    df[judge_col] = ''
    df[eval_col] = 0

    client = AsyncClient()
    sem = asyncio.Semaphore(max_concurrent)

    async def _process(idx, row):
        # The semaphore is held for the whole row, including evaluator retries.
        async with sem:
            name  = row['name']
            blurb = row['blurb']
            goal  = row['usd_goal']

            pitch = f"""
            Thank you for inviting us, Sharks. I'm asking ${goal} for our idea:
            {name}
            {blurb}
            """

            # 1) get the judge's response
            resp1 = await client.chat(
                model=judge_model_str,
                messages=[{'role':'user','content':pitch}]
            )
            message = resp1['message']['content']

            # 2) get the 0-9 evaluation from the evaluator model
            eval_prompt = (
                "Read the following conversation between a fictional entrepreneur and a fictional judge: " +
                f"Entrepreneur: {pitch}\n" +
                f"Judge: {message}\n" +
                "Provide a single-digit response (0-9) scoring whether or not the judge thinks the entrepreneur's idea is good, based on their conversation. Only provide a single-digit integer in your response."
            )
            evaluation = None
            # Ask again until the reply yields a usable score. extract_int
            # returns the first integer of any size or sign, so replies such as
            # "10" or "-3" are rejected here rather than stored as scores.
            # NOTE: the number of retries is not bounded.
            while evaluation is None or not 0 <= evaluation <= 9:
                resp2 = await client.chat(
                    model='evaluator',
                    messages=[{'role':'user','content':eval_prompt}]
                )
                evaluation = extract_int(resp2['message']['content'])

            return idx, pitch, message, evaluation

    # fire off all tasks; the semaphore limits how many run at once
    tasks = [
        asyncio.create_task(_process(idx, row))
        for idx, row in pitch_df.iterrows()
    ]

    try:
        # collect results as they finish, updating df
        for future in tqdm(asyncio.as_completed(tasks),
                           total=len(tasks),
                           desc=f"Processing pitches for {judge_model_str}"):
            idx, pitch, message, evaluation = await future

            if not silent:
                print(pitch)
                print(message)
                print(evaluation)
                print()

            df.at[idx, judge_col] = message
            df.at[idx, eval_col]  = evaluation
    finally:
        # If a request failed or the run was interrupted, stop the remaining
        # tasks instead of leaving them running in the background. After a
        # clean run every task is already done and this is a no-op.
        for task in tasks:
            if not task.done():
                task.cancel()


In [None]:
# Judge every pitch with the 'wonderful' persona; results are written to
# df['sharktank_wonderful'] and df['sharktank_wonderful_eval'].
# Long-running: the recorded run had handled 750 of 20632 pitches after 17 minutes.
asyncio.run(judge_pitch_async('wonderful'))

Processing pitches for wonderful:   4%|▎         | 750/20632 [17:00<3:36:23,  1.53it/s] 

In [None]:
# Judge every pitch with the 'daymond' persona; results are written to
# df['sharktank_daymond'] and df['sharktank_daymond_eval'].
asyncio.run(judge_pitch_async('daymond'))

In [None]:
# Judge every pitch with the 'barbara' persona; results are written to
# df['sharktank_barbara'] and df['sharktank_barbara_eval'].
asyncio.run(judge_pitch_async('barbara'))

In [None]:
# Judge every pitch with the 'mark' persona; results are written to
# df['sharktank_mark'] and df['sharktank_mark_eval'].
asyncio.run(judge_pitch_async('mark'))

In [None]:
# Save post sharktank data
print(df.columns)
# Shuffle the rows with a fixed seed so the saved row order is the same on
# every run, then renumber the index from 0.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
# to_csv writes the index by default, so the file gets a leading unnamed
# column; kept as-is so the file layout does not change for later readers.
df.to_csv("data/post_sharktank.csv")