In [1]:
import random

In [2]:
def monte_carlo_eval(prompt, trials=100):
    """
    Score a prompt by averaging the outcome of simulated random responses.

    Every trial draws one response category uniformly at random and maps it
    to a numeric score (3 = highly relevant, 2 = somewhat relevant,
    1 = irrelevant). The prompt text is not inspected; this is a pure
    simulation placeholder.

    Parameters:
    prompt (str): Prompt being evaluated (currently unused by the simulation).
    trials (int): Number of random trials to average over. Defaults to 100.

    Returns:
    float: Mean score over all trials, between 1 and 3 inclusive.

    Raises:
    ValueError: If trials is zero or negative.
    """
    # Guard first: zero trials would otherwise surface as ZeroDivisionError.
    if trials <= 0:
        raise ValueError(f"trials must be a positive integer, got {trials!r}")

    # Simulating different types of responses
    response_types = ['highly relevant', 'somewhat relevant', 'irrelevant']
    scores = {'highly relevant': 3, 'somewhat relevant': 2, 'irrelevant': 1}

    # One random draw per trial, accumulated into a single total
    total_score = sum(scores[random.choice(response_types)] for _ in range(trials))

    # Average score represents the evaluation
    return total_score / trials

def elo_eval(prompt, base_rating=1500, K=30, opponent_rating=1600):
    """
    Simulate one Elo match for a prompt and return its updated rating.

    The match result is drawn uniformly at random from win / loss / draw; the
    prompt text is not inspected. The rating is then moved by
    K * (actual_score - expected_score), where the expected score follows
    the usual Elo logistic curve against a fixed-strength opponent.

    Parameters:
    prompt (str): Prompt being evaluated (currently unused by the simulation).
    base_rating (float): Rating of the prompt before the match.
    K (float): Maximum change in rating from a single match.
    opponent_rating (float): Fixed rating of the simulated opponent.

    Returns:
    float: Rating of the prompt after the simulated match.
    """
    # Simulate the outcome of the prompt against standard criteria
    # Here, we randomly decide if the prompt 'wins', 'loses', or 'draws'
    outcomes = ['win', 'loss', 'draw']
    outcome = random.choice(outcomes)

    # Expected score: share of the combined "strength" held by this prompt
    R_base = 10 ** (base_rating / 400)
    R_opponent = 10 ** (opponent_rating / 400)
    expected_score = R_base / (R_base + R_opponent)

    # Calculate the new rating based on the outcome
    actual_score = {'win': 1, 'loss': 0, 'draw': 0.5}[outcome]
    new_rating = base_rating + K * (actual_score - expected_score)

    return new_rating

In [3]:
def elo_ratings_func(prompts, elo_ratings, K=30, opponent_rating=1600):
    """
    Run one simulated Elo round for every prompt and return the ratings.

    For each prompt a match result is drawn at random, and the prompt's entry
    in ``elo_ratings`` is adjusted in place by K * (actual - expected).

    Parameters:
    prompts (list): Prompts to evaluate; each must be a key of elo_ratings.
    elo_ratings (dict): Current Elo rating per prompt (mutated in place).
    K (int): Maximum change in rating per match.
    opponent_rating (int): Fixed rating of the simulated opponent.

    Returns:
    dict: The same elo_ratings dictionary, with updated values.
    """
    # Numeric score awarded for each possible match result
    score_for = {'win': 1, 'loss': 0, 'draw': 0.5}

    for item in prompts:
        # Draw the simulated result before touching the rating table
        result = random.choice(['win', 'loss', 'draw'])
        achieved = score_for[result]

        # Logistic expectation of this prompt against the fixed opponent
        own_strength = 10 ** (elo_ratings[item] / 400)
        rival_strength = 10 ** (opponent_rating / 400)
        anticipated = own_strength / (own_strength + rival_strength)

        elo_ratings[item] = elo_ratings[item] + K * (achieved - anticipated)

    return elo_ratings

# Example usage: rank a handful of prompts by simulated Elo rating
prompts = [
    "Who founded OpenAI?",
    "What was the initial goal of OpenAI?",
    "What did OpenAI release in 2016?",
    "What project did OpenAI showcase in 2018?",
    "How did the AI agents in OpenAI Five work together?",
]

# Every prompt starts from the same baseline rating
elo_ratings = dict.fromkeys(prompts, 1500)

# Ten evaluation rounds; each round updates every prompt once
for _ in range(10):
    elo_ratings = elo_ratings_func(prompts, elo_ratings)

# Highest-rated prompt first
sorted_prompts = sorted(prompts, key=elo_ratings.get, reverse=True)

# Report the ranking, one prompt per line
for prompt in sorted_prompts:
    print(f"{prompt}: {elo_ratings[prompt]}")

How did the AI agents in OpenAI Five work together?: 1544.8041192047763
What project did OpenAI showcase in 2018?: 1534.6085225403153
Who founded OpenAI?: 1534.2668516559863
What was the initial goal of OpenAI?: 1502.2302022968731
What did OpenAI release in 2016?: 1496.9007702720478


In [4]:
def evaluate_prompt(main_prompt, test_cases):
    """
    Evaluate a main prompt and its test cases with both scoring methods.

    Parameters:
    main_prompt (str): Prompt stored under the 'main_prompt' key.
    test_cases (iterable): Prompts stored under 'test_case_1', 'test_case_2', ...

    Returns:
    dict: Maps each label to a dict holding the 'Monte Carlo Evaluation' and
    'Elo Rating Evaluation' results for that prompt.
    """
    def _score(text):
        # Monte Carlo is evaluated first, then Elo, for every prompt
        return {
            'Monte Carlo Evaluation': monte_carlo_eval(text),
            'Elo Rating Evaluation': elo_eval(text),
        }

    # The main prompt is scored before any test case
    evaluations = {'main_prompt': _score(main_prompt)}

    # Test cases are labelled starting from 1
    for position, case in enumerate(test_cases, start=1):
        evaluations[f'test_case_{position}'] = _score(case)

    return evaluations

In [5]:
# Prompt under evaluation, plus the related questions scored alongside it
main_prompt = "why we use OepenAI?"
test_cases = [
    "Who founded OpenAI?",
    "What was the initial goal of OpenAI?",
    "What did OpenAI release in 2016?",
    "What project did OpenAI showcase in 2018?",
    "How did the AI agents in OpenAI Five work together?",
]

# Score everything with both methods and show the raw result dictionary
result = evaluate_prompt(main_prompt=main_prompt, test_cases=test_cases)
print(result)

{'main_prompt': {'Monte Carlo Evaluation': 1.85, 'Elo Rating Evaluation': 1519.2019499940866}, 'test_case_1': {'Monte Carlo Evaluation': 2.06, 'Elo Rating Evaluation': 1519.2019499940866}, 'test_case_2': {'Monte Carlo Evaluation': 1.99, 'Elo Rating Evaluation': 1519.2019499940866}, 'test_case_3': {'Monte Carlo Evaluation': 2.07, 'Elo Rating Evaluation': 1504.2019499940866}, 'test_case_4': {'Monte Carlo Evaluation': 2.15, 'Elo Rating Evaluation': 1489.2019499940866}, 'test_case_5': {'Monte Carlo Evaluation': 1.91, 'Elo Rating Evaluation': 1519.2019499940866}}


In [9]:
import requests
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter  
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Weaviate
import weaviate
from weaviate.embedded import EmbeddedOptions
from dotenv import load_dotenv,find_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Weaviate
import weaviate
from weaviate.embedded import EmbeddedOptions
from dotenv import load_dotenv,find_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

ModuleNotFoundError: No module named 'weaviate'