In [21]:
# Chat endpoint of the locally running server; read as a global by llama().
url = "http://localhost:1143/api/chat"

In [22]:
import requests
import json


def llama(prompt, model, timeout=None):
    """Send a single-turn chat request to the server at ``url`` and return the reply text.

    Args:
        prompt: Text sent as the content of the single ``user`` message.
        model: Model name placed in the request's ``model`` field.
        timeout: Optional timeout in seconds forwarded to ``requests.post``.
            The default ``None`` keeps the previous behaviour (no timeout).

    Returns:
        The value found at ``["message"]["content"]`` in the JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    data = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        # Streaming is disabled; the whole body is parsed as one JSON document below.
        "stream": False,
    }
    headers = {'Content-Type': 'application/json'}

    response = requests.post(url, headers=headers, json=data, timeout=timeout)
    # Surface HTTP failures directly instead of an opaque KeyError on an error body.
    response.raise_for_status()
    return response.json()['message']['content']

In [23]:
# Smoke test: one blocking request, reply printed as plain text.
response = llama(prompt="who wrote the book godfather", model="llama3.1")
print(response)

The novel "The Godfather" was written by Mario Puzo. It was published in 1969 and is considered one of the most influential crime novels of all time.

Mario Puzo (1920-1999) was an Italian-American writer, best known for his work on screenplays and novels that often explored themes of crime, family, and the Italian-American experience. In addition to "The Godfather," he wrote several other novels and screenplays, including "The Last Don" and the screenplay for Francis Ford Coppola's film adaptation of "The Godfather."

Puzo was born in New York City to Italian immigrant parents and grew up in a working-class neighborhood in Manhattan. He served in World War II and later earned a Ph.D. in literature from Georgetown University.

"The Godfather" is widely regarded as Puzo's masterpiece, and its success helped establish him as a prominent figure in American literature. The novel follows the Corleone family, a powerful Italian-American Mafia clan, as they navigate power struggles, loyalty, 

In [24]:
# Second blocking request, this time constraining the length of the answer.
response = llama(
    prompt="can you explain theory of relativity in just two sentences?",
    model="llama3.1",
)
print(response)

Here is a 2-sentence summary of the Theory of Relativity:

According to Albert Einstein's Theory of Special Relativity (1905), time and space are relative, and their measurement depends on the observer's frame of reference: time can appear to slow down or speed up depending on an object's speed and proximity to a massive body. The Theory of General Relativity (1915) built upon this idea, proposing that gravity is not a force, but rather the curvature of spacetime caused by massive objects, which affects the motion of other objects.


In [25]:
import pandas as pd
import time

In [41]:
# Load only the first 8 rows; later cells read the `review` column of this frame.
df = pd.read_csv("IMDB_Dataset.csv", nrows=8)

In [30]:
# Classify every review one request at a time and time the whole run.
# perf_counter() is a monotonic clock, so the measured duration cannot be
# distorted by system clock adjustments.
start = time.perf_counter()
for movie_review in df["review"]:
    # Adjacent literals are joined without the indentation that backslash
    # continuations used to embed in the prompt; wording matches the async version.
    prompt = (
        "I will give you a movie review, you just need to tell me if it is positive or negative review. "
        "Just output one word POSITIVE if the review has a positive sentiment and "
        "NEGATIVE if the review has a negative sentiment. "
        f"Movie Review: {movie_review}"
    )
    ans = llama(prompt, "llama3.1")
    print(ans)

end = time.perf_counter()
print(f"Took - {end - start} seconds")

POSITIVE
POSITIVE
POSITIVE
NEGATIVE
POSITIVE
Took - 7.355223178863525 seconds


In [31]:
import asyncio
import aiohttp

In [32]:
# Echo the endpoint that llama_async() below posts to.
url

'http://localhost:1143/api/chat'

In [34]:
# Asynchronous function to make requests
async def llama_async(session, prompt, model):
    """Send a single-turn chat request through ``session`` and return the reply text.

    Args:
        session: Object whose ``post(...)`` returns an async context manager
            yielding the response (an ``aiohttp.ClientSession`` in this notebook).
        prompt: Text sent as the content of the single ``user`` message.
        model: Model name placed in the request's ``model`` field.

    Returns:
        The value found at ``["message"]["content"]`` in the JSON response.

    Raises:
        aiohttp.ClientResponseError: If the server answers with a 4xx/5xx status.
    """
    data = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        # Streaming is disabled; the whole body is parsed as one JSON document below.
        "stream": False,
    }
    headers = {'Content-Type': 'application/json'}

    async with session.post(url, headers=headers, json=data) as response:
        # Surface HTTP failures directly instead of an opaque KeyError on an error body.
        response.raise_for_status()
        result = await response.json()
        return result['message']['content']

In [42]:
# Main function to process all reviews
async def process_reviews():
    """Classify every review in ``df`` concurrently; print and return the replies.

    One ``aiohttp.ClientSession`` is shared by all requests. A ``llama_async``
    coroutine is created for each value of ``df["review"]`` and all of them are
    awaited together with ``asyncio.gather``.

    Returns:
        list: The model replies, in the same order as the reviews in ``df``.
    """
    async with aiohttp.ClientSession() as session:
        pending = []
        for movie_review in df["review"]:
            prompt = (
                "I will give you a movie review, you just need to tell me if it is positive or negative review. "
                "Just output one word POSITIVE if the review has a positive sentiment and "
                "NEGATIVE if the review has a negative sentiment. "
                f"Movie Review: {movie_review}"
            )
            pending.append(llama_async(session, prompt, "llama3.1"))

        # gather() runs the coroutines concurrently and keeps input order in its result.
        results = await asyncio.gather(*pending)

    for sentiment in results:
        print(sentiment)
    # Hand the replies back so callers can use them instead of re-parsing output.
    return results


In [44]:
start = time.time()
# Handling the running event loop
try:
    # If there's already a running event loop
    asyncio.get_running_loop()
    task = asyncio.create_task(process_reviews())  # Schedule the task
    await task
except RuntimeError:
    # If no event loop is running
    asyncio.run(process_reviews())
    
end = time.time()
print(f"Took - {end - start} seconds")

POSITIVE
POSITIVE
POSITIVE
NEGATIVE
POSITIVE
POSITIVE
POSITIVE
NEGATIVE
Took - 1.0214169025421143 seconds


In [None]:
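# Server-side settings needed for parallel requests.
# macOS app (launchd): `launchctl setenv` takes one variable per call:
#   launchctl setenv OLLAMA_HOST "0.0.0.0"
#   launchctl setenv OLLAMA_NUM_PARALLEL 10

# Or start the server manually with both settings (port matches `url` above):
#   OLLAMA_NUM_PARALLEL=10 OLLAMA_HOST=127.0.0.1:1143 ollama serve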
    

In [25]:
# Imports repeated here so this cell runs on its own.
# (A stray `start = time` used to sit above them: it referenced `time` before
# this cell imported it and bound the module itself to `start`.)
import asyncio
import time

import aiohttp

# Chat endpoint used by send_request() below.
url = "http://localhost:1143/api/chat"

# Function to send a single request
async def send_request(session, prompt, model):
    """POST one single-turn chat request and return the decoded JSON body.

    Unlike ``llama_async``-style helpers that extract the reply text, this
    returns the whole response dictionary unchanged.
    """
    payload = dict(
        model=model,
        messages=[dict(role="user", content=prompt)],
        stream=False,
    )
    request_headers = {"Content-Type": "application/json"}

    async with session.post(url, headers=request_headers, json=payload) as reply:
        body = await reply.json()
    return body

# Test concurrency
async def test_concurrency():
    """Fire five fixed prompts at once, then print the elapsed time and each raw reply.

    All requests share one ``aiohttp.ClientSession``; the timer covers only the
    ``asyncio.gather`` call that awaits them together.
    """
    questions = (
        "What is the capital of France?",
        "Explain the theory of relativity.",
        "Summarize the plot of 'The Matrix'.",
        "What is the square root of 16?",
        "Describe the process of photosynthesis.",
    )

    async with aiohttp.ClientSession() as http:
        pending = []
        for question in questions:
            pending.append(send_request(http, question, "llama3.1"))

        started_at = time.time()
        replies = await asyncio.gather(*pending)
        elapsed = time.time() - started_at

        print(f"Total time for concurrent requests: {elapsed} seconds")
        for reply in replies:
            print(reply)

# Run the test
# asyncio.run(test_concurrency())
asyncio.get_running_loop()
task = asyncio.create_task(test_concurrency())  # Schedule the task
await task


Total time for concurrent requests: 62.753661155700684 seconds
{'model': 'llama3.1', 'created_at': '2024-12-26T10:11:56.505031Z', 'message': {'role': 'assistant', 'content': 'The capital of France is Paris.'}, 'done_reason': 'stop', 'done': True, 'total_duration': 4419246541, 'load_duration': 1088405916, 'prompt_eval_count': 17, 'prompt_eval_duration': 404000000, 'eval_count': 8, 'eval_duration': 722000000}
{'model': 'llama3.1', 'created_at': '2024-12-26T10:12:54.825493Z', 'message': {'role': 'assistant', 'content': "One of the most famous and influential scientific theories of the 20th century! Albert Einstein's Theory of Relativity is a fundamental concept in modern physics that has revolutionized our understanding of space, time, and gravity.\n\n**What is the Theory of Relativity?**\n\nThe Theory of Relativity consists of two main components: Special Relativity (SR) and General Relativity (GR). Both theories are built upon Einstein's insight that the laws of physics are the same for