In [8]:
import requests
import json
import base64


def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode, its bytes are base64-encoded, and the
    encoded bytes are decoded as UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

def generate_response_with_image(prompt, image_path, model="llava",
                                 base_url="http://ollama:11434", timeout=None):
    """Send a prompt plus one image to Ollama's generate endpoint and return the text.

    Args:
        prompt: Text prompt sent alongside the image.
        image_path: Path of the image file; it is base64-encoded with
            ``encode_image`` before being sent.
        model: Name of the Ollama model to query.
        base_url: Root URL of the Ollama server (no trailing slash).
        timeout: Passed straight to ``requests.post``; ``None`` waits indefinitely.

    Returns:
        The concatenated ``response`` fragments of the stream with surrounding
        whitespace stripped, or a string starting with ``"Error: "`` when the
        request fails, the stream contains an error object, or a stream line
        is not valid JSON.

    Raises:
        Whatever ``encode_image`` raises when the image cannot be read; that
        call happens before the request and is intentionally not swallowed.
    """
    payload = {
        "model": model,
        "prompt": prompt,
        "images": [encode_image(image_path)],
    }

    try:
        # The context manager releases the streamed connection even when we
        # leave the loop early or a line fails to parse.
        with requests.post(f"{base_url}/api/generate", json=payload,
                           stream=True, timeout=timeout) as response:
            response.raise_for_status()
            fragments = []
            for line in response.iter_lines():
                if not line:
                    continue
                chunk = json.loads(line)
                # A failure after streaming has started arrives as an object
                # with an "error" key; surface it instead of returning a
                # truncated or empty answer.
                if 'error' in chunk:
                    return f"Error: {chunk['error']}"
                if 'response' in chunk:
                    fragments.append(chunk['response'])
        return "".join(fragments).strip()
    except requests.exceptions.RequestException as e:
        return f"Error: {str(e)}"
    except ValueError as e:
        # json.JSONDecodeError is a ValueError: report a malformed stream line
        # using the same string convention as network errors.
        return f"Error: {str(e)}"

def generate_response(prompt, model="llava", base_url="http://ollama:11434", timeout=None):
    """Send a text prompt to Ollama's generate endpoint and return the full answer.

    Args:
        prompt: Text prompt to send.
        model: Name of the Ollama model to query.
        base_url: Root URL of the Ollama server (no trailing slash).
        timeout: Passed straight to ``requests.post``; ``None`` waits indefinitely.

    Returns:
        The concatenated ``response`` fragments of the stream with surrounding
        whitespace stripped, or a string starting with ``"Error: "`` when the
        request fails, the stream contains an error object, or a stream line
        is not valid JSON.
    """
    payload = {
        "model": model,
        "prompt": prompt,
    }

    try:
        # The context manager releases the streamed connection even when we
        # leave the loop early or a line fails to parse.
        with requests.post(f"{base_url}/api/generate", json=payload,
                           stream=True, timeout=timeout) as response:
            response.raise_for_status()
            fragments = []
            for line in response.iter_lines():
                if not line:
                    continue
                chunk = json.loads(line)
                # A failure after streaming has started arrives as an object
                # with an "error" key; surface it instead of returning a
                # truncated or empty answer.
                if 'error' in chunk:
                    return f"Error: {chunk['error']}"
                if 'response' in chunk:
                    fragments.append(chunk['response'])
        return "".join(fragments).strip()
    except requests.exceptions.RequestException as e:
        return f"Error: {str(e)}"
    except ValueError as e:
        # json.JSONDecodeError is a ValueError: report a malformed stream line
        # using the same string convention as network errors.
        return f"Error: {str(e)}"

def chat(messages, model="llava", base_url="http://ollama:11434", timeout=None):
    """Send a conversation to Ollama's chat endpoint and return the assistant's reply.

    Args:
        messages: The conversation, sent unchanged as the ``messages`` field
            of the request body.
        model: Name of the Ollama model to query.
        base_url: Root URL of the Ollama server (no trailing slash).
        timeout: Passed straight to ``requests.post``; ``None`` waits indefinitely.

    Returns:
        The concatenated ``message.content`` fragments of the stream with
        surrounding whitespace stripped, or a string starting with
        ``"Error: "`` when the request fails, the stream contains an error
        object, or a stream line is not valid JSON.
    """
    payload = {
        "model": model,
        "messages": messages,
    }

    try:
        # The context manager releases the streamed connection even when we
        # leave the loop early or a line fails to parse.
        with requests.post(f"{base_url}/api/chat", json=payload,
                           stream=True, timeout=timeout) as response:
            response.raise_for_status()
            fragments = []
            for line in response.iter_lines():
                if not line:
                    continue
                chunk = json.loads(line)
                # A failure after streaming has started arrives as an object
                # with an "error" key; surface it instead of returning a
                # truncated or empty answer.
                if 'error' in chunk:
                    return f"Error: {chunk['error']}"
                if 'message' in chunk and 'content' in chunk['message']:
                    fragments.append(chunk['message']['content'])
        return "".join(fragments).strip()
    except requests.exceptions.RequestException as e:
        return f"Error: {str(e)}"
    except ValueError as e:
        # json.JSONDecodeError is a ValueError: report a malformed stream line
        # using the same string convention as network errors.
        return f"Error: {str(e)}"

def test_ollama_connection():
    try:
        response = requests.get("http://ollama:11434")
        response.raise_for_status()
        return "Successfully connected to Ollama"
    except requests.exceptions.RequestException as e:
        return f"Failed to connect to Ollama: {str(e)}"

def list_models(base_url="http://ollama:11434", timeout=10):
    """Return the models reported by the Ollama server's ``/api/tags`` endpoint.

    Args:
        base_url: Root URL of the Ollama server (no trailing slash).
        timeout: Seconds passed to ``requests.get``; pass ``None`` to wait
            indefinitely.

    Returns:
        The value of the ``models`` field of the JSON response, or an empty
        list when that field is missing or null. On a request or decoding
        failure, a string of the form ``"Error listing models: <reason>"``.
    """
    try:
        response = requests.get(f"{base_url}/api/tags", timeout=timeout)
        response.raise_for_status()
        # Tolerate a body without a usable "models" field instead of raising KeyError.
        return response.json().get('models') or []
    except requests.exceptions.RequestException as e:
        return f"Error listing models: {str(e)}"
    except ValueError as e:
        # Older requests versions raise a plain ValueError subclass for a
        # non-JSON body; report it the same way as other failures.
        return f"Error listing models: {str(e)}"

In [7]:


# Probe the server once and reuse the result: calling test_ollama_connection()
# a second time for the branch below costs another request and could disagree
# with the status that was just printed.
connection_status = test_ollama_connection()
print(connection_status)
print("Available models:", list_models())

if "Successfully" in connection_status:
    print("\nTesting generate_response:")
    response = generate_response("Tell me a short joke about programming")
    print("Ollama response:", response)
    
    print("\nTesting chat:")
    # A short multi-turn history; the final user message is the one to answer.
    messages = [
        {"role": "user", "content": "Hello, who are you?"},
        {"role": "assistant", "content": "Hello! I'm an AI assistant created by Anthropic. How can I help you today?"},
        {"role": "user", "content": "Can you tell me a joke about AI?"}
    ]
    chat_response = chat(messages)
    print("Ollama chat response:", chat_response)
else:
    print("Skipping tests due to connection error")

Successfully connected to Ollama
Available models: [{'name': 'llava:latest', 'model': 'llava:latest', 'modified_at': '2024-07-14T15:56:03.693664959Z', 'size': 4733363377, 'digest': '8dd30f6b0cb19f555f2c7a7ebda861449ea2cc76bf1f44e262931f45fc81d081', 'details': {'parent_model': '', 'format': 'gguf', 'family': 'llama', 'families': ['llama', 'clip'], 'parameter_size': '7B', 'quantization_level': 'Q4_0'}}]

Testing generate_response:
Ollama response: Why did the programmer quit his job?

Because he didn't get arrays.

Testing chat:
Ollama chat response: Sure, here's one:

Why don't AI algorithms ever get lost?

Because they always know where to code!


In [9]:
# Ask the default model to describe a local image; the returned text is the cell output.
generate_response_with_image(
    prompt="What's in this image?",
    image_path="./pipboy.png",
)

'The image shows a video game character, specifically from the "Fallout" series, which is a popular post-apocalyptic game. In this specific screen, we can see a menu that appears to be related to cleaning tasks within the game. There are options like "Cleansing the Commonwealth," "Quartermastery," and "Getting a Cure." The setting seems to be from one of the missions in the game where the character has to clean up various items scattered around, possibly after a battle or event in the game. The menu also includes a timer that counts down from 10:27 to 10:28 PM and a status bar that reads "Clear out Super Mart," suggesting that the task at hand is related to clearing out an area called Super Mart. The overall look of the image suggests it\'s from a retro-styled game with pixelated graphics, which is characteristic of many games from the late 90s and early 2000s.'

In [22]:
# NOTE(review): `headers` is not referenced anywhere else in this cell — confirm
# whether it was meant to be included in the prompt.
headers = "FECHA DIA/MES,  DETALLE DE TRANSACCION, SUCURSAL, N° DOCTO, MONTO CHEQUES O CARGOS, MONTO DEPOSITOS O ABONOS, SALDO"

# Example of the structure the model is asked to return.
response_example = dict(
    user="juan perez",
    email="mail@mail.com",
    account_type="cuenta vista",
    ejecutivo="paulo diaz",
    sucursal="oficina bla bla",
    account_number="XXXXXXXblabla",
    cartola_number=0,
    currency="pesos",
    from_date="01/01/2020",
    to_date="01/02/2020",
    transactions=[
        dict(
            date="31/05",
            detail="PAGO: casa something",
            sucursal="INTERNET",
            cargo=10,
            abono=0,
            saldo=990
        )
    ]
)

# The prompt demands valid JSON, so the example must itself be valid JSON.
# Interpolating the dict directly would embed its Python repr (single-quoted
# keys and strings), which is not JSON.
response_example_json = json.dumps(response_example, ensure_ascii=False)

response = generate_response_with_image(
    "Extract all the bank information. "
    "Return the information in a valid JSON format. "
    "Do not include any explanations or additional text outside the JSON structure. "
    f"This is an example of the response format data: {response_example_json}.",
    "./cartola.png"
)
response

'```json\n{\n  "user": "JUAN PEREZ",\n  "account_type": "CUENTA VISTA",\n  "ejecutivo": "PAULO DIAZ",\n  "sucursal": "OFICINA BLA BLA",\n  "account_number": "XXXXXXXblabla",\n  "cartola_number": 0,\n  "currency": "PESOS",\n  "transactions": [\n    {\n      "date": "31/05",\n      "detail": "PAGO: CASA SOMETHING",\n      "sucursal": "INTERNET",\n      "cargo": 10,\n      "abono": 0,\n      "saldo": 990\n    }\n  ]\n}\n```'

In [23]:
# Remove an optional Markdown code fence (``` or ```json ... ```) around the reply.
# str.lstrip/rstrip take a *set of characters*, not a prefix/suffix, so
# lstrip('```json') could also eat leading payload characters such as
# 'n', 's', 'o' or 'j'. Strip the exact markers instead.
json_string = response.strip()
if json_string.startswith("```"):
    json_string = json_string[3:]
    if json_string.startswith("json"):
        json_string = json_string[4:]
if json_string.endswith("```"):
    json_string = json_string[:-3]

json.loads(json_string)

{'user': 'JUAN PEREZ',
 'account_type': 'CUENTA VISTA',
 'ejecutivo': 'PAULO DIAZ',
 'sucursal': 'OFICINA BLA BLA',
 'account_number': 'XXXXXXXblabla',
 'cartola_number': 0,
 'currency': 'PESOS',
 'transactions': [{'date': '31/05',
   'detail': 'PAGO: CASA SOMETHING',
   'sucursal': 'INTERNET',
   'cargo': 10,
   'abono': 0,
   'saldo': 990}]}