In [1]:
import requests
import json


class ModelOperatorOllama():
    """
    Minimal client for the HTTP chat API of an Ollama server.

    On construction the list of available models is fetched from the server
    and the requested model name is validated against it.

    Attributes:
        url: Base API URL, always ending in '/'.
        timeout: Value forwarded as ``timeout`` to every ``requests`` call.
        model_list: Mapping of model name -> {'model_name', 'param_size'}.
        model: Name of the validated model used for all chat requests.
    """

    # Default endpoint; kept identical to the address this class always used.
    DEFAULT_URL = 'http://127.0.0.1:11435/api/'

    def __init__(self, model: str, url: str = DEFAULT_URL, timeout=None):
        """
        Connects to the server, loads the model list and validates `model`.

        Args:
            model: Name of the model to use; must be a key of `list_models()`.
            url: Base API URL. A trailing '/' is appended when missing.
            timeout: Optional timeout passed to `requests`; None leaves the
                `requests` default (no timeout) in place.

        Raises:
            ValueError: If `model` is not among the models the server reports.
        """
        # Normalize so endpoint names can simply be appended to the base URL.
        self.url = url if url.endswith('/') else url + '/'
        self.timeout = timeout
        self.model_list = self.list_models()

        if model not in self.model_list:
            raise ValueError(f"Model '{model}' not provided or not found in available models: {self.model_list}")
        self.model = model

    def _post_chat(self, payload):
        """POSTs `payload` as JSON to the chat endpoint and returns the decoded reply or an error dict."""
        response = requests.post(
            f'{self.url}chat',
            data=json.dumps(payload),
            headers={"Content-Type": "application/json"},
            timeout=self.timeout,
        )

        if response.status_code == 200:
            return response.json()
        # Non-200 replies are reported as data instead of raising, so callers
        # can inspect the failure without a try/except.
        return {"error": f"Request failed with status {response.status_code}", "details": response.text}

    def generate_response(self, system_prompt, prompt, format=None):
        """
        Sends a non-streaming chat request to the LLM API and returns the response.

        Args:
            system_prompt: Content of the 'system' message.
            prompt: Content of the 'user' message.
            format: Optional output-format specification. A dict is merged into
                the request payload as-is (e.g. ``{'format': {...schema...}}``);
                a string is sent as the payload's ``"format"`` value.

        Returns:
            The decoded JSON response on HTTP 200, otherwise a dict with the
            keys 'error' and 'details'.
        """
        payload = {
            "model": self.model,
            "messages": [
                {'role': 'system', 'content': system_prompt},
                {'role': 'user', 'content': prompt},
            ],
            "stream": False,
        }

        if format:
            if isinstance(format, str):
                # dict.update() would choke on a plain string such as 'json'.
                payload["format"] = format
            else:
                payload.update(format)

        return self._post_chat(payload)

    def unload_model(self):
        """
        Sends a chat request without messages and with keep_alive set to 0.

        Returns:
            The decoded JSON response on HTTP 200, otherwise a dict with the
            keys 'error' and 'details'.
        """
        payload = {
            "model": self.model,
            "keep_alive": 0,
            "stream": False,
        }
        return self._post_chat(payload)

    def list_models(self):
        """
        Fetches the models known to the server from the 'tags' endpoint.

        Returns:
            Dict mapping each model name to
            ``{'model_name': <name>, 'param_size': <parameter size or None>}``.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.get(f'{self.url}tags', timeout=self.timeout)
        # Fail with a clear HTTP error rather than a KeyError on an error body.
        response.raise_for_status()

        return {
            entry['name']: {
                'model_name': entry['name'],
                # Tolerate entries that carry no (or empty) details.
                'param_size': (entry.get('details') or {}).get('parameter_size'),
            }
            for entry in response.json().get('models', [])
        }

# response = generate_response('you are a comedian',f'say 2', model="llama3.2")
# llmp = ModelOperatorOllama('phi4:latest')
# system_prompt = 'you are a finance expert who can understand any piece of news and its implications on the markets. You excel at forex forecasting. Given the following piece of news, make your forecast'

# prompt = df_news.iloc[0]['article_text']

# format = {'format':{
#     "type": "object",
#     "properties": {
#         "currency": {
#             "type": "string",
#             "enum": ["USD", "EUR", "GBP", "JPY", "INR"],
#             "description": "Currency code most affected by this news."
#         },
#         "amount": {
#             "type": "integer",
#             "description": "Sign of the signal, 0 for negative and 1 for positive."
#         },
#         "strenght": {
#             "type": "number",
#             "description": "strength of the signal, 0 for minimum and 1 for maximum."
#         },
#         "type": {
#             "type": "string",
#             "enum": ["descriptive","predictive"],
#             "description": "whether the signal refers to past or future"
#         }
#     },
#     "required": ["currency", "amount"],
# }}

# llmp.generate_response('','write numbers to 10')
# response = llmp.generate_response(system_prompt,prompt,format)
# llmp.list_models()
# llmp.unload_model()

In [2]:
# Build a client for phi4; the constructor validates the name against the server's model list.
llmp = ModelOperatorOllama(model='phi4:latest')
# Smoke-test the chat endpoint with an empty system prompt; the returned dict is the cell's output.
llmp.generate_response(system_prompt='', prompt='write numbers to 10')

{'model': 'phi4:latest',
 'created_at': '2025-02-01T08:12:09.2726343Z',
 'message': {'role': 'assistant',
  'content': 'Sure! Here are the numbers from 1 to 10:\n\n1, 2, 3, 4, 5, 6, 7, 8, 9, 10\n\nIf you need any more information or have questions about these numbers, feel free to ask!'},
 'done_reason': 'stop',
 'done': True,
 'total_duration': 55488676100,
 'load_duration': 27557120600,
 'prompt_eval_count': 21,
 'prompt_eval_duration': 2698000000,
 'eval_count': 61,
 'eval_duration': 24931000000}

In [3]:
# Send a keep_alive=0 chat request for the model; the recorded output reports done_reason 'unload'.
llmp.unload_model()

{'model': 'phi4:latest',
 'created_at': '2025-02-01T08:12:29.5095742Z',
 'message': {'role': 'assistant', 'content': ''},
 'done_reason': 'unload',
 'done': True}