# Calling External model endpoints

## Objective

This tutorial calls multiple model endpoints, each identified by its URL and authenticated with an auth key, and shows the request sent and the response received for each.

## OpenAI - GPT-2 Model

In [19]:
import requests

import os

# Hugging Face Inference API endpoint for the GPT-2 model.
API_URL = "https://api-inference.huggingface.co/models/openai-community/gpt2"
# The access token is read from the HF_API_TOKEN environment variable instead
# of being embedded in the notebook, so the secret is never saved with the file.
# If the variable is unset the bearer token is empty and the endpoint is
# expected to reject the request as unauthorized.
# NOTE(review): the token that was previously hardcoded here should be revoked.
headers = {"Authorization": "Bearer " + os.environ.get("HF_API_TOKEN", "")}

def query(payload, timeout=30):
	"""POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

	Args:
		payload: JSON-serialisable request body.
		timeout: Seconds to wait for the server before giving up. Without a
			timeout, ``requests`` may block indefinitely on an unresponsive host.

	Returns:
		The response body parsed from JSON.

	Raises:
		requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
		requests.Timeout: If the server does not respond within ``timeout`` seconds.
	"""
	response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
	# Fail loudly on HTTP errors instead of returning an error body that the
	# caller would print as if it were a model response.
	response.raise_for_status()
	return response.json()
	
# Text-generation request: the prompt is sent under the "inputs" key.
generation_request = {"inputs": "What is the capital of France?"}
output = query(generation_request)

# Show the raw decoded reply exactly as the endpoint returned it.
print("\nResponse : ", output, sep="")


Response : [{'generated_text': 'What is the capital of France? : Paris City? "We don\'t want to sound silly." Meet your friend, Dörner. We hope you find your life somewhere that does not bleed white. And just kidding, check out The Goldbergs.\n\nAll serious musicians need to take notice: Rodgers\' The Wolf of Wall Street has definitively established a universal cultural impression. It excels at narrative and abstract comedy, but it cuts its heavy-handed exploration of money by using genre and attitude to tangle brothers and their ad'}]


## TinyLlama 1.1B - HuggingFace Endpoint

In [20]:
import requests 

import os

# Hugging Face chat-completions endpoint for the TinyLlama 1.1B chat model.
API_URL = "https://api-inference.huggingface.co/models/TinyLlama/TinyLlama-1.1B-Chat-v1.0/v1/chat/completions"
# The access token is read from the HF_API_TOKEN environment variable instead
# of being embedded in the notebook, so the secret is never saved with the file.
# If the variable is unset the bearer token is empty and the endpoint is
# expected to reject the request as unauthorized.
# NOTE(review): the token that was previously hardcoded here should be revoked.
headers = {"Authorization": "Bearer " + os.environ.get("HF_API_TOKEN", "")}

def query(payload, timeout=30):
	"""POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

	Args:
		payload: JSON-serialisable request body.
		timeout: Seconds to wait for the server before giving up. Without a
			timeout, ``requests`` may block indefinitely on an unresponsive host.

	Returns:
		The response body parsed from JSON.

	Raises:
		requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
		requests.Timeout: If the server does not respond within ``timeout`` seconds.
	"""
	response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
	# Fail loudly on HTTP errors instead of returning an error body that the
	# caller would print as if it were a model response.
	response.raise_for_status()
	return response.json()
	
# Chat-completions request: one user message, max_tokens of 500, streaming off.
user_turn = {
	"role": "user",
	"content": "What is the capital of France?",
}
chat_request = {
	"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
	"messages": [user_turn],
	"max_tokens": 500,
	"stream": False,
}
output = query(chat_request)

# Show the raw decoded reply exactly as the endpoint returned it.
print("\nResponse : ", output, sep="")


Response : {'object': 'chat.completion', 'id': '', 'created': 1721854953, 'model': 'TinyLlama/TinyLlama-1.1B-Chat-v1.0', 'system_fingerprint': '2.1.1-sha-4dfdb48', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'The capital of France is Paris, located in the Ile-de-France region.'}, 'logprobs': None, 'finish_reason': 'eos_token'}], 'usage': {'prompt_tokens': 30, 'completion_tokens': 19, 'total_tokens': 49}}


## Phi-3 Mini - Deployed in Azure AI as a Serverless Endpoint - Model as a Service (MaaS)

In [21]:
import requests 

# serverless

import os

# Azure AI serverless (MaaS) chat-completions endpoint for Phi-3 mini.
API_URL = "https://Phi-3-mini-4k-instruct-rqvel.eastus2.models.ai.azure.com/v1/chat/completions"
# The endpoint key is read from the AZURE_PHI3_SERVERLESS_KEY environment
# variable instead of being embedded in the notebook, so the secret is never
# saved with the file. If the variable is unset the bearer token is empty and
# the endpoint is expected to reject the request as unauthorized.
# NOTE(review): the key that was previously hardcoded here should be rotated.
headers = {"Authorization": "Bearer " + os.environ.get("AZURE_PHI3_SERVERLESS_KEY", "")}

def query(payload, timeout=30):
	"""POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

	Args:
		payload: JSON-serialisable request body.
		timeout: Seconds to wait for the server before giving up. Without a
			timeout, ``requests`` may block indefinitely on an unresponsive host.

	Returns:
		The response body parsed from JSON.

	Raises:
		requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
		requests.Timeout: If the server does not respond within ``timeout`` seconds.
	"""
	response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
	# Fail loudly on HTTP errors instead of returning an error body that the
	# caller would print as if it were a model response.
	response.raise_for_status()
	return response.json()
	
# Chat-completions request: one user message with max_tokens set to 500.
user_turn = {
	"role": "user",
	"content": "What is the capital of France?",
}
chat_request = {
	"messages": [user_turn],
	"max_tokens": 500,
}
output = query(chat_request)

# Show the raw decoded reply exactly as the endpoint returned it.
print("\nResponse : ", output, sep="")


Response : {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': " The capital of France is Paris. It is not only the country's largest city but also its administrative, commercial, and cultural center. Paris is known for its landmarks such as the Eiffel Tower, Notre-Dame Cathedral, and the Louvre Museum, which is the world's largest art museum. As a major global city, Paris plays an integral role in Europe's economy and serves as a hub for international diplomacy, business, and tourism.", 'role': 'assistant', 'tool_calls': []}}], 'created': 1721856493, 'id': 'cmpl-b5b17aca4b144563b421587ad58b4955', 'model': 'phi3-mini-4k', 'object': 'chat.completion', 'usage': {'completion_tokens': 99, 'prompt_tokens': 10, 'total_tokens': 109}}


## Phi-3 Mini - Deployed in Azure AI on Managed Compute

In [22]:
import requests 

# managed compute

import os

# Azure ML managed-compute chat-completions endpoint.
API_URL = "https://waqasjaved-5368-srcvb.eastus2.inference.ml.azure.com/chat/completions"
# The endpoint key is read from the AZURE_PHI3_MANAGED_KEY environment variable
# instead of being embedded in the notebook, so the secret is never saved with
# the file. If the variable is unset the bearer token is empty and the endpoint
# is expected to reject the request as unauthorized.
# NOTE(review): the key that was previously hardcoded here should be rotated.
headers = {"Authorization": "Bearer " + os.environ.get("AZURE_PHI3_MANAGED_KEY", "")}


def query(payload, timeout=30):
	"""POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

	Args:
		payload: JSON-serialisable request body.
		timeout: Seconds to wait for the server before giving up. Without a
			timeout, ``requests`` may block indefinitely on an unresponsive host.

	Returns:
		The response body parsed from JSON.

	Raises:
		requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
		requests.Timeout: If the server does not respond within ``timeout`` seconds.
	"""
	response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
	# Fail loudly on HTTP errors instead of returning an error body that the
	# caller would print as if it were a model response.
	response.raise_for_status()
	return response.json()
	
# Chat-completions request: one user message with max_tokens set to 500.
user_turn = {
	"role": "user",
	"content": "What is the capital of France?",
}
chat_request = {
	"messages": [user_turn],
	"max_tokens": 500,
}
output = query(chat_request)

# Show the raw decoded reply exactly as the endpoint returned it.
print("\nResponse : ", output, sep="")


Response : {'id': 'cmpl-94f1aace11ed4298a87ebefe48402103', 'object': 'chat.completion', 'created': 1721856495, 'model': '', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': ' The capital of France is Paris. It is not only the capital city but also the most populous city of France. As the center for French government and administration, Paris is a major hub for culture, art, fashion, and gastronomy.', 'tool_calls': []}, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 10, 'total_tokens': 61, 'completion_tokens': 51}}


## Google T5 Efficient Mini-3

In [23]:
import requests 

import os

# Azure ML managed-compute scoring endpoint.
API_URL = "https://waqasjaved-5368-qyibl.eastus2.inference.ml.azure.com/score"
# The endpoint key is read from the AZURE_T5_KEY environment variable instead
# of being embedded in the notebook, so the secret is never saved with the
# file. If the variable is unset the bearer token is empty and the endpoint is
# expected to reject the request as unauthorized.
# NOTE(review): the key that was previously hardcoded here should be rotated.
headers = {"Authorization": "Bearer " + os.environ.get("AZURE_T5_KEY", "")}


def query(payload, timeout=30):
	"""POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

	Args:
		payload: JSON-serialisable request body.
		timeout: Seconds to wait for the server before giving up. Without a
			timeout, ``requests`` may block indefinitely on an unresponsive host.

	Returns:
		The response body parsed from JSON.

	Raises:
		requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
		requests.Timeout: If the server does not respond within ``timeout`` seconds.
	"""
	response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
	# Fail loudly on HTTP errors instead of returning an error body that the
	# caller would print as if it were a model response.
	response.raise_for_status()
	return response.json()
	
# Scoring request: the prompt is sent under the "inputs" key
# (note that this prompt has no trailing question mark).
scoring_request = {'inputs': 'What is the capital of France'}
output = query(scoring_request)

# Show the raw decoded reply exactly as the endpoint returned it.
print("\nResponse : ", output, sep="")


Response : [{'generated_text': 'France.  - - - - - -'}]
