# Calling External Model Endpoints

## Objective

This tutorial calls several hosted model endpoints, each addressed by a URL and authenticated with an API key. For each endpoint it sends a request and displays the response.

## OpenAI - GPT-2 Model

In [None]:
import os

import requests

# Hugging Face Inference API route for the openai-community/gpt2 model.
API_URL = "https://api-inference.huggingface.co/models/openai-community/gpt2"
# Take the access token from the HF_TOKEN environment variable so the secret
# does not have to be pasted into the notebook; when the variable is unset the
# original "***" placeholder is used unchanged.
headers = {"Authorization": "Bearer " + os.environ.get("HF_TOKEN", "***")}

def query(payload, timeout=120):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

    The request uses whatever ``API_URL`` and ``headers`` are bound to at call
    time.

    Args:
        payload: JSON-serialisable request body.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without a timeout ``requests.post`` can block
            indefinitely on an unresponsive endpoint.

    Returns:
        The response body as parsed by ``response.json()``.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    return response.json()
	
# Plain-text prompt in the {"inputs": ...} shape, sent through query().
gpt2_request = {"inputs": "What is the capital of France?"}
output = query(gpt2_request)

print("\nResponse :")
# Bare expression so the notebook renders the reply as the cell result.
output

## TinyLlama 1.1B - Hugging Face Endpoint

In [None]:

# Chat-completions route of the Hugging Face Inference API for
# TinyLlama/TinyLlama-1.1B-Chat-v1.0.
API_URL = "https://api-inference.huggingface.co/models/TinyLlama/TinyLlama-1.1B-Chat-v1.0/v1/chat/completions"
# Take the access token from the HF_TOKEN environment variable so the secret
# does not have to be pasted into the notebook; when the variable is unset the
# original "***" placeholder is used unchanged.
headers = {"Authorization": "Bearer " + os.environ.get("HF_TOKEN", "***")}

def query(payload, timeout=120):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

    The request uses whatever ``API_URL`` and ``headers`` are bound to at call
    time.

    Args:
        payload: JSON-serialisable request body.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without a timeout ``requests.post`` can block
            indefinitely on an unresponsive endpoint.

    Returns:
        The response body as parsed by ``response.json()``.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    return response.json()
	
# Single-turn, non-streaming chat request capped at 500 generated tokens.
chat_messages = [
    {"role": "user", "content": "What is the capital of France?"},
]
tinyllama_request = {
    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "messages": chat_messages,
    "max_tokens": 500,
    "stream": False,
}
output = query(tinyllama_request)

print("\nResponse :")
# Bare expression so the notebook renders the reply as the cell result.
output

## Phi-3 Mini - Deployed in Azure AI as a Serverless Endpoint - Model as a Service (MaaS)

In [None]:

# serverless

# Chat-completions route of the Phi-3-mini-4k-instruct deployment on Azure.
API_URL = "https://Phi-3-mini-4k-instruct-rqvel.eastus2.models.ai.azure.com/v1/chat/completions"
# Take the endpoint key from the AZURE_PHI3_API_KEY environment variable so the
# secret does not have to be pasted into the notebook; when the variable is
# unset the original "***" placeholder is used unchanged.
headers = {"Authorization": "Bearer " + os.environ.get("AZURE_PHI3_API_KEY", "***")}

def query(payload, timeout=120):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

    The request uses whatever ``API_URL`` and ``headers`` are bound to at call
    time.

    Args:
        payload: JSON-serialisable request body.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without a timeout ``requests.post`` can block
            indefinitely on an unresponsive endpoint.

    Returns:
        The response body as parsed by ``response.json()``.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    return response.json()
	
# Single-turn chat request capped at 500 generated tokens.
chat_messages = [
    {"role": "user", "content": "What is the capital of France?"},
]
phi3_request = {"messages": chat_messages, "max_tokens": 500}
output = query(phi3_request)

print("\nResponse :")
# Bare expression so the notebook renders the reply as the cell result.
output

## Google T5 Efficient Mini-3

In [None]:

# Scoring URL (/score) of the Azure-hosted endpoint used in this section.
API_URL = "https://waqasjaved-5368-qyibl.eastus2.inference.ml.azure.com/score"
# Take the endpoint key from the AZURE_T5_API_KEY environment variable so the
# secret does not have to be pasted into the notebook; when the variable is
# unset the original "***" placeholder is used unchanged.
headers = {"Authorization": "Bearer " + os.environ.get("AZURE_T5_API_KEY", "***")}


def query(payload, timeout=120):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

    The request uses whatever ``API_URL`` and ``headers`` are bound to at call
    time.

    Args:
        payload: JSON-serialisable request body.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without a timeout ``requests.post`` can block
            indefinitely on an unresponsive endpoint.

    Returns:
        The response body as parsed by ``response.json()``.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    return response.json()
	
# Plain-text prompt in the {"inputs": ...} shape, sent through query().
t5_request = {"inputs": "What is the capital of France"}
output = query(t5_request)

print("\nResponse :")
# Bare expression so the notebook renders the reply as the cell result.
output

## Mistral 8x 7B Instruct 01 

In [None]:

# serverless

# Chat-completions route of the Azure-hosted Mistral endpoint.
API_URL = "https://mistral-7b-east1092381.eastus2.inference.ml.azure.com/chat/completions"
# Take the endpoint key from the AZURE_MISTRAL_API_KEY environment variable so
# the secret does not have to be pasted into the notebook; when the variable is
# unset the original "***" placeholder is used unchanged.
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer " + os.environ.get("AZURE_MISTRAL_API_KEY", "***"),
}

def query(payload, timeout=120):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

    The request uses whatever ``API_URL`` and ``headers`` are bound to at call
    time.

    Args:
        payload: JSON-serialisable request body.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without a timeout ``requests.post`` can block
            indefinitely on an unresponsive endpoint.

    Returns:
        The response body as parsed by ``response.json()``.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    return response.json()
	
# Single-turn chat request capped at 50 generated tokens.
chat_messages = [
    {"content": "What is the capital of France?", "role": "user"},
]
mistral_request = {"messages": chat_messages, "max_tokens": 50}
output = query(mistral_request)

print("\nResponse :")
# Bare expression so the notebook renders the reply as the cell result.
output