Skip to content

Commit

Permalink
17 defence xml tagging (#34)
Browse files Browse the repository at this point in the history
* WIP: Defence mechanism info box

* WIP: visual change when a defence is clicked on

* WIP: Backend support to get and set defences

* WIP: Frontend can now (de)activate defence

* Fix comparison bug

* Fix calling bug

* WIP: Character limit backend detection

* WIP: Defence flashes red when triggered

* Working defence mechanism

* Add random sequence enclosure frontend selection

* Transform prompt with random sequence enclosure

* Move transform func to defence. Configurations as env variables

* Display original and transformed prompt in chatbox

* Change colour of edited chatbot message

* Add XML tagging defence

* Refactor message transformation

* Detect triggered defences function. detect XML tagging

* Move defence detection to service so we can apply to original message

* clean up

* pass in original message to detect function

* update xml tagging description

---------

Co-authored-by: George Sproston <gsproston@scottlogic.com>
  • Loading branch information
1 parent 9bb3d41 commit d5ea860
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 43 deletions.
100 changes: 83 additions & 17 deletions backend/defence.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@ const defences = [
},
{
id: "RANDOM_SEQUENCE_ENCLOSURE",
isActive: false,
isActive: false,
},
{
id: "XML_TAGGING",
isActive: false,
}
];

// activate a defence
Expand Down Expand Up @@ -42,34 +46,96 @@ function isDefenceActive(id) {
return false;
}

/**
 * Build a random string made only of lowercase ASCII letters (a-z).
 *
 * Math.random() is not cryptographically secure; the result is only meant to
 * be an unpredictable-looking delimiter, not a secret.
 *
 * @param {number|string} string_length - how many characters to generate.
 *   Numeric strings are accepted because callers pass an environment variable.
 *   Values that do not convert to a positive number produce an empty string.
 * @returns {string} the generated string
 */
function generate_random_string(string_length) {
  const ALPHABET_SIZE = 26;
  const FIRST_LETTER_CODE = 97; // char code of "a"
  const length = Number(string_length);
  let random_string = "";
  for (let i = 0; i < length; i++) {
    // Math.random() is in [0, 1), so scaling by 26 reaches every letter up to "z"
    const random_ascii = FIRST_LETTER_CODE + Math.floor(Math.random() * ALPHABET_SIZE);
    random_string += String.fromCharCode(random_ascii);
  }
  return random_string;
}

// apply defence string transformations to original message
function transformMessage(message){
if (isDefenceActive("RANDOM_SEQUENCE_ENCLOSURE")){
console.debug("Random Sequence Enclosure defence active.");
const randomString = generate_random_string(process.env.RANDOM_SEQ_ENCLOSURE_LENGTH);
const introText = process.env.RANDOM_SEQ_ENCLOSURE_PRE_PROMPT;
let transformedMessage = introText.concat(randomString, " {{ ", message, " }} ", randomString, ". ");
console.debug("Defence applied. New message: " + transformedMessage);
return transformedMessage;
/**
 * Apply the random sequence enclosure defence to an input message.
 *
 * The message is wrapped as
 *   <pre-prompt><random> {{ <message> }} <random>. 
 * where the same random string appears on both sides.
 *
 * Reads two environment variables:
 *  - RANDOM_SEQ_ENCLOSURE_LENGTH: length passed to generate_random_string
 *  - RANDOM_SEQ_ENCLOSURE_PRE_PROMPT: text placed before the enclosure
 *
 * @param message - the message to enclose
 * @returns {string} the enclosed message
 */
function transformRandomSequenceEnclosure(message) {
  console.debug("Random Sequence Enclosure defence active.");
  const randomString = generate_random_string(process.env.RANDOM_SEQ_ENCLOSURE_LENGTH);
  // an unset pre-prompt used to throw on `undefined.concat(...)`;
  // fall back to no preamble so the enclosure is still applied
  const introText = process.env.RANDOM_SEQ_ENCLOSURE_PRE_PROMPT ?? "";
  return `${introText}${randomString} {{ ${message} }} ${randomString}. `;
}

/**
 * Replace the five XML special characters (< > & ' ") in user input with
 * their entity references, so the input cannot open or close tags when the
 * XML tagging defence wraps it.
 *
 * @param unsafe - raw text; must provide a string `replace` method
 * @returns the text with every special character replaced by its entity
 */
function escapeXml(unsafe) {
  const entityByCharacter = {
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
    "'": "&apos;",
    '"': "&quot;",
  };
  return unsafe.replace(/[<>&'"]/g, (character) => entityByCharacter[character]);
}

/**
 * Apply the XML tagging defence: escape the message with escapeXml and wrap
 * the result in <user_input> ... </user_input>.
 *
 * @param message - the message to tag
 * @returns {string} the escaped message enclosed in user_input tags
 */
function transformXmlTagging(message) {
  console.debug("XML Tagging defence active.");
  const escapedBody = escapeXml(message);
  return `<user_input>${escapedBody}</user_input>`;
}

/**
 * Apply every active defence's string transformation to the original message.
 *
 * Transformations are chained in a fixed order: random sequence enclosure
 * first, then XML tagging, so the XML tags end up outermost.
 *
 * @param message - the original user message
 * @returns the transformed message, or the original when no transforming
 *   defence is active
 */
function transformMessage(message) {
  let transformedMessage = message;
  if (isDefenceActive("RANDOM_SEQUENCE_ENCLOSURE")) {
    transformedMessage = transformRandomSequenceEnclosure(transformedMessage);
  }
  if (isDefenceActive("XML_TAGGING")) {
    transformedMessage = transformXmlTagging(transformedMessage);
  }
  if (message === transformedMessage) {
    console.debug("No defences applied. Message unchanged.");
  } else {
    console.debug(`Defences applied. Transformed message: ${transformedMessage}`);
  }
  // always hand back the transformed text; returning `message` here would
  // silently drop the defences that were just applied
  return transformedMessage;
}

/**
 * Detect which defences a message triggers and whether it must be blocked.
 *
 * - CHARACTER_LIMIT is triggered when the message is longer than
 *   MAX_MESSAGE_LENGTH (environment variable, default 280). The message is
 *   only blocked when that defence is also active.
 * - XML_TAGGING is triggered when escaping the message changes it, i.e. the
 *   message contains at least one of < > & ' ".
 *
 * @param message - the message to inspect; a missing or non-string value is
 *   treated as an empty message instead of throwing
 * @returns {{reply: (string|null), defenceInfo: {blocked: boolean, triggeredDefences: string[]}}}
 *   `reply` is the block notice when the message is blocked, otherwise null
 */
function detectTriggeredDefences(message) {
  // keep track of any triggered defences
  const defenceInfo = { blocked: false, triggeredDefences: [] };
  // the route forwards `req.body?.message`, which is undefined for an empty body
  const text = typeof message === "string" ? message : "";
  // environment variables are strings; parse explicitly so a non-numeric
  // value falls back to the default instead of disabling the check
  const configuredLimit = Number.parseInt(process.env.MAX_MESSAGE_LENGTH, 10);
  const maxMessageLength = Number.isNaN(configuredLimit) ? 280 : configuredLimit;

  if (text.length > maxMessageLength) {
    console.debug("CHARACTER_LIMIT defence triggered.");
    defenceInfo.triggeredDefences.push("CHARACTER_LIMIT");
    // only an active defence blocks; an inactive one is merely reported
    if (isDefenceActive("CHARACTER_LIMIT")) {
      defenceInfo.blocked = true;
      return { reply: "Message is too long", defenceInfo };
    }
  }

  // the message contains XML special characters if escaping alters it
  if (text !== escapeXml(text)) {
    console.debug("XML_TAGGING defence triggered.");
    defenceInfo.triggeredDefences.push("XML_TAGGING");
  }
  return { reply: null, defenceInfo };
}

module.exports = {
activateDefence,
deactivateDefence,
getDefences,
isDefenceActive,
transformMessage
transformMessage,
detectTriggeredDefences
};
23 changes: 3 additions & 20 deletions backend/openai.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ async function chatGptCallFunction(functionCall) {
} else {
console.error("Unknown function: " + functionName);
}

return reply;
}

Expand All @@ -100,39 +99,23 @@ async function chatGptChatCompletion() {
}

/**
 * Send a user message to the chat model and return its reply.
 *
 * As written, the function first applies a character-limit check: a message
 * longer than MAX_MESSAGE_LENGTH (environment variable, default 280) is
 * recorded as triggering CHARACTER_LIMIT and, when that defence is active,
 * is rejected without being added to the chat. Otherwise the message is
 * appended to chatGptMessages, a completion is requested, and a requested
 * function call is executed to obtain a follow-up reply.
 *
 * NOTE(review): this block looks like the removed and added lines of a diff
 * shown together - the character-limit section and the first final `return`
 * appear to be the pre-change version. Confirm which version is intended.
 *
 * @param message - the user message; its `length` is compared to the limit
 * @returns an object with `reply` (the block notice or the reply's `content`)
 *   and `defenceInfo` ({ blocked, triggeredDefences })
 */
async function chatGptSendMessage(message) {
// keep track of any triggered defences
const defenceInfo = { blocked: false, triggeredDefences: [] };
// environment values are strings; the `>` comparison below coerces to a number
const maxMessageLength = process.env.MAX_MESSAGE_LENGTH || 280;
// check if the message is too long
if (message.length > maxMessageLength) {
// add the defence to the list of triggered defences
defenceInfo.triggeredDefences.push("CHARACTER_LIMIT");
// check if the defence is active
if (isDefenceActive("CHARACTER_LIMIT")) {
// block the message
defenceInfo.blocked = true;
// return the defence info; the message is never added to the chat history
return { reply: "Message is too long", defenceInfo: defenceInfo };
}
}
// add message to chat
chatGptMessages.push({ role: "user", content: message });

let reply = await chatGptChatCompletion();

// check if GPT wanted to call a function
if (reply.function_call) {
// call the function and get a new reply
reply = await chatGptCallFunction(reply.function_call);
}

// return the reply content
return { reply: reply.content, defenceInfo: defenceInfo };
// NOTE(review): unreachable - the return above always exits first
return { reply: reply.content };
}

// empty the chat history in place, keeping the same chatGptMessages array
function clearMessages() {
  chatGptMessages.splice(0, chatGptMessages.length);
}

module.exports = { initOpenAi, chatGptSendMessage, clearMessages };
module.exports = { initOpenAi, chatGptSendMessage, clearMessages };
7 changes: 7 additions & 0 deletions backend/router.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const {
deactivateDefence,
getDefences,
transformMessage,
detectTriggeredDefences,
} = require("./defence");
const { clearEmails, getSentEmails } = require("./email");
const { chatGptSendMessage, clearMessages } = require("./openai");
Expand Down Expand Up @@ -49,6 +50,12 @@ router.post("/defence/transform", (req, res, next) => {
res.send(transformMessage(message));
});

// Detect which defences the posted message triggers
router.post("/defence/detect", (req, res, next) => {
  const detectionResult = detectTriggeredDefences(req.body?.message);
  res.send(detectionResult);
});

// Clear sent emails
router.post("/email/clear", (req, res, next) => {
clearEmails();
Expand Down
22 changes: 17 additions & 5 deletions frontend/src/components/ChatBox/ChatBox.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {
openAiSendMessage,
} from "../../service/openaiService";
import { getSentEmails } from "../../service/emailService";
import { transformInputPrompt } from "../../service/defenceService";
import { transformInputPrompt, detectTriggeredDefences } from "../../service/defenceService";

function ChatBox(props) {
const [isSendingMessage, setIsSendingMessage] = useState(false);
Expand Down Expand Up @@ -54,18 +54,30 @@ function ChatBox(props) {
},
]);
}

// clear the input
event.target.value = "";

const reply = await openAiSendMessage(transformedMessage);
// check if original input triggers any defence mechanisms
const triggeredDefenceCheck = await detectTriggeredDefences(transformedMessage)
const defenceInfo = triggeredDefenceCheck.defenceInfo;

let reply;
// if the defence info is blocked, set reply to blocked message
if (defenceInfo.blocked){
reply = triggeredDefenceCheck;
} else {
// if not blocked, send the message to chatgpt and get reply
reply = await openAiSendMessage(transformedMessage);
}

// add it to the list of messages
setMessages((messages) => [
...messages,
{ isUser: false, message: reply.reply, defenceInfo: reply.defenceInfo },
{ isUser: false, message: reply.reply, defenceInfo: defenceInfo },
]);
// update triggered defences
props.updateTriggeredDefences(reply.defenceInfo.triggeredDefences);
props.updateTriggeredDefences(defenceInfo.triggeredDefences);

// we have the message reply
setIsSendingMessage(false);

Expand Down
8 changes: 8 additions & 0 deletions frontend/src/components/DefenceBox/DefenceBox.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ function DefenceBox(props) {
isActive: false,
isTriggered: false,
},
{
name: "xml tagging",
id: "XML_TAGGING",
info: "enclose the users prompt between <user_input> tags and escapes xml characters in raw input. this is a form of prompt validation.",
isActive: false,
isTriggered: false,
},

]);

// called on mount
Expand Down
13 changes: 12 additions & 1 deletion frontend/src/service/defenceService.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,15 @@ async function transformInputPrompt(message) {
});
}

export { getDefenceStatus, activateDefence, deactivateDefence, transformInputPrompt };
/**
 * Ask the backend which defences a message triggers.
 *
 * POSTs `{ message }` as JSON to the "detect" endpoint under URL and resolves
 * with the parsed JSON body of the response.
 *
 * @param message - the message to check
 * @returns a promise for the parsed response body
 */
async function detectTriggeredDefences(message) {
  const requestOptions = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ message }),
  };
  const response = await fetch(URL + "detect", requestOptions);
  return response.json();
}

export { getDefenceStatus, activateDefence, deactivateDefence, transformInputPrompt, detectTriggeredDefences };

0 comments on commit d5ea860

Please sign in to comment.