Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

17 defence xml tagging #34

Merged
merged 25 commits into from
Jul 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
3badc9c
WIP: Defence mechanism info box
gsproston-scottlogic Jul 14, 2023
46c2ae2
WIP: visual change when a defence is clicked on
gsproston-scottlogic Jul 14, 2023
44178bd
WIP: Backend support to get and set defences
gsproston-scottlogic Jul 14, 2023
5400425
WIP: Frontend can now (de)activate defence
gsproston-scottlogic Jul 14, 2023
67e13d0
Fix comparison bug
gsproston-scottlogic Jul 14, 2023
f330bb3
Fix calling bug
gsproston-scottlogic Jul 14, 2023
c4d120a
WIP: Character limit backend detection
gsproston-scottlogic Jul 14, 2023
b30ec19
WIP: Defence flashes red when triggered
gsproston-scottlogic Jul 14, 2023
5139de8
Working defence mechanism
gsproston-scottlogic Jul 14, 2023
ee8294f
Add random sequence enclosure frontend selection
heatherlogan-scottlogic Jul 17, 2023
be071d7
Transform prompt with random sequence enclosure
heatherlogan-scottlogic Jul 17, 2023
4023f36
Move transform func to defence. Configurations as env variables
heatherlogan-scottlogic Jul 17, 2023
378751c
Merge dev
heatherlogan-scottlogic Jul 17, 2023
4f01c07
Display original and transformed prompt in chatbox
heatherlogan-scottlogic Jul 17, 2023
a6f1097
Merge branch 'dev' of https://github.com/gsproston-scottlogic/prompt-…
heatherlogan-scottlogic Jul 17, 2023
cb877d5
Change colour of edited chatbot message
heatherlogan-scottlogic Jul 18, 2023
4b68583
Add XML tagging defence
heatherlogan-scottlogic Jul 18, 2023
c4ef74d
Refactor message transformation
heatherlogan-scottlogic Jul 18, 2023
42ac75b
Merge with dev
heatherlogan-scottlogic Jul 18, 2023
57627ba
Detect triggered defences function. detect XML tagging
heatherlogan-scottlogic Jul 18, 2023
3f59523
Move defence detection to service so we can apply to original message
heatherlogan-scottlogic Jul 18, 2023
c4244bb
clean up
heatherlogan-scottlogic Jul 18, 2023
2c64a80
pass in original message to detect function
heatherlogan-scottlogic Jul 18, 2023
213fbc0
Merge with dev
heatherlogan-scottlogic Jul 18, 2023
6b3cb1b
update xml tagging description
heatherlogan-scottlogic Jul 18, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 83 additions & 17 deletions backend/defence.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@ const defences = [
},
{
id: "RANDOM_SEQUENCE_ENCLOSURE",
isActive: false,
isActive: false,
},
{
id: "XML_TAGGING",
isActive: false,
}
];

// activate a defence
Expand Down Expand Up @@ -42,34 +46,96 @@ function isDefenceActive(id) {
return false;
}

/**
 * Build a random string made only of lowercase ASCII letters (a-z).
 *
 * Math.random() is not cryptographically secure; the result is only suitable
 * as an unpredictable-looking delimiter, not as a secret.
 *
 * @param {number|string} string_length - Number of characters to generate. Numeric
 *   strings are accepted because the loop comparison coerces them; any value that
 *   does not compare greater than 0 (including undefined) yields an empty string.
 * @returns {string} The generated string.
 */
function generate_random_string(string_length) {
  let random_string = '';
  for (let i = 0; i < string_length; i++) {
    // 26 letters in the alphabet: floor(random * 26) gives 0..25, so 'z' (code 122) is reachable
    const random_ascii = Math.floor(Math.random() * 26) + 97;
    random_string += String.fromCharCode(random_ascii);
  }
  return random_string;
}

// apply defence string transformations to original message
function transformMessage(message){
if (isDefenceActive("RANDOM_SEQUENCE_ENCLOSURE")){
console.debug("Random Sequence Enclosure defence active.");
const randomString = generate_random_string(process.env.RANDOM_SEQ_ENCLOSURE_LENGTH);
const introText = process.env.RANDOM_SEQ_ENCLOSURE_PRE_PROMPT;
let transformedMessage = introText.concat(randomString, " {{ ", message, " }} ", randomString, ". ");
console.debug("Defence applied. New message: " + transformedMessage);
return transformedMessage;
// apply random sequence enclosure defence to input message
/**
 * Wrap the message as `<pre-prompt><random> {{ <message> }} <random>. `.
 *
 * The same random string is placed before and after the message. Its length is
 * read from RANDOM_SEQ_ENCLOSURE_LENGTH and the leading text from
 * RANDOM_SEQ_ENCLOSURE_PRE_PROMPT.
 *
 * @param {string} message - The message to enclose.
 * @returns {string} The enclosed message.
 */
function transformRandomSequenceEnclosure(message) {
  console.debug("Random Sequence Enclosure defence active.");
  const randomString = generate_random_string(process.env.RANDOM_SEQ_ENCLOSURE_LENGTH);
  const configuredIntro = process.env.RANDOM_SEQ_ENCLOSURE_PRE_PROMPT;
  if (configuredIntro === undefined) {
    // an unset pre-prompt used to throw a TypeError here; keep the enclosure and report the misconfiguration
    console.warn("RANDOM_SEQ_ENCLOSURE_PRE_PROMPT is not set. Enclosing message without a pre-prompt.");
  }
  const introText = configuredIntro ?? "";
  return `${introText}${randomString} {{ ${message} }} ${randomString}. `;
}

// Replace the five XML-special characters (< > & ' ") in user input with their
// entity references, so raw input cannot inject markup when XML tagging is on.
// Expects a string; every other character is returned unchanged.
function escapeXml(unsafe) {
  const entityFor = {
    '<': '&lt;',
    '>': '&gt;',
    '&': '&amp;',
    '\'': '&apos;',
    '"': '&quot;',
  };
  return unsafe.replace(/[<>&'"]/g, (special) => entityFor[special]);
}

// XML tagging defence: escape the input message and wrap it in <user_input> tags
function transformXmlTagging(message) {
  console.debug("XML Tagging defence active.");
  const escapedInput = escapeXml(message);
  return `<user_input>${escapedInput}</user_input>`;
}

// apply defence string transformations to original message
/**
 * Run the message through every active transforming defence.
 *
 * Random sequence enclosure is applied first and XML tagging second, so when
 * both are active the XML tagging step escapes and wraps the already-enclosed text.
 *
 * @param {string} message - The original user message.
 * @returns {string} The transformed message, or the original message when no
 *   transforming defence is active.
 */
function transformMessage(message) {
  let transformedMessage = message;
  if (isDefenceActive("RANDOM_SEQUENCE_ENCLOSURE")) {
    transformedMessage = transformRandomSequenceEnclosure(transformedMessage);
  }
  if (isDefenceActive("XML_TAGGING")) {
    transformedMessage = transformXmlTagging(transformedMessage);
  }
  if (message === transformedMessage) {
    console.debug("No defences applied. Message unchanged.");
  } else {
    // always fall through to the single return below so the transformed text is what callers receive
    console.debug("Defences applied. Transformed message: " + transformedMessage);
  }
  return transformedMessage;
}

// detects triggered defences in message and blocks the message if necessary
/**
 * Check a message against the detecting defences.
 *
 * - CHARACTER_LIMIT is recorded as triggered when the message is longer than
 *   MAX_MESSAGE_LENGTH (default 280). It blocks the message only if that defence is active.
 * - XML_TAGGING is recorded as triggered when escaping XML characters would change
 *   the message. It is recorded whether or not the defence is active and never blocks.
 *
 * @param {string} message - The message to inspect. A missing or non-string value
 *   triggers nothing.
 * @returns {{reply: (string|null), defenceInfo: {blocked: boolean, triggeredDefences: string[]}}}
 *   `reply` is "Message is too long" when the message is blocked, otherwise null.
 */
function detectTriggeredDefences(message) {
  // keep track of any triggered defences
  const defenceInfo = { blocked: false, triggeredDefences: [] };

  // the route handler forwards req.body?.message unchecked, so guard before reading .length
  if (typeof message !== "string") {
    return { reply: null, defenceInfo: defenceInfo };
  }

  // environment values are strings; fall back to 280 when unset or not a number
  const configuredMaxLength = Number.parseInt(process.env.MAX_MESSAGE_LENGTH, 10);
  const maxMessageLength = Number.isNaN(configuredMaxLength) ? 280 : configuredMaxLength;

  // check if the message is too long
  if (message.length > maxMessageLength) {
    console.debug("CHARACTER_LIMIT defence triggered.");
    defenceInfo.triggeredDefences.push("CHARACTER_LIMIT");
    // only an active defence blocks the message
    if (isDefenceActive("CHARACTER_LIMIT")) {
      defenceInfo.blocked = true;
      return { reply: "Message is too long", defenceInfo: defenceInfo };
    }
  }

  // check if message contains XML characters: escaping changes the text only if it does
  if (message !== escapeXml(message)) {
    console.debug("XML_TAGGING defence triggered.");
    defenceInfo.triggeredDefences.push("XML_TAGGING");
  }
  return { reply: null, defenceInfo: defenceInfo };
}

module.exports = {
activateDefence,
deactivateDefence,
getDefences,
isDefenceActive,
transformMessage
transformMessage,
detectTriggeredDefences
};
23 changes: 3 additions & 20 deletions backend/openai.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ async function chatGptCallFunction(functionCall) {
} else {
console.error("Unknown function: " + functionName);
}

return reply;
}

Expand All @@ -100,39 +99,23 @@ async function chatGptChatCompletion() {
}

/**
 * Send a user message to the chat model and return its reply.
 *
 * As written, the function first applies the CHARACTER_LIMIT check itself: a message
 * longer than MAX_MESSAGE_LENGTH (default 280) is recorded as triggering the defence
 * and, if that defence is active, is rejected without contacting the model. Otherwise
 * the message is appended to chatGptMessages, a completion is requested, and a
 * function call requested by the model is executed to obtain a new reply.
 *
 * @param {string} message - The message to send.
 * @returns {Promise<{reply: string, defenceInfo: {blocked: boolean, triggeredDefences: string[]}}>}
 *   The reply content (or "Message is too long" when blocked) plus the defence info.
 */
async function chatGptSendMessage(message) {
// keep track of any triggered defences
const defenceInfo = { blocked: false, triggeredDefences: [] };
// environment values are strings; the comparison below relies on numeric coercion
const maxMessageLength = process.env.MAX_MESSAGE_LENGTH || 280;
// check if the message is too long
if (message.length > maxMessageLength) {
// add the defence to the list of triggered defences
defenceInfo.triggeredDefences.push("CHARACTER_LIMIT");
// check if the defence is active
if (isDefenceActive("CHARACTER_LIMIT")) {
// block the message
defenceInfo.blocked = true;
// return the defence info
return { reply: "Message is too long", defenceInfo: defenceInfo };
}
}
// add message to chat
chatGptMessages.push({ role: "user", content: message });

let reply = await chatGptChatCompletion();

// check if GPT wanted to call a function
if (reply.function_call) {
// call the function and get a new reply
reply = await chatGptCallFunction(reply.function_call);
}

// return the reply content
return { reply: reply.content, defenceInfo: defenceInfo };
// NOTE(review): the return below is unreachable because the return above always exits first.
// It looks like the post-change line of this diff (defence detection appears to have moved
// out of this function) - confirm which return shape callers should rely on.
return { reply: reply.content };
}

// clear chat history
function clearMessages() {
chatGptMessages.length = 0;
}

module.exports = { initOpenAi, chatGptSendMessage, clearMessages };
module.exports = { initOpenAi, chatGptSendMessage, clearMessages };
7 changes: 7 additions & 0 deletions backend/router.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const {
deactivateDefence,
getDefences,
transformMessage,
detectTriggeredDefences,
} = require("./defence");
const { clearEmails, getSentEmails } = require("./email");
const { chatGptSendMessage, clearMessages } = require("./openai");
Expand Down Expand Up @@ -49,6 +50,12 @@ router.post("/defence/transform", (req, res, next) => {
res.send(transformMessage(message));
});

// Detect which defences the posted message triggers.
// Responds with the object returned by detectTriggeredDefences for req.body.message;
// message is undefined when the request has no body or no message field.
router.post("/defence/detect", (req, res, next) => {
const message = req.body?.message;
res.send(detectTriggeredDefences(message));
});

// Clear sent emails
router.post("/email/clear", (req, res, next) => {
clearEmails();
Expand Down
22 changes: 17 additions & 5 deletions frontend/src/components/ChatBox/ChatBox.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {
openAiSendMessage,
} from "../../service/openaiService";
import { getSentEmails } from "../../service/emailService";
import { transformInputPrompt } from "../../service/defenceService";
import { transformInputPrompt, detectTriggeredDefences } from "../../service/defenceService";

function ChatBox(props) {
const [isSendingMessage, setIsSendingMessage] = useState(false);
Expand Down Expand Up @@ -54,18 +54,30 @@ function ChatBox(props) {
},
]);
}

// clear the input
event.target.value = "";

const reply = await openAiSendMessage(transformedMessage);
// check if original input triggers any defence mechanisms
const triggeredDefenceCheck = await detectTriggeredDefences(transformedMessage)
const defenceInfo = triggeredDefenceCheck.defenceInfo;

let reply;
// if the defence info is blocked, set reply to blocked message
if (defenceInfo.blocked){
reply = triggeredDefenceCheck;
} else {
// if not blocked, send the message to chatgpt and get reply
reply = await openAiSendMessage(transformedMessage);
}

// add it to the list of messages
setMessages((messages) => [
...messages,
{ isUser: false, message: reply.reply, defenceInfo: reply.defenceInfo },
{ isUser: false, message: reply.reply, defenceInfo: defenceInfo },
]);
// update triggered defences
props.updateTriggeredDefences(reply.defenceInfo.triggeredDefences);
props.updateTriggeredDefences(defenceInfo.triggeredDefences);

// we have the message reply
setIsSendingMessage(false);

Expand Down
8 changes: 8 additions & 0 deletions frontend/src/components/DefenceBox/DefenceBox.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ function DefenceBox(props) {
isActive: false,
isTriggered: false,
},
{
name: "xml tagging",
id: "XML_TAGGING",
info: "enclose the users prompt between <user_input> tags and escapes xml characters in raw input. this is a form of prompt validation.",
isActive: false,
isTriggered: false,
},

]);

// called on mount
Expand Down
13 changes: 12 additions & 1 deletion frontend/src/service/defenceService.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,15 @@ async function transformInputPrompt(message) {
});
}

export { getDefenceStatus, activateDefence, deactivateDefence, transformInputPrompt };
// POST the message as JSON to the "detect" endpoint and resolve with the parsed JSON response body
async function detectTriggeredDefences(message) {
  const endpoint = URL + "detect";
  const requestOptions = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ message }),
  };
  const detectResponse = await fetch(endpoint, requestOptions);
  return detectResponse.json();
}

export { getDefenceStatus, activateDefence, deactivateDefence, transformInputPrompt, detectTriggeredDefences };