Skip to content

Commit

Permalink
feat: updated the timestamp prompt
Browse files Browse the repository at this point in the history
  • Loading branch information
JimmyLv committed Mar 12, 2023
1 parent 6947c33 commit a6b7fcc
Show file tree
Hide file tree
Showing 9 changed files with 85 additions and 24 deletions.
4 changes: 2 additions & 2 deletions components/Sentence.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { extractSentence } from "~/utils/extractSentence";
import { extractSentenceWithTimestamp } from "~/utils/extractSentenceWithTimestamp";
import { extractTimestamp, trimSeconds } from "~/utils/extractTimestamp";

export default function videoIdSentence({
Expand All @@ -20,7 +20,7 @@ export default function videoIdSentence({
? `https://www.bilibili.com/video/${videoId}/?t=`
: `https://youtube.com/watch?v=${videoId}&t=`;

const matchResult = extractSentence(sentence);
const matchResult = extractSentenceWithTimestamp(sentence);
if (matchResult) {
// simplify the seconds with number: 1:11 or 1.11 -> 7, todo: 0.003 is not able
const secondsStr = matchResult[1].split(":")[0];
Expand Down
9 changes: 6 additions & 3 deletions components/SummaryResult.tsx
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import Markdown from "marked-react";
import React from "react";
import { ActionsAfterResult } from "~/components/ActionsAfterResult";
import Sentence from "~/components/Sentence";
import { useToast } from "~/hooks/use-toast";
import { formatSummary } from "~/utils/formatSummary";
import Markdown from "marked-react";

export let isSecureContext = false;

Expand All @@ -23,7 +23,10 @@ export function SummaryResult({
shouldShowTimestamp?: boolean;
}) {
const { toast } = useToast();
const { summaryArray, formattedSummary } = formatSummary(summary);
const { summaryArray, formattedSummary } = formatSummary(
summary,
shouldShowTimestamp
);
const summaryNote =
formattedSummary +
"\n\n #BibiGPT自动总结 b.jimmylv.cn @吕立青_JimmyLv \nBV1fX4y1Q7Ux";
Expand Down Expand Up @@ -54,7 +57,7 @@ export function SummaryResult({
onClick={handleCopy}
>
{shouldShowTimestamp ? (
summaryArray.map((sentence, index) => (
summaryArray.map((sentence: string, index: number) => (
<div key={index}>
{sentence.length > 0 && (
<Sentence
Expand Down
24 changes: 22 additions & 2 deletions lib/openai/prompt.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { limitTranscriptByteLength } from "~/lib/openai/getSmallSizeTranscripts";
import { CommonSubtitleItem } from "~/lib/types";

interface PromptConfig {
language?: string;
Expand Down Expand Up @@ -55,6 +56,25 @@ export function getUserSubtitlePrompt(title: string, transcript: any) {
.replace(/\n+/g, " ")
.trim();
const language = `zh-CN`;
const instruction = `\n\nInstructions: Your output should use the following template:\n### Summary\n### Highlights\n- [Emoji] Bulletpoint\n\nYour task is to summarise the text I have given you in up to seven concise bullet points, starting with a short highlight. Choose an appropriate emoji for each bullet point. Use the text above: {{Title}} {{Transcript}}.\n\n\nReply in ${language} Language.`;
return `Title: "${videoTitle}"\nTranscript: "${videoTranscript}"${instruction}`;
const prompt = `Your output should use the following template:\n### Summary\n### Highlights\n- [Emoji] Bulletpoint\n\nYour task is to summarise the text I have given you in up to seven concise bullet points, starting with a short highlight. Choose an appropriate emoji for each bullet point. Use the text above: {{Title}} {{Transcript}}.\n\n\nReply in ${language} Language.`;

return `Title: "${videoTitle}"\nTranscript: "${videoTranscript}"\n\nInstructions: ${prompt}`;
}

/**
 * Builds the user prompt that asks the model for a timestamped summary
 * returned as a JSON array of `{ "start_time", "bullet_point" }` objects.
 *
 * Every subtitle item is reduced to `{ start_time, text }`, the resulting list
 * is serialized with `JSON.stringify`, passed through
 * `limitTranscriptByteLength`, and embedded after the video title, followed by
 * the fixed instruction text.
 *
 * @param title - Video title; runs of newlines are collapsed to a single space
 *   and the result is trimmed.
 * @param transcript - Array of subtitle items (read as `CommonSubtitleItem`).
 * @returns The prompt in the form
 *   `Title: …\nTranscript: …\n\nInstructions: …`.
 */
export function getUserSubtitleWithTimestampPrompt(
  title: string,
  transcript: any
) {
  const videoTitle = title?.replace(/\n+/g, " ").trim();
  // NOTE: the unconditional dump of the whole transcript was dropped here; the
  // API handler already logs the final user prompt when running in dev mode.
  const videoTranscript = transcript.map((i: CommonSubtitleItem) => ({
    // Prefer the real start time recorded on the item. `index` is only the
    // group number assigned while merging subtitles, so it is kept purely as a
    // fallback for items that carry no `start_time`.
    start_time: i.start_time ?? i.index,
    text: i.text,
  }));
  const language = "Chinese";
  const promptWithTimestamp = `Act as the author and provide exactly 5 bullet points all in ${language} language for the text transcript given in the format [{\"start_time\": <start_time>, \"text\": <text>}] \n and make the output only in the format of a json array [{\"start_time\": <start_time> , \"bullet_point\": <bullet_point>} ]\n Make sure that:\n - The output is not more than 5 bullet points\n - each bullet_point is at least 15 words and all bullet points are sorted by \"start_time\"\n - each bullet_point doesn't start with \"- \" or a number or a bullet point symbol\n - Wrap json keys with double quotes and don't put single quotes or double quotes inside the values. \n - The output json is not wrapped in any other json structure like { \"data\": <output json >}.`;
  // NOTE(review): the byte-length limiter receives serialized JSON; if it
  // truncates, the embedded transcript may no longer be valid JSON — confirm
  // against limitTranscriptByteLength.
  const videoTranscripts = limitTranscriptByteLength(
    JSON.stringify(videoTranscript)
  );
  return `Title: ${videoTitle}\nTranscript: ${videoTranscripts}\n\nInstructions: ${promptWithTimestamp}`;
}
6 changes: 5 additions & 1 deletion lib/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,8 @@ export enum VideoService {
LocalAudio = "local-audio",
}

export type CommonSubtitleItem = { text: string; index: number };
/**
 * Subtitle item shape shared by the prompt builder and
 * `reduceSubtitleTimestamp` (both import this type).
 */
export type CommonSubtitleItem = {
// Text of the item; `reduceSubtitleTimestamp` seeds it with a
// "<start> - " prefix when timestamps are requested, otherwise with "".
text: string;
// Set to the group index by `reduceSubtitleTimestamp`; it is a position,
// not a time value.
index: number;
// Optional start value; `reduceSubtitleTimestamp` fills it from
// `getStart(current)`. Presumably seconds — TODO confirm the unit.
start_time?: number | string;
};
5 changes: 4 additions & 1 deletion lib/youtube/fetchYoutubeSubtitle.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import { fetchYoutubeSubtitleUrls, SUBTITLE_DOWNLOADER_URL } from "~/lib/youtube/fetchYoutubeSubtitleUrls";
import {
fetchYoutubeSubtitleUrls,
SUBTITLE_DOWNLOADER_URL,
} from "~/lib/youtube/fetchYoutubeSubtitleUrls";
import { find } from "~/utils/fp";
import { reduceYoutubeSubtitleTimestamp } from "~/utils/reduceSubtitleTimestamp";

Expand Down
19 changes: 12 additions & 7 deletions pages/api/sumup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
getExamplePrompt,
getSystemPrompt,
getUserSubtitlePrompt,
getUserSubtitleWithTimestampPrompt,
} from "~/lib/openai/prompt";
import { selectApiKeyAndActivatedLicenseKey } from "~/lib/openai/selectApiKeyAndActivatedLicenseKey";
import { SummarizeParams } from "~/lib/types";
Expand Down Expand Up @@ -47,14 +48,18 @@ export default async function handler(
? getSmallSizeTranscripts(subtitlesArray, subtitlesArray)
: descriptionText; // subtitlesArray.map((i) => i.text).join("\n")

const systemPrompt = getSystemPrompt({
shouldShowTimestamp: subtitlesArray ? shouldShowTimestamp : false,
});
const examplePrompt = getExamplePrompt();
const userPrompt = getUserSubtitlePrompt(title, inputText);
// TODO: try the apiKey way for chrome extensions
// const systemPrompt = getSystemPrompt({
// shouldShowTimestamp: subtitlesArray ? shouldShowTimestamp : false,
// });
// const examplePrompt = getExamplePrompt();
const userPrompt =
subtitlesArray && shouldShowTimestamp
? getUserSubtitleWithTimestampPrompt(title, subtitlesArray)
: getUserSubtitlePrompt(title, inputText);
if (isDev) {
console.log("final system prompt: ", systemPrompt);
console.log("final example prompt: ", examplePrompt);
// console.log("final system prompt: ", systemPrompt);
// console.log("final example prompt: ", examplePrompt);
console.log("final user prompt: ", userPrompt);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
export function extractSentence(sentence: string) {
export function extractSentenceWithTimestamp(sentence: string) {
return sentence
.replace("0:", "0.0") // 修复第0秒
.match(/^\s*(\d+[\.:]?\d+?)([::秒 ].*)/);
Expand Down
36 changes: 31 additions & 5 deletions utils/formatSummary.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,39 @@
import { extractSentence } from "~/utils/extractSentence";
import { extractSentenceWithTimestamp } from "~/utils/extractSentenceWithTimestamp";
import { extractTimestamp } from "~/utils/extractTimestamp";

export function formatSummary(summary: string) {
export function formatSummary(summary: string, shouldShowTimestamp?: boolean) {
if (shouldShowTimestamp) {
try {
const parsedBulletPoints = JSON.parse(summary);
const summaryArray = parsedBulletPoints.map(
({
start_time,
bullet_point,
}: {
start_time: number;
bullet_point: string;
}) => {
const startTime = start_time === 0 ? "0.0" : start_time;
return startTime + " " + bullet_point;
}
);
return {
summaryArray,
formattedSummary: summaryArray.join("\n"),
};
} catch (e) {
console.error(e);
return {};
}
}

const summaryArray = ("\n" + summary).split("\n- ");
const formattedSummary = summaryArray
.map((s) => {
const matchResult = extractSentence(s);
if (matchResult) {
const { formattedContent, timestamp } = extractTimestamp(matchResult);
const matchTimestampResult = extractSentenceWithTimestamp(s);
if (matchTimestampResult) {
const { formattedContent, timestamp } =
extractTimestamp(matchTimestampResult);
return timestamp + formattedContent;
} else {
return s.replace(/\n\n/g, "\n");
Expand Down
4 changes: 2 additions & 2 deletions utils/reduceSubtitleTimestamp.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { CommonSubtitleItem } from "~/lib/types";
import { trimSeconds } from "~/utils/extractTimestamp";

/**
 * A subtitle item with a numeric `start` and its text `lines`.
 * Presumably the raw YouTube subtitle shape with `start` in seconds — TODO confirm.
 */
export type YoutubeSubtitleItem = { start: number; lines: string[] };
/*{ "from": 16.669, "content": "让ppt变得更加精彩" },*/
Expand Down Expand Up @@ -52,8 +51,9 @@ export function reduceSubtitleTimestamp<T>(
accumulator[groupIndex] = {
// 5.88 -> 5.9
// text: current.start.toFixed() + ": ",
text: shouldShowTimestamp ? getStart(current) + " - " : "",
index: groupIndex,
start_time: getStart(current),
text: shouldShowTimestamp ? getStart(current) + " - " : ""
};
}

Expand Down

1 comment on commit a6b7fcc

@vercel
Copy link

@vercel vercel bot commented on a6b7fcc Mar 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.