Skip to content

Commit

Permalink
feat: finished the prompt
Browse files Browse the repository at this point in the history
  • Loading branch information
JimmyLv committed Feb 26, 2023
1 parent 191a9e9 commit cdffcc3
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 34 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ yarn-error.log*
# local env files
.env*.local
.env
.idea/

# vercel
.vercel
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# [b.jimmylv.cn](https://b.jimmylv.cn/)
# [b.jimmylv.cn](https://b.jimmylv.cn/) 哔哩哔哩视频一键总结

This project summarizes 哔哩哔哩 videos for you using AI.

[![哔哩哔哩视频总结工具](./public/screenshot.png)](https://b.jimmylv.cn)

## How it works

This project uses the [OpenAI GPT-3 API](https://openai.com/api/) (specifically, text-davinci-003) and [Vercel Edge functions](https://vercel.com/features/edge-functions) with streaming. It fetches the content on a Techcrunch article, sends it in a prompt to the GPT-3 API to summarize it via a Vercel Edge function, then streams the response back to the application.
This project uses the [OpenAI GPT-3 API](https://openai.com/api/) (specifically, text-davinci-003) and [Vercel Edge functions](https://vercel.com/features/edge-functions) with streaming. It fetches the title and subtitles of a Bilibili video, sends them in a prompt to the GPT-3 API to be summarized via a Vercel Edge function, then streams the response back to the application.

Video coming soon on how I built it from scratch!

Expand Down
2 changes: 1 addition & 1 deletion components/Header.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export default function Header() {
className="flex items-center space-x-3"
href="https://b.jimmylv.cn"
>
<Image src="/edit.png" alt="logo" width={34} height={34} />
<Image src="/video.png" alt="logo" width={50} height={50} />
<h2 className={clsx("text-lg sm:text-3xl", poppins.className)}>
<span className="text-pink-400 ">哔哩哔哩</span> 视频总结器
</h2>
Expand Down
34 changes: 17 additions & 17 deletions pages/[...slug].tsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,35 +13,35 @@ export const Home: NextPage = () => {
const urlState = router.query.slug;
const [summary, setSummary] = useState<string>("");
const [loading, setLoading] = useState<boolean>(false);
const [curArticle, setCurArticle] = useState<string>("");
const [curVideo, setCurVideo] = useState<string>("");

useEffect(() => {
if (
urlState &&
router.isReady &&
!curArticle &&
!curVideo &&
typeof urlState !== "string" &&
urlState.every((subslug: string) => typeof subslug === "string")
) {
generateSummary(
"https://techcrunch.com/" + (urlState as string[]).join("/")
"https://bilibili.com/" + (urlState as string[]).join("/")
);
}
}, [router.isReady, urlState]);

const curUrl = String(curArticle.split(".com")[1]);
const curUrl = String(curVideo.split(".com")[1]);

const generateSummary = async (url?: string) => {
setSummary("");
if (url) {
if (!url.includes("techcrunch.com")) {
toast.error("Please enter a valid 哔哩哔哩 article");
if (!url.includes("bilibili.com")) {
toast.error("Please enter a valid 哔哩哔哩 video");
return;
}
setCurArticle(url);
setCurVideo(url);
} else {
if (!curArticle.includes("techcrunch.com")) {
toast.error("Please enter a valid 哔哩哔哩 article");
if (!curVideo.includes("bilibili.com")) {
toast.error("Please enter a valid 哔哩哔哩 video");
return;
}
router.replace(curUrl);
Expand All @@ -52,7 +52,7 @@ export const Home: NextPage = () => {
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ url: url ? url : curArticle }),
body: JSON.stringify({ url: url ? url : curVideo }),
});

if (!response.ok) {
Expand Down Expand Up @@ -90,7 +90,7 @@ export const Home: NextPage = () => {
target="_blank"
rel="noreferrer"
className="mx-auto mb-5 hidden max-w-fit rounded-full border border-gray-800 px-4 py-1 text-gray-500 transition duration-300 ease-in-out hover:scale-105 hover:border-gray-700 md:block"
href="https://twitter.com/nutlope/status/1622988173155368960"
href="https://space.bilibili.com/37648256"
>
You can also go to a Bilibili video and change the suffix "
<span className="text-pink-400">.com</span>" into "
Expand All @@ -108,24 +108,24 @@ export const Home: NextPage = () => {
<SquigglyLines />
<span className="relative text-pink-400 ">哔哩哔哩</span>
</span>{" "}
article with AI
video with AI
</h1>
<p className="mt-10 text-center text-lg text-gray-500 sm:text-2xl">
Copy and paste any <span className="text-pink-400 ">哔哩哔哩 </span>
article link below.
video link below. 👇
</p>
<input
type="text"
value={curArticle}
onChange={(e) => setCurArticle(e.target.value)}
value={curVideo}
onChange={(e) => setCurVideo(e.target.value)}
className="mx-auto mt-10 w-full appearance-none rounded-lg rounded-md border bg-transparent py-2 pl-2 text-sm leading-6 text-slate-900 shadow-sm ring-1 ring-slate-200 placeholder:text-slate-400 focus:outline-none focus:ring-2 focus:ring-blue-500 dark:text-slate-100 dark:ring-0 dark:placeholder:text-slate-500 dark:focus:ring-2"
/>
{!loading && (
<button
className="z-10 mx-auto mt-7 w-3/4 rounded-2xl border-gray-500 bg-sky-400 p-3 text-lg font-medium text-white transition hover:bg-sky-500 sm:mt-10 sm:w-1/3"
onClick={() => generateSummary()}
>
Summarize
一键总结(三连)
</button>
)}
{loading && (
Expand Down Expand Up @@ -154,7 +154,7 @@ export const Home: NextPage = () => {
Summary
</h2>
<div className="mx-auto mt-6 max-w-3xl text-lg leading-7">
{summary.split(". ").map((sentence, index) => (
{summary.split("- ").map((sentence, index) => (
<div key={index}>
{sentence.length > 0 && (
<li className="mb-2 list-disc">{sentence}</li>
Expand Down
45 changes: 31 additions & 14 deletions pages/api/summarize.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { parse } from "node-html-parser";
import { OpenAIStream } from "../../utils/OpenAIStream";
import { getChunckedTranscripts, getSummaryPrompt } from "../../utils/prompt";

export const config = {
runtime: "edge",
Expand All @@ -19,19 +19,36 @@ export default async function handler(req: Request) {
}

try {
const response = await fetch(url, {
method: "GET",
});

const data = await response.text();
const matchResult = url.match(/\/video\/(.*)/);
let bvId: string | undefined;
if (matchResult) {
bvId = matchResult[1];
}
const response = await fetch(
`https://api.bilibili.com/x/web-interface/view?bvid=${bvId}`,
{
method: "GET",
}
);
const res = await response.json();
// @ts-ignore
const title = res.data.title;
const subtitleUrl = res.data.subtitle?.list?.[0]?.subtitle_url;
console.log("subtitle_url", subtitleUrl);

const root = parse(data);
const body = root.querySelector(".article-content");
const text = body!.innerText
.replace(/(\r\n|\n|\r)/gm, "")
.replace(/(\r\t|\t|\r)/gm, "");

const prompt = `I want you to act like a news article summarizer. I will input text from a news article and your job is to convert it into a useful summary of a few sentences. Do not repeat sentences and make sure all sentences are clear and complete: "${text}"`;
const subtitleResponse = await fetch(subtitleUrl);
const subtitles = await subtitleResponse.json();
// @ts-ignore
const transcripts = subtitles.body.map((item, index) => {
return {
text: item.content,
index,
timestamp: item.from,
};
});
console.log("========transcripts========", transcripts);
const text = getChunckedTranscripts(transcripts, transcripts);
const prompt = getSummaryPrompt(title, text);

const payload = {
model: "text-davinci-003",
Expand All @@ -40,7 +57,7 @@ export default async function handler(req: Request) {
top_p: 1,
frequency_penalty: 0,
presence_penalty: 0,
max_tokens: 200,
max_tokens: 400,
stream: true,
n: 1,
};
Expand Down
Binary file modified public/screenshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added public/video.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
84 changes: 84 additions & 0 deletions utils/prompt.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@

/**
 * Builds the summarization prompt sent to the completion API.
 *
 * The prompt has three newline-separated parts: the quoted video title, the
 * quoted (size-limited) subtitle text, and a fixed instruction asking for an
 * unordered-list summary of at most five points.
 *
 * @param title - Video title; runs of newlines are collapsed to one space.
 * @param transcript - Subtitle text; passed through `truncateTranscript`
 *   before newlines are collapsed.
 * @returns The complete prompt string.
 */
export function getSummaryPrompt(title: string, transcript: any) {
  // Collapse line breaks so each quoted section stays on a single line.
  const flatten = (value: string) => value.replace(/\n+/g, " ").trim();

  const cleanTitle = flatten(title);
  const cleanTranscript = flatten(truncateTranscript(transcript));

  return `标题: "${cleanTitle}"\n视频字幕: "${cleanTranscript}"\n我希望你是一名专业的视频内容编辑,帮我总结视频的内容精华。请你将视频字幕文本进行总结,然后以无序列表的方式返回。不要超过5点,不要重复句子,确保所有的句子都清晰完整,祝你好运!`;
}

// Size budget for transcript text; it is compared against the length of
// textToBinaryString's output wherever text is measured in this file.
// NOTE(review): earlier notes here cited a ~15,000-byte prompt limit and a
// 1,000 buffer, neither of which matches 7000 — confirm the intended budget.
const limit = 7000;

/**
 * Reduces a list of transcript items to one space-joined string that fits the
 * module's size budget (`limit`, measured with `textToBinaryString`).
 *
 * Strategy:
 *  1. Join the items in `index` order. If the text is over budget, keep only
 *     every other item and recurse until it fits.
 *  2. Once it fits, add back items from `textDataOriginal` that were dropped,
 *     in `index` order, for as long as they fit.
 *  3. When the next dropped item does not fit completely, add only the prefix
 *     of it that fills the remaining space, then stop.
 *
 * Neither input array is mutated or reordered.
 *
 * @param textData - Items currently under consideration (initially the full list).
 * @param textDataOriginal - The full, unthinned list used to refill spare space.
 * @returns The joined transcript text. It is within the budget unless a single
 *   item alone is over budget, in which case that item's text is returned
 *   as-is for the caller to truncate. If the result would be empty, the full
 *   joined original text is returned instead.
 */
export function getChunckedTranscripts(textData: { text: any; index: any; }[], textDataOriginal: any[]) {
  const byIndex = (a: { index: any }, b: { index: any }) => a.index - b.index;
  const joinTexts = (items: { text: any }[]) => items.map((t) => t.text).join(" ");
  const sizeOf = (value: string) => textToBinaryString(value).length;

  // Sort copies so the caller's arrays keep their order.
  const ordered = [...textData].sort(byIndex);
  const text = joinTexts(ordered);

  if (sizeOf(text) > limit) {
    // A lone item cannot be thinned any further; halving it again would
    // recurse forever, so hand it back and let the caller truncate.
    if (ordered.length <= 1) {
      return text;
    }
    const evenTextData = ordered.filter((_, i) => i % 2 === 0);
    return getChunckedTranscripts(evenTextData, textDataOriginal);
  }

  const orderedOriginal = [...textDataOriginal].sort(byIndex);
  // Start from the text that already fits, so an unsuccessful refill can
  // never discard it.
  let result = text;

  if (orderedOriginal.length !== ordered.length) {
    const selected = [...ordered];
    const selectedIndexes = new Set(ordered.map((t) => t.index));
    // Sizes are tracked incrementally. This assumes the size of a joined
    // string is the sum of its parts plus 1 per separator space, which holds
    // for textToBinaryString as written (it maps each character independently).
    let usedSize = sizeOf(text);

    for (const candidate of orderedOriginal) {
      if (selectedIndexes.has(candidate.index)) {
        continue;
      }

      // Array.prototype.join renders null/undefined as "", so measure the same way.
      const candidateText = candidate.text == null ? "" : String(candidate.text);
      const candidateSize = sizeOf(candidateText);
      const separatorSize = selected.length > 0 ? 1 : 0;

      if (usedSize + separatorSize + candidateSize <= limit) {
        selected.push(candidate);
        selectedIndexes.add(candidate.index);
        usedSize += separatorSize + candidateSize;
        continue;
      }

      // The item does not fit whole: keep the share of it that fills the
      // remaining space (remaining / size, not the overflowing share).
      const remaining = limit - usedSize - separatorSize;
      if (remaining > 0 && candidateSize > 0) {
        let prefix = candidateText.substring(
          0,
          Math.floor(candidateText.length * (remaining / candidateSize))
        );
        // The proportional cut is only an estimate when character sizes vary.
        while (prefix.length > 0 && sizeOf(prefix) > remaining) {
          prefix = prefix.slice(0, -1);
        }
        if (prefix.length > 0) {
          selected.push({ text: prefix, index: candidate.index });
        }
      }
      break;
    }

    result = joinTexts(selected.sort(byIndex));
  }

  // Just in case the result is empty, fall back to the full original text.
  return result === "" ? joinTexts(orderedOriginal) : result;
}

/**
 * Cuts `str` down to a prefix whose size, measured with `textToBinaryString`,
 * does not exceed `limit`.
 *
 * The cut point is first estimated proportionally (exact when every character
 * has the same size). If that prefix is still over the limit, because larger
 * characters are concentrated at the front, the longest fitting prefix is
 * found by binary search. A trailing unpaired high surrogate is dropped so the
 * cut never splits a surrogate pair.
 *
 * @param str - Text to limit.
 * @returns `str` unchanged when it already fits, otherwise a fitting prefix.
 */
function truncateTranscript(str: string) {
  const sizeOf = (value: string) => textToBinaryString(value).length;

  const bytes = sizeOf(str);
  if (bytes <= limit) {
    return str;
  }

  let end = Math.floor(str.length * (limit / bytes));

  if (sizeOf(str.substring(0, end)) > limit) {
    // Prefix size grows with prefix length, so binary search applies.
    // Invariant: the prefix of length `low` fits; lengths above `high` do not.
    let low = 0;
    let high = end;
    while (low < high) {
      const mid = Math.ceil((low + high) / 2);
      if (sizeOf(str.substring(0, mid)) <= limit) {
        low = mid;
      } else {
        high = mid - 1;
      }
    }
    end = low;
  }

  if (end > 0) {
    const last = str.charCodeAt(end - 1);
    // 0xD800–0xDBFF are high surrogates; one at the very end has lost its pair.
    if (last >= 0xd800 && last <= 0xdbff) {
      end -= 1;
    }
  }

  return str.substring(0, end);
}

/**
 * Converts `str` into the string whose length this module uses as a size
 * measure.
 *
 * The input is passed through `escape()`, then every two-digit `%XX` sequence
 * in the escaped text is turned back into the character with that code.
 * `escape()` writes code units above 0xFF as `%uXXXX`, which the two-digit
 * pattern does not match, so each such character remains as its six-character
 * `%uXXXX` form in the output.
 *
 * @param str - Text to convert.
 * @returns The converted string.
 */
function textToBinaryString(str: string) {
  // escape() yields plain ASCII, so no URI-component round trip is needed here.
  const escaped = escape(str);
  const percentPair = /%([0-9A-F]{2})/gi;
  return escaped.replace(percentPair, (_whole, hex) =>
    String.fromCharCode(parseInt(hex, 16))
  );
}

1 comment on commit cdffcc3

@vercel
Copy link

@vercel vercel bot commented on cdffcc3 Feb 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.