In [5]:
from langchain import PromptTemplate
from langchain.chat_models import PromptLayerChatOpenAI
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.chains import LLMChain
import promptlayer
import os
import pandas as pd
import re
# API credentials are read from the environment; a missing variable raises KeyError here.
promptlayer.api_key = os.environ["PROMPTLAYER_API_KEY"]
# Use PromptLayer's wrapper around the openai module so requests are logged.
openai = promptlayer.openai
# Model name shared by both title generators below.
gpt_model = "gpt-4"
openai.api_key = os.environ["OPENAI_API_KEY"]
# Confirm the key was found without echoing the secret into the saved cell output.
print("OPENAI_API_KEY: loaded from environment")

OPENAI_API_KEY: [REDACTED]


# Parameters

In [2]:
# Path of the CSV to process. It is read with pd.read_csv and later
# overwritten in place by df.to_csv, so keep a backup of the original export.
filename = "instagram-reel-archive.csv"

# Generate YouTube Short Titles

Given a CSV file containing an Instagram caption (downloaded independently), this script will take the caption and remove any characters not accepted on YouTube. After that, it will use an LLM to convert the Instagram caption into a snappy title for a YouTube Short video.

In [8]:
# Load the CSV named by `filename` into a DataFrame.
df = pd.read_csv(filename)

# One placeholder per row; only the rows processed further down get real values.
youtube_titles = [None for _ in df.index]
youtube_descriptions = [None for _ in df.index]

def clean_youtube_description(description):
    """Delete every character outside the permitted set from ``description``.

    Kept: ASCII letters and digits, whitespace (including newlines), and the
    symbols . , ; : ! ? - _ ( ) [ ] double quote, single quote, @ # $ % ^ & * + = /
    Everything else (emoji, accented letters, angle brackets, braces, ...) is removed.

    Args:
        description: Text to clean; must be a string.

    Returns:
        The cleaned string.
    """
    # The character class is negated, so it matches exactly what has to go.
    disallowed = re.compile(r"[^a-zA-Z0-9\s\.,;:!?\-_\n\(\)\[\]\"\'\@\#\$\%\^\&\*\+\=\/]")
    return disallowed.sub("", description)

def generate_youtube_title_completion(description):
    """Produce a YouTube Short title from a caption using a single-prompt LLMChain.

    The caption is substituted into one prompt template and run through an
    LLMChain backed by PromptLayerChatOpenAI (model taken from ``gpt_model``,
    tagged "InstagramToYouTube") instead of being sent as separate chat messages.

    Args:
        description: Caption text placed in the prompt's ``caption`` slot.

    Returns:
        Whatever ``LLMChain.predict`` returns for the filled-in prompt.
    """
    instructions = """You are an assistant to a Reproductive Endocrinologist YouTube creator. You will generate 1 YouTube Short video title from an Instagram caption that is delimeted by ```
    You must generate a title that is funny, colorful and engaging. You like to use puns, palindromes, malapropisms, double entendre, alliteration and GenZ terminology.
    The title must be shorter than 40 characters.
    The content of the videos you are creating titles for relate to fertility and women's reproductive health, and as such, the titles should be targetted towards females in the 24-40 age range.
    ```
    {caption}
    ```
    """
    chain = LLMChain(
        prompt=PromptTemplate(input_variables=["caption"], template=instructions),
        llm=PromptLayerChatOpenAI(model=gpt_model, pl_tags=["InstagramToYouTube"]),
    )
    return chain.predict(caption=description)
    




def generate_youtube_title(description):
    """Generate a YouTube Short title from an Instagram caption via chat messages.

    Four system messages (role, tone, length limit, audience) followed by one
    human message holding the caption are sent to PromptLayerChatOpenAI
    (model taken from ``gpt_model``, tagged "YouTubeConverter").

    Args:
        description: Caption text, substituted into the human message.

    Returns:
        The ``content`` attribute of the chat model's reply.
    """
    # Plain strings rather than f-strings: these are handed to from_template(),
    # which handles {placeholders} itself, so eager f-string interpolation would
    # only get in the way if a placeholder were ever added.
    system_templates = (
        "You are an assistant playing the role of a Reproductive Endocrinologist YouTube creator. You will generate 1 YouTube short video title from an Instagram caption passed to it.",
        "The titles you generate should be funny, colorful and engaging. You like to use puns, palindromes, malapropisms, double entendre, alliteration and GenZ terminology.",
        "The titles generated must be shorter than 40 characters.",
        # The embedded newline and indentation match the original multi-line literal.
        "The content of the videos are fertility and reproductive health and should be targetted towards females in the 24-40 age\n    demographic.",
    )

    # System messages first, in order, then the human message carrying the caption.
    messages = [SystemMessagePromptTemplate.from_template(text) for text in system_templates]
    messages.append(HumanMessagePromptTemplate.from_template("{description}"))

    chat_prompt = ChatPromptTemplate.from_messages(messages)
    chat = PromptLayerChatOpenAI(model=gpt_model, pl_tags=["YouTubeConverter"])
    result = chat(chat_prompt.format_prompt(description=description).to_messages())
    return result.content

# Generate titles for the selected rows (currently only the last row of the frame).
for index, row in df.tail(1).iterrows():
    raw_caption = row['Caption']
    # A missing caption would otherwise become the literal text "nan" and be
    # sent to the LLM twice; leave that row's placeholders as None instead.
    if pd.isna(raw_caption):
        continue

    caption = str(raw_caption)
    description = clean_youtube_description(caption)
    title_completion = generate_youtube_title_completion(description)
    title = generate_youtube_title(description)
    print(f"""Original Caption: {caption},\n\n
    YouTube Caption from Chat: {title},\n\n
    YouTube Caption from Completion:{title_completion}\n\n\n""")

    # read_csv without index_col yields a 0..n-1 index, so the row label is
    # also the list position.
    youtube_titles[index] = title
    youtube_descriptions[index] = description

# Append the new columns to the existing data frame.
df["YouTube Titles"] = youtube_titles
df["YouTube Description"] = youtube_descriptions

# Write the frame back out; this overwrites the file that was read.
df.to_csv(filename, index=False)

Original Caption: Quintuplet headlines are not success stories. Yes it seems miraculous to go from having no kids to 5 kids overnight - but twin and especially higher order multiple pregnancies are never the goal of fertility treatment- they are actually viewed as a risk/complication. 

Nowadays, the risk of having twins, and especially triplets or even higher order pregnancies, is significantly lower from fertility treatment thanks to many clinics being able to adapt a Single Embryo Transfer (SET) approach (due to improved IVF success rates). Embryos can split 2-3% of the time (a not so common occurrence). Another mechanism for twins is using medication to induce ovulation (with timed intercourse or IUI)- if using oral meds like clomid or letrozole the risk is lower (3-8% depending on age, # follicles that become mature) … using injectable medications (FSH, menopur etc) can be associated with a much higher risk.

Why is this important?
Multifetal pregnancy has a much higher risk of pr