In [2]:
import aisuite as ai
import os
import requests
import pandas as pd
from dotenv import load_dotenv
from collections import Counter

In [3]:
client = ai.Client()
model = ["openai:gpt-4o", "openai:gpt-4o-mini"]
load_dotenv() 

True

## Step 1: Analyze Prompt & Topic Recommendations

In [None]:
user_input = input("What do you want to write about? ")

messages = [
    {"role": "system", "content": "You are a senior content planner. You provide diverse blog topics based on users need and topics."},
    {"role": "user", "content": f"Based on the user's query: {user_input}, provide 5 compelling blog topics."},
]

response = client.chat.completions.create(
    model=model[1],
    messages=messages,
    temperature=0.75
)
print(response.choices[0].message.content)

## Step 2: Confirm and Research Topic
https://serpapi.com/organic-results

In [4]:
# topic_query = input("What is the final topic you want to write about? ")
topic_query = "Top Digital Nomads Destinations in Asia"
topic_query = "How to Buy a House in Singapore as a Foreigner?"

In [5]:
base_url = "https://serpapi.com/search.json"
params = {
    "q": topic_query,
    "hl": "en",
    "gl": "us",
    "google_domain": "google.com",
    "api_key": os.getenv("SERPAPI_KEY")
}

# Make the API request
response = requests.get(base_url, params=params)

# Check if the request was successful
if response.status_code == 200:
    results = response.json()
    # Process the results as needed
    print("Search results retrieved successfully.")
else:
    print(f"Failed to retrieve search results. Status code: {response.status_code}")

Search results retrieved successfully.


In [6]:
# Create a list to store the results
data = []

# Iterate over the results and append to the list
for result in results.get('organic_results', []):
    position = result.get('position')
    link = result.get('link')
    title = result.get('title')
    data.append({'Position': position, 'Link': link, 'Title': title})

# Convert the list to a DataFrame
df_results = pd.DataFrame(data)

df_serp = df_results.copy()

## Step 3: Analyze SEMRush Data

In [7]:
from urllib.parse import quote

def get_semrush_data(url, api_key=os.getenv("SEMRUSH_API_KEY")):
    base_url = "https://api.semrush.com/"
    type_param = "url_organic"
    export_columns = "Ph,Po,Nq,Cp,Co"
    database = "us"
    display_limit = 50
    display_filter = "%2B%7CPo%7CLt%7C50" #Position lower than 50
    display_sort = "po_asc" #Asc sort by position 
       
    full_url = (
        f"{base_url}?type={type_param}&key={api_key}"
        f"&display_limit={display_limit}&export_columns={export_columns}"
        f"&url={url}&database={database}"
        f"&display_filter={display_filter}&display_sort={display_sort}"
    )

    print(full_url)
    
    response = requests.get(full_url)
    if response.status_code == 200:
        api_output = response.content
        print(api_output)
        decoded_output = api_output.decode('utf-8')
        lines = decoded_output.split('\r\n')
        headers = lines[0].split(';')
        json_data = []
        for line in lines[1:]:
            if line:  # Ensure the line is not empty
                values = line.split(';')
                record = {header: value for header, value in zip(headers, values)}
                json_data.append(record)       
        return json_data
    else:
        return []

In [8]:
df_results['SEMRush_Data'] = df_results['Link'].apply(get_semrush_data)

https://api.semrush.com/?type=url_organic&key=cde4f7ee33018efbf83128a179c13a83&display_limit=50&export_columns=Ph,Po,Nq,Cp,Co&url=https://www.reddit.com/r/digitalnomad/comments/vl38d1/which_country_place_in_asia_is_the_best_for/&database=us&display_filter=%2B%7CPo%7CLt%7C50&display_sort=po_asc
b'Keyword;Position;Search Volume;CPC;Competition\r\nbest places to work remotely in asia;1;40;0.00;0.00\r\n'
https://api.semrush.com/?type=url_organic&key=cde4f7ee33018efbf83128a179c13a83&display_limit=50&export_columns=Ph,Po,Nq,Cp,Co&url=https://www.lostcoconuts.com/best-destinations-southeast-asia-digital-nomads/&database=us&display_filter=%2B%7CPo%7CLt%7C50&display_sort=po_asc
b'Keyword;Position;Search Volume;CPC;Competition\r\nbest places to work remotely in asia;6;40;0.00;0.00\r\n'
https://api.semrush.com/?type=url_organic&key=cde4f7ee33018efbf83128a179c13a83&display_limit=50&export_columns=Ph,Po,Nq,Cp,Co&url=https://www.goatsontheroad.com/digital-nomad-cities-asia/&database=us&display_filte

In [9]:
test_url = df_results.iloc[0]['Link']
print(test_url)
get_semrush_data(test_url)

https://www.reddit.com/r/digitalnomad/comments/vl38d1/which_country_place_in_asia_is_the_best_for/
https://api.semrush.com/?type=url_organic&key=cde4f7ee33018efbf83128a179c13a83&display_limit=50&export_columns=Ph,Po,Nq,Cp,Co&url=https://www.reddit.com/r/digitalnomad/comments/vl38d1/which_country_place_in_asia_is_the_best_for/&database=us&display_filter=%2B%7CPo%7CLt%7C50&display_sort=po_asc
b'Keyword;Position;Search Volume;CPC;Competition\r\nbest places to work remotely in asia;1;40;0.00;0.00\r\n'


[{'Keyword': 'best places to work remotely in asia',
  'Position': '1',
  'Search Volume': '40',
  'CPC': '0.00',
  'Competition': '0.00'}]

In [10]:
df_results

Unnamed: 0,Position,Link,Title,SEMRush_Data
0,1,https://www.reddit.com/r/digitalnomad/comments...,Which country / place in Asia is the best for ...,[{'Keyword': 'best places to work remotely in ...
1,2,https://www.lostcoconuts.com/best-destinations...,12 best destinations in Southeast Asia for dig...,[{'Keyword': 'best places to work remotely in ...
2,3,https://www.goatsontheroad.com/digital-nomad-c...,7 Best Digital Nomad Cities in Asia,[{'Keyword': 'best places to work remotely in ...
3,4,https://www.thedigitalnomad.asia/,Digital Nomad Asia,[]
4,5,https://nomadsembassy.com/best-digital-nomad-c...,12 Best Digital Nomad Cities in Asia [2024 Edi...,[{'Keyword': 'best places to work remotely in ...
5,6,https://www.creimermanlaw.com/post/top-southea...,Top Southeast Asian Destinations for Expats an...,[]
6,7,https://guide.genki.world/easy-travel-planning...,Asia made easy,[]


In [11]:
# Extract all SEMRush_Data from df_results
all_keywords = []
for data in df_results['SEMRush_Data']:
    if data:
        all_keywords.extend([item['Keyword'] for item in data])

In [13]:
# Count the occurrence of each keyword
keyword_counts = Counter(all_keywords)

# Determine the highest and second highest ranking_sites number
highest_count = max(keyword_counts.values())
second_highest_count = sorted(set(keyword_counts.values()), reverse=True)[1] if len(set(keyword_counts.values())) > 1 else 0

# Get top keywords based on the highest and second highest ranking_sites number
top_keywords = [keyword for keyword, count in keyword_counts.items() if count == highest_count or count == second_highest_count]

# If the highest ranking_sites number is just 2, select keywords with ranking_sites==2 and ranking_sites==1
if highest_count == 2:
    top_keywords = [keyword for keyword, count in keyword_counts.items() if count in [1, 2]]

# Get the top 10 search volume keywords
search_volume_keywords = sorted(
    [(item['Keyword'], int(item['Search Volume'])) for data in df_results['SEMRush_Data'] if data for item in data],
    key=lambda x: x[1],
    reverse=True
)[:10]

# Combine the top keywords and top search volume keywords
final_keywords = set(top_keywords + [keyword for keyword, _ in search_volume_keywords])

# Create a DataFrame with the final keywords, their search volumes, and frequencies
final_keywords_df = pd.DataFrame(
    [(keyword, 
      next((item['Search Volume'] for data in df_results['SEMRush_Data'] if data for item in data if item['Keyword'] == keyword), 0),
      keyword_counts[keyword])
     for keyword in final_keywords],
    columns=['Keyword', 'Search Volume', 'Frequency']
)

# Sort the DataFrame by Frequency first, then by Search Volume
final_keywords_df = final_keywords_df.sort_values(by=['Frequency', 'Search Volume'], ascending=[False, False])

# Return the DataFrame
final_keywords_df


Unnamed: 0,Keyword,Search Volume,Frequency
0,best places to work remotely in asia,40,4


## Step 4: Fetch Content
https://jina.ai/reader/

In [34]:
def fetch_content(url):
    print(f'working on {url}')
    headers = {
        'Authorization': f'Bearer {os.getenv("JINA_API_KEY")}',
        'X-Retain-Images': 'none',
        "Accept": "application/json",
        'X-Timeout': '15'
    }

    response = requests.get(f'https://r.jina.ai/{url}', headers=headers).json()
    if response['code'] == 200:
        return response['data']['content']
    elif response['code'] != 200:
        return f"ERROR: {url} blocks Jina API"
    else:
        print(f"ERROR: Failed to use Jina API")


In [None]:
df_results['Content'] = df_results['Link'].apply(fetch_content)

In [None]:
df_results

## Step 5: Analyzing Content

In [None]:
df_results.iloc[1]['SEMRush_Data']

In [41]:
messages = [
    {"role": "system", "content": "You are a content researcher who is expertised in analyzing webpages."},
    {"role": "user", "content": f"review the provided content below and do the following, "
                                f"First, analyze whether it's a blog or an article. "
                                f"If not, give it a miss and move on; if yes, add it to your review list. "
                                f"For all articles or blog articles in the review list. Review them carefully and provide your analysis which consists of the 3 parts: "
                                f"(1.) common topics and subtopics "
                                f"(2.) contradicting points of view among the top 10 results "
                                f"(3.) for users searching for '{topic_query}', what could be information that they want to know while not covered or questions that aren't answered?\n\n"
                                + "\n".join([f"WEB_CONTENT {i+1}\n{content}" for i, content in enumerate(df_results['Content'])])
     }
]


In [None]:
messages

In [55]:

response = client.chat.completions.create(
    model=model[1],
    messages=messages,
    temperature=0.75
)

content_analysis = response.choices[0].message.content

In [None]:
print(content_analysis)

## Step 6: Generate Content Plan

In [57]:
messages = [
    {"role": "system", "content": "You are an expert content planner, who is an expert at creating a content plan is clear and easy to followed for content writer."},
    {"role": "user", "content": f"review the content analysis below and put together a content plan that contains the following:\n\n"
                                f"Topic: {topic_query}\n"
                                f"A content outline with hierarchical structure of headings and subheadings\n\n"
                                f"SEO Keywords: incorporate top keywords: {final_keywords}\n\n"
                                f"while putting together a content plan, ensure that:\n"
                                f"the common topics and subtopics are covered\n"
                                f"make a note for contradicting points\n\n"
                                f"CONTENT ANALYSIS\n {content_analysis}"
     }
]

In [None]:
messages

In [59]:
response = client.chat.completions.create(
    model=model[1],
    messages=messages,
    temperature=0.75
)

content_plan = response.choices[0].message.content

In [None]:
content_plan

## Step 7: Generate Content Draft

In [61]:
messages = [
    {"role": "system", "content": "You are an expert content writer. You are excellent at following the content plan provided and writing blog posts that are engaging and inspiring while also follow SEO best practices."},
    {"role": "user", "content": f"Review the Content Plan and Competitor Content Analysis below and write an article. Provide just the article and nothing else:\n\n"
                                f"Content Plan:\n {content_plan}\n\n"
                                f"Competitor Content Analysis:\n {content_analysis}"

     }
]

In [62]:
response = client.chat.completions.create(
    model=model[1],
    messages=messages,
    temperature=0.75
)

In [None]:
content_draft = response.choices[0].message.content


In [None]:
print(content_draft)

## Step 8: Proofread the Draft Post

In [66]:
messages = [
    {"role": "system", "content": "You are an expert content editor. You are excellent at reviewing and editing content to make it engaging while following the SEO best practices."},
    {"role": "user", "content": f"Review the Content Draft below and make sure it follows the content plan, and is better than competitor's content as shown in Competitor Content Analysis. Provide just the article and nothing else:\n\n"
                                f"Content Draft:\n {content_draft}\n\n"
                                f"Content Plan:\n {content_plan}\n\n"
                                f"Competitor Content Analysis:\n {content_analysis}"
     }
]

In [67]:
response = client.chat.completions.create(
    model=model[1],
    messages=messages,
    temperature=0.75
)

proofread_draft = response.choices[0].message.content

In [None]:
print(proofread_draft)

## Step 9: SEO expert

In [69]:
messages = [
    {"role": "system", "content": "You are an SEO expert. You are excellent at providing recommendations for Title tag and Meta Description for a given blog article."},
    {"role": "user", "content": f"Review the Content below as well as Targeting Keywords (optional). Provide an URL slug, 3 variants of Title tag, 3 variants of Meta Description. Provide Just the Title and Meta Description and nothing else:\n\n"
                                f"Content:\n {proofread_draft}\n\n"
                                f"Targeting Keywords:\n {final_keywords}\n"
     }
]

response = client.chat.completions.create(
    model=model[1],
    messages=messages,
    temperature=0.75
)

seo_recommendation = response.choices[0].message.content

In [None]:
print(seo_recommendation)

## Step 10: Summarization

In [73]:
messages = [
    {"role": "system", "content": "You are a Senior Project Manager. You are excellent at delivering content to the client in a clear and easy-to-follow format."},
    {"role": "user", "content": f"Review the following information - and generate a document that clearly explain the entire content generation, which should include Title & Meta Description (as well as other options), URL, Targeting Keywords, (and searvh volume), Competitors (only use Position, Link, and Title in the dataframe), Notes (explain that in this content, what are covered, what are points that competitors don't cover, and what requires human validation and review), and most importantly the Final Content. Provide just the final deliverable and nothing else:\n\n"
                                f"Content:\n {proofread_draft}\n\n"
                                f"SEO Recommendations:\n {seo_recommendation}\n"
                                f"Targeting Keywords:\n {final_keywords_df}\n"
                                f"Competitors:\n {df_serp}\n"
                                f"Competitors Analysis:\n {content_analysis}\n"
     }
]

response = client.chat.completions.create(
    model=model[1],
    messages=messages,
    temperature=0.75
)

final_deliverable = response.choices[0].message.content

In [None]:
print(final_deliverable)