In [None]:
!pip install youtube-transcript-api
!pip install pexels-api
!pip install google-generativeai
!pip install google-api-python-client

In [None]:
import html
import random
import re
from datetime import datetime, timedelta

import pytz,time
import requests,random
import markdown
import nltk
import pandas as pd
import google.generativeai as genai
from googleapiclient.discovery import build
from kaggle_secrets import UserSecretsClient
from nltk import pos_tag
from nltk.tokenize import word_tokenize
from pexels_api import API
from youtube_transcript_api import YouTubeTranscriptApi

In [None]:
# Plain constants: the WordPress REST endpoint that receives posts and the CSV
# of candidate topics.
url = 'https://dynoxglobal.com/wp-json/wp/v2/posts'
file_path = '/kaggle/input/blog-topics/Dynox_topics.csv'

# Credentials come from the Kaggle secret store instead of being hard-coded.
user_secrets = UserSecretsClient()
gemini_1 = user_secrets.get_secret("gemini_1")
gemini_2 = user_secrets.get_secret("gemini_2")
# The Pexels key is only needed to construct the client, so it is not kept around.
pexels_api = API(user_secrets.get_secret("pexels api"))
username = user_secrets.get_secret("wordpress username")
password = user_secrets.get_secret("wordpress password")
youtube_api_key = user_secrets.get_secret("youtube_api_key")

# NLTK resources fetched up front; find_important_words() relies on
# word_tokenize / pos_tag.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# YouTube Data API v3 client used by youtube_search().
youtube = build('youtube', 'v3', developerKey=youtube_api_key)

In [None]:
def youtube_search(query, max_results=500):
    """Search YouTube for videos matching ``query``, ordered by view count.

    Pages through ``youtube.search().list`` until ``max_results`` videos have
    been collected or the API reports no further page. Each collected video is
    also printed.

    Args:
        query: Search terms, sent to the API as ``q``.
        max_results: Upper bound on the number of videos to return.

    Returns:
        A ``(video_titles, video_ids)`` tuple of two equally long lists, in the
        order the API returned the videos.
    """
    video_titles, video_ids = [], []
    next_page_token = None

    while len(video_ids) < max_results:
        remaining = max_results - len(video_ids)
        search_response = youtube.search().list(
            q=query,
            part='id,snippet',
            # Ask for videos only. Without this, channels and playlists occupy
            # slots in the page and are thrown away below, which can force
            # extra paged requests (notably when max_results is small).
            type='video',
            maxResults=min(50, remaining),  # Maximum results per request is 50.
            order='viewCount',
            pageToken=next_page_token
        ).execute()

        for search_result in search_response.get('items', []):
            # Kept as a defensive check even though the request filters by type.
            if search_result['id']['kind'] != 'youtube#video':
                continue

            title = search_result['snippet']['title']
            video_id = search_result['id']['videoId']
            video_titles.append(title)
            video_ids.append(video_id)
            print(f"Title: {title}, Video ID: {video_id}")

            if len(video_ids) == max_results:
                break

        # Stop when the API has no further page to offer.
        next_page_token = search_response.get('nextPageToken')
        if not next_page_token:
            break

    return video_titles, video_ids

In [None]:
def find_important_words(text):
    """Return the nouns found in ``text``, in their original order.

    The text is tokenized with ``word_tokenize`` and tagged with ``pos_tag``;
    tokens tagged NN, NNS, NNP or NNPS are kept.
    """
    noun_tags = ('NN', 'NNS', 'NNP', 'NNPS')
    nouns = []
    for token, tag in pos_tag(word_tokenize(text)):
        if tag in noun_tags:
            nouns.append(token)
    return nouns

In [None]:
def retrieve_image(query):
    """Search Pexels for ``query`` and return one randomly chosen photo.

    Requests the first page of results (10 per page) and returns the
    ``original`` attribute of a random entry, which the caller uses as the
    image URL. When the search yields no entries the literal string
    "No image found." is returned instead.
    """
    pexels_api.search(query, page=1, results_per_page=10)
    candidates = pexels_api.get_entries()
    if not candidates:
        return "No image found."
    chosen = random.randrange(len(candidates))
    return candidates[chosen].original

In [None]:
def get_transcription(video_id):
    """Return the English transcript of a video as one space-joined string.

    Args:
        video_id: YouTube video ID passed to ``YouTubeTranscriptApi.get_transcript``.

    Returns:
        The ``text`` of every transcript segment joined with single spaces, or
        "" when fetching fails for any reason. Failures are deliberately
        non-fatal so the caller can skip the video; the underlying error is
        printed so the cause is visible.
    """
    try:
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
    except Exception as e:
        # Report the actual error; the message previously ended at the colon.
        print("An error occurred while fetching the transcript:", e)
        return ""

    return ' '.join(segment['text'] for segment in transcript_list)

In [None]:
def generate_content(prompt, token):
    """Generate text for ``prompt`` with the "gemini-pro" model.

    Args:
        prompt: Text handed to ``model.generate_content``.
        token: Value used for the ``max_output_tokens`` generation setting.

    Returns:
        The response's ``text``, or "" if anything raises while building the
        model, generating, or reading the text (the error is printed).
    """
    harm_categories = (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
    # Every category shares the same threshold.
    safety_settings = [
        {"category": category, "threshold": "BLOCK_ONLY_HIGH"}
        for category in harm_categories
    ]
    generation_config = dict(
        temperature=0.9,
        top_p=1,
        top_k=1,
        max_output_tokens=token,
    )

    try:
        model = genai.GenerativeModel(
            model_name="gemini-pro",
            generation_config=generation_config,
            safety_settings=safety_settings,
        )
        return model.generate_content(prompt).text
    except Exception as e:
        print("An error occurred during content generation:", e)
        return ""

In [None]:
# Run-level settings: number of posts published so far, and how many videos to
# request from YouTube for the chosen topic.
posted = 0
max_results = 1

# The topics CSV has no header row; one row is drawn at random and its first
# column is used as the search query.
df = pd.read_csv(file_path, header=None)
random_row = random.randrange(len(df))
random_topic = df.iat[random_row, 0]

query = random_topic
titles, ids = youtube_search(query, max_results)

In [None]:
# For every video found: fetch its transcript, have Gemini write a title,
# an article and a Q&A section from it, attach a Pexels image, and publish the
# result to WordPress. Videos that cannot produce a complete post are skipped.
for title, video_id in zip(titles, ids):
    transcript = get_transcription(video_id)
    if not transcript:
        print("Skipping video due to lack of transcript.")
        continue

    # Alternate between the two Gemini keys; the switch happens after each
    # successful post because it is driven by the `posted` counter.
    if posted%2 == 0:
        genai.configure(api_key=gemini_1)
        print("using key 1")
    else:
        genai.configure(api_key=gemini_2)
        print("using key 2")

    # generate_content() returns "" on failure, so an empty title or body
    # means the post must not be published.
    seo_title = generate_content(f"write a very search friendly seo tittle for a article that is on the topic of {title}, only reply with the tittle you should not add any extra words or line or tag or heading to it, only respond with the SEO Tittle", 150).strip()
    if not seo_title:
        print("Skipping video because no SEO title could be generated.")
        continue

    content = generate_content(f"write a well structured article use H3 subheadings, and points and should around 600 words on the topic -{title} using the contents in -{transcript}, the article should be written like human written must by pass all the AI detection test, and write it in a simple and easy to understand and higly focus on redablity,write it as article, dont metion it is from a video make sure you write it like a human would", 2048)
    if not content.strip():
        print("Skipping video because no article content could be generated.")
        continue

    faq = generate_content(f"write a simple Q&A section a article, with various question people might ask for the topis - {seo_title} use the content in -{transcript} the q&a should be well structured use h3 tags for questions, the questions should be what people might search for regarding the topic, the answer should not be more then a line or tow at max",1024)
    slug = seo_title.replace(" ", "-").lower()
    html_content = markdown.markdown(content)
    faq_content = markdown.markdown(faq)

    # Reset per iteration so a missing image can neither raise NameError nor
    # reuse the previous video's image.
    image_url = None
    important_words = find_important_words(seo_title)
    if important_words:
        search_query = important_words[0]  # Use the first important word for simplicity
        print(search_query)
        candidate_url = retrieve_image(search_query)
        print("Retrieved Image URL:", candidate_url)
        # retrieve_image() signals "nothing found" with this sentinel string,
        # which must not end up in an <img src>.
        if candidate_url != "No image found.":
            image_url = candidate_url
    else:
        print("No important words found in title for image retrieval.")
    print("SEO tittle:", seo_title)

    # The title is model-generated text: escape it (and the URL) before placing
    # them inside HTML attributes. No image tag is emitted without an image.
    imagu = ''
    if image_url:
        imagu = f'<img src="{html.escape(image_url)}" alt="{html.escape(seo_title)}">'
    linku = '''<br><br>
If you need any type of assistance, we provide 100% free consultancy and guidance for everyone
<a href="https://dynoxglobal.com/contact-dynox-global/" title="Contact Dynox Global for free consultancy and guidance"><u>Click here</u></a>
to contact us. To know more about us
<a href="https://www.wikipedia.com" title="Learn more about us on Wikipedia"><u>Click Here</u></a>
<br><br><h3>Q&A Section:<br></h3>'''

    final_content = imagu + html_content + linku + faq_content
    # Promote H2 headings to H1. The previous `final_content.replace("H2", "H1")`
    # discarded its result (strings are immutable). Only opening/closing heading
    # tags are rewritten so body text such as "H2O" is left alone.
    final_content = re.sub(r'<(/?)h2\b', r'<\1h1', final_content, flags=re.IGNORECASE)

    post = {
        'title': seo_title,
        'content': final_content,
        'status': 'publish',
        'slug': slug,
    }

    try:
        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.post(url, auth=(username, password), json=post, timeout=30)
    except requests.RequestException as e:
        print('Failed to publish post:', e)
        continue

    if response.status_code == 201:
        posted += 1
        print('Post was published successfully with SEO data!', posted)
    else:
        print('Failed to publish post:', response.content)

In [None]:
# This is a comment.