In [1]:
import pandas as pd

# Load the scraped news table; later cells read its title, url, content,
# thumbnail and publish_date columns.
df = pd.read_csv(filepath_or_buffer="ai_news_bot/news_content.csv")

# Summarizer

In [2]:
# %pip install nltk scikit-learn
# nltk.download('punkt')

In [3]:
import nltk
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def summarize_text(text, num_sentences=2):
    """Build an extractive summary from the most central sentences of ``text``.

    Every sentence is converted to a bag-of-words count vector. A sentence's
    score is the sum of its cosine similarities to all sentences (itself
    included). The ``num_sentences`` best-scoring sentences are kept and
    joined with single spaces, in the order they occur in ``text``.

    Args:
        text: Document to summarise. Anything that is not a non-blank string
            (for example ``None`` or a float NaN standing in for a missing
            value) produces an empty summary instead of an exception.
        num_sentences: Maximum number of sentences to keep. Values <= 0
            produce an empty summary.

    Returns:
        str: The selected sentences joined by one space, or ``''`` when there
        is nothing to summarise.
    """
    # Guard before tokenising: sent_tokenize needs a string, and a slice of
    # ``[-0:]`` would select every sentence rather than none.
    if not isinstance(text, str) or not text.strip() or num_sentences <= 0:
        return ''

    sentences = sent_tokenize(text)
    if len(sentences) <= num_sentences:
        # The whole text already fits in the budget; no ranking needed.
        return ' '.join(sentences)

    try:
        # Sparse sentence-by-term count matrix; cosine_similarity accepts it
        # directly, so there is no need to densify it first.
        term_counts = CountVectorizer().fit_transform(sentences)
    except ValueError:
        # Raised when no token survives vectorisation (empty vocabulary),
        # e.g. sentences made only of punctuation or single characters.
        # Without scores, fall back to the leading sentences.
        return ' '.join(sentences[:num_sentences])

    scores = cosine_similarity(term_counts).sum(axis=1)
    # Stable sort keeps tie-breaking deterministic; sorting the winning
    # indices restores document order so the summary reads naturally.
    top_indices = sorted(scores.argsort(kind='stable')[-num_sentences:])
    return ' '.join(sentences[i] for i in top_indices)

In [4]:
# Column lists for the summarised-news table; each processed article appends
# one value to every list.
news_dict = {'title': [], 'url': [], 'content': [], 'content_summary': [], 'thumbnail': [], 'publish_date': []}
# Only the first 10 articles are processed. Clamp to the table length so a
# CSV with fewer rows does not make df.iloc raise IndexError.
number_of_news = min(10, len(df))

for i in range(number_of_news):
    row = df.iloc[i]
    content = row['content']
    # Five-sentence extractive summary of the article body.
    content_summary = summarize_text(content, num_sentences=5)

    news_dict['title'].append(row['title'])
    news_dict['url'].append(row['url'])
    news_dict['content'].append(content)
    news_dict['content_summary'].append(content_summary)
    news_dict['thumbnail'].append(row['thumbnail'])
    news_dict['publish_date'].append(row['publish_date'])

In [7]:
# Turn the collected column lists into a table, one row per summarised article.
news_df = pd.DataFrame(data=news_dict)

# Translator

In [8]:
import googletrans

# Column lists for the translated-news table. NOTE: this rebinds news_dict,
# replacing the dictionary built by the summarizer cell.
news_dict = {'title': [], 'title_fa': [], 'url': [], 'content': [], 'content_fa': [], 'thumbnail': [], 'publish_date': []}
# Only the first 10 articles are processed. Clamp to the table length so a
# CSV with fewer rows does not make df.iloc raise IndexError.
number_of_news = min(10, len(df))

# One translator client is enough for every request; no need to rebuild it
# on each iteration.
translator = googletrans.Translator()

for i in range(number_of_news):
    row = df.iloc[i]
    title = row['title']
    content = row['content']

    try:
        # Translate both fields and read their text before touching
        # news_dict, so a failure skips the row as a whole and all column
        # lists keep the same length.
        title_fa = translator.translate(title, src='en', dest='fa')
        content_fa = translator.translate(content, src='en', dest='fa')
        title_fa_text = title_fa.text
        content_fa_text = content_fa.text
    except Exception as exc:
        # Best effort: report which article failed and why, then move on.
        # Catching Exception (not a bare except) keeps Ctrl-C / kernel
        # interrupt working during the slow network loop.
        print(f"error: could not translate row {i} ({title!r}): {exc!r}")
        continue

    news_dict['title'].append(title)
    news_dict['title_fa'].append(title_fa_text)
    news_dict['url'].append(row['url'])
    news_dict['content'].append(content)
    news_dict['content_fa'].append(content_fa_text)
    news_dict['thumbnail'].append(row['thumbnail'])
    news_dict['publish_date'].append(row['publish_date'])

error


In [9]:
# Turn the collected column lists into a table, one row per translated
# article, and show it as the cell's output.
news_df = pd.DataFrame(data=news_dict)
news_df

Unnamed: 0,title,title_fa,url,content,content_fa,thumbnail,publish_date
0,"The Perception of Beauty Varies, but Memorabil...",درک زیبایی متفاوت است ، اما به یاد ماندنی هیچ ...,https://ainewstoday.co.uk/2023/07/25/the-perce...,Imagine spending a weekend afternoon with frie...,تصور کنید که یک بعد از ظهر آخر هفته را با دوست...,https://ainewstoday.co.uk/wp-content/uploads/2...,2023-07-25
1,An Insight into Our Strategy for Ensuring AI S...,بینشی از استراتژی ما برای اطمینان از ایمنی هوش...,https://ainewstoday.co.uk/2023/07/25/an-insigh...,"OpenAI, the artificial intelligence (AI) resea...",Openai ، آزمایشگاه تحقیقاتی هوش مصنوعی (AI) ، ...,https://ainewstoday.co.uk/wp-content/uploads/2...,2023-07-25
2,‘Bing Chat by Microsoft Being Tested for ‘Sele...,"""چت بینگ توسط مایکروسافت برای"" انتخاب کاربران ...",https://ainewstoday.co.uk/2023/07/25/bing-chat...,"Microsoft has confirmed that its AI chatbot, B...",مایکروسافت تأیید کرده است که AI Chatbot آن ، B...,https://ainewstoday.co.uk/wp-content/uploads/2...,2023-07-25
3,The Potential of AI in Promoting Equality in H...,پتانسیل هوش مصنوعی در ارتقاء برابری در مراقبت ...,https://ainewstoday.co.uk/2023/07/24/the-poten...,"Marzyeh Ghassemi, an Assistant Professor at CS...",مارزی قسممی ، استادیار در CSAIL ، IMES و EECS ...,https://ainewstoday.co.uk/wp-content/uploads/2...,2023-07-24
4,OpenAI Introduces New Bug Bounty Program,OpenAI برنامه جدید Bounty Bounty را معرفی می کند,https://ainewstoday.co.uk/2023/07/24/openai-in...,In its ongoing effort to create safe and secur...,Openai در تلاش مداوم خود برای ایجاد سیستم های ...,https://ainewstoday.co.uk/wp-content/uploads/2...,2023-07-24
5,Why Hollywood Strikes Reveal Our Distrust in C...,چرا اعتصابات هالیوود بی اعتمادی ما را به شرکته...,https://ainewstoday.co.uk/2023/07/24/why-holly...,History suggests that societies generally over...,تاریخ نشان می دهد که جوامع به طور کلی پیامدهای...,https://ainewstoday.co.uk/wp-content/uploads/2...,2023-07-24
6,Google Health Officer Urges Caution About AI’s...,افسر بهداشت گوگل در مورد محدودیت های هوش مصنوع...,https://ainewstoday.co.uk/2023/07/24/google-he...,"Google’s chief health officer, Dr Karen DeSalv...",مدیر ارشد بهداشت Google ، دکتر کارن دزالو ، هم...,https://ainewstoday.co.uk/wp-content/uploads/2...,2023-07-24
7,Desire Grows for Ukraine’s Battlefield Data,Desire برای داده های میدان نبرد اوکراین رشد می...,https://ainewstoday.co.uk/2023/07/24/desire-gr...,"In recent months, Ukraine has been vocal about...",در ماه های اخیر ، اوکراین در مورد برنامه های خ...,https://ainewstoday.co.uk/wp-content/uploads/2...,2023-07-24
8,Artificial Intelligence Stocks Experience Soar...,سهام هوش مصنوعی عملکرد بالایی را تجربه می کند ...,https://ainewstoday.co.uk/2023/07/24/artificia...,US tech companies were facing a challenging st...,شرکت های فناوری ایالات متحده با شروع سال چالش ...,https://ainewstoday.co.uk/wp-content/uploads/2...,2023-07-24
