In [256]:
from deep_translator.exceptions import RequestError
from deep_translator import GoogleTranslator

import pandas as pd
import unicodedata
import dateparser
import numpy as np
import ast
import re

In [257]:
def en_ru(text):
    try:
        return GoogleTranslator(source='en', target='ru').translate(text)
    except Exception as e:
        return np.nan

def ru_en(text):
    try:
        return GoogleTranslator(source='ru', target='en').translate(text)
    except Exception as e:
        return np.nan

In [305]:
names = [
    'silkwaychina', #+
    'prc_roday', #+
    'russian.news.cn', #+
    'businessemirates', #+
    'india-briefing', #+
    'vietnam-briefing', #+
    'middleeastbriefing', #+
    'indianstartupnews', #+
    'mea-markets', #+
    'agbi', 
    'chinadaily'
]

### Обработка данных

In [306]:
data_all = pd.DataFrame()

In [307]:
cols = ['source', 'country', 'language', 'title', 'date', 'href', 'text']

name = 'silkwaychina'
country = 'China'
language = 'ru'

data = pd.read_excel(f'{name}.xlsx', index_col=0)
data = data[data['title']!='']
data = data[data['href']!='']
data = data[data['text']!='']

data = data[~data.title.isna()]
data = data[~data.href.isna()]
data = data[~data.text.isna()]
data['country'] = country
data['language'] = language
data.text = data.text.apply(lambda value: value.replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())
data['date'] = data['date'].apply(lambda value: dateparser.parse(value, languages=['ru']).date())

data = data[cols]

data_all = pd.concat([data_all, data], axis=0)

data.head()

Unnamed: 0,source,country,language,title,date,href,text
0,silkwaychina,China,ru,Важная информация - в Китае сняли все каранти...,2022-12-23,https://silkwaychina.ru/news/news/novosti/vazh...,На данный момент в Китае сняли все карантинные...
0,silkwaychina,China,ru,Важная информация - праздничные выходные! 2023,2022-12-20,https://silkwaychina.ru/news/news/novosti/vazh...,"Уважаемые клиенты, впереди нас ждёт череда пра..."
0,silkwaychina,China,ru,Складские хранений - популярная услуга в Китае,2022-12-19,https://silkwaychina.ru/news/news/statya/sklad...,Китайская сторона заинтересована в рыночном па...
0,silkwaychina,China,ru,Скоро Китайский Новый год 2023,2022-12-13,https://silkwaychina.ru/news/news/novosti/uspe...,"Друзья, хотим напомнить что скоро Китайский Но..."
0,silkwaychina,China,ru,Перевозки продуктов питания из Китая в РФ,2022-12-11,https://silkwaychina.ru/news/news/statya/perev...,Продукты питанияв настоящий момент не так попу...


In [308]:
cols = ['source', 'country', 'language', 'title', 'date', 'href', 'text']

name = 'prc_roday'
country = 'China'
language = 'ru'

data = pd.read_excel(f'{name}.xlsx', index_col=0)
data = data[data['title']!='']
data = data[data['href']!='']
data = data[data['text']!='']

data = data[~data.title.isna()]
data = data[~data.href.isna()]
data = data[~data.text.isna()]
data['date'] = pd.to_datetime(data['date'])
data['country'] = country
data['language'] = language
data.text = data.text.apply(lambda value: value.replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())

data = data[cols]

data_all = pd.concat([data_all, data], axis=0)

data.head()

Unnamed: 0,source,country,language,title,date,href,text
0,prc_roday,China,ru,Экономика Китая устойчива в первом квартале,2024-04-03 16:41:43,https://www.india-briefing.com/news/top-sector...,Экономика Китая приветствовала уверенный старт...
0,prc_roday,China,ru,Добыча угля в Китае достигла исторического мак...,2024-04-03 14:40:27,https://www.india-briefing.com/news/top-sector...,Добыча угля в Китае: объемы производства и имп...
0,prc_roday,China,ru,"Компания Huawei заявляет о росте выручки на 9,...",2024-04-03 12:39:37,https://www.india-briefing.com/news/top-sector...,"Китайская компания Huawei сообщила 29 марта, ч..."
0,prc_roday,China,ru,Немецкая оптическая компания ZEISS открывает ц...,2024-04-03 10:39:21,https://www.india-briefing.com/news/top-sector...,"ZEISS, немецкая транснациональная компания, ра..."
0,prc_roday,China,ru,Качественное развитие китайской экономики – бл...,2024-04-03 08:43:59,https://www.india-briefing.com/news/top-sector...,Развитие китайской экономики: Высококачественн...


In [309]:
cols = ['source', 'country', 'language', 'title', 'date', 'href', 'text']

name = 'russian.news.cn'
country = 'China'
language = 'ru'

data = pd.read_excel(f'{name}.xlsx', index_col=0)
data = data[data['title']!='']
data = data[data['href']!='']
data = data[data['text']!='']

data = data[~data.title.isna()]
data = data[~data.href.isna()]
data = data[~data.text.isna()]
data['date'] = pd.to_datetime(data['date'])
data['country'] = country
data['language'] = language

result_list = []

for value in data.text:
    if len(value.split('--')) > 1:
        result = value.split('--')[1].replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip()
    else:
        result = value.replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip()
        
    result_list.append(result)

data.text = result_list

data = data[cols]

data.head()

Unnamed: 0,source,country,language,title,date,href,text
0,russian.news.cn,China,ru,Китай начал серийное производство изотопов угл...,2024-04-21 16:38:15,https://russian.news.cn/20240421/278723f0f5a04...,Китай запустил серийное производство углерода-...
0,russian.news.cn,China,ru,Продажи коммерческих транспортных средств в Ки...,2024-04-21 15:53:15,https://russian.news.cn/20240421/bc945a7980c34...,В первом квартале 2024 года продажи коммерческ...
0,russian.news.cn,China,ru,Объем добычи природного газа в Китае вырос в я...,2024-04-21 15:39:15,https://russian.news.cn/20240421/afa6fb2000224...,В первом квартале 2024 года в Китае был устано...
0,russian.news.cn,China,ru,Производство и продажи пикапов в Китае выросли...,2024-04-21 14:59:00,https://russian.news.cn/20240421/aa1cb965ab1f4...,В первом квартале 2024 года китайский рынок пи...
0,russian.news.cn,China,ru,Выработка электроэнергии в Китае в марте вырос...,2024-04-21 13:25:00,https://russian.news.cn/20240421/53cffb16560c4...,В марте 2024 года выработка электричества веду...


In [310]:
cols = ['source', 'country', 'language', 'title', 'date', 'href', 'text']

name = 'businessemirates'
country = 'UAE'
language = 'ru'

data = pd.read_excel(f'{name}.xlsx', index_col=0)
data = data[data['title']!='']
data = data[data['href']!='']
data = data[data['text']!='']

data = data[~data.title.isna()]
data = data[~data.href.isna()]
data = data[~data.text.isna()]
data['country'] = country
data['language'] = language
data.text = data.text.apply(lambda value: value.replace('\n\nЧИТАЙТЕ НАШИ НОВОСТИ \nВ TELEGRAM-КАНАЛЕ\n\n\n', '').replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())
data['date'] = data['date'].apply(lambda value: dateparser.parse(value))

data = data[cols]

data_all = pd.concat([data_all, data], axis=0)

data.head()

Unnamed: 0,source,country,language,title,date,href,text
0,businessemirates,UAE,ru,Цены на золото в ОАЭ продолжают расти,2024-10-04 07:40:00,https://businessemirates.ae/news/uae-property-...,"Цены на золото в ОАЭ продолжили расти, повысив..."
0,businessemirates,UAE,ru,Абу-Даби вошел в десятку самых «умных городов»...,2024-10-04 07:10:00,https://businessemirates.ae/news/uae-property-...,Абу-Даби вошел в десятку самых «умных городов»...
0,businessemirates,UAE,ru,Шейх Аль Мактум издал новый указ о судах Дубая,2024-09-04 07:40:00,https://businessemirates.ae/news/uae-property-...,В качестве правителя Дубая шейх Мухаммед бен Р...
0,businessemirates,UAE,ru,В ОАЭ появилась должность советника по передов...,2024-09-04 07:10:00,https://businessemirates.ae/news/uae-property-...,Президент ОАЭ Его Высочество шейх Мухаммед бен...
0,businessemirates,UAE,ru,​ОАЭ создадут альтернативу Суэцкого канала,2024-09-04 06:20:00,https://businessemirates.ae/news/uae-property-...,Портовый и логистический оператор из Абу-Даби ...


In [311]:
cols = ['source', 'country', 'language', 'title', 'date', 'href', 'text']

name = 'india-briefing'
country = 'India'
language = 'eng'

data = pd.read_excel(f'{name}.xlsx', index_col=0)
data = data[data['title']!='']
data = data[data['href']!='']
data = data[data['text']!='']

data = data[~data.title.isna()]
data = data[~data.href.isna()]
data = data[~data.text.isna()]
data['country'] = country
data['language'] = language

data['date'] = pd.to_datetime(data['date'])
data['date'] = data['date'].dt.tz_localize(None)
data.text = data.text.apply(lambda value: unicodedata.normalize("NFKD", value).replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())
data.title = data.title.apply(lambda value: unicodedata.normalize("NFKD", value).replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())

data = data[cols]

data_all = pd.concat([data_all, data], axis=0)

data.head()

Unnamed: 0,source,country,language,title,date,href,text
0,india-briefing,India,eng,Investing in Tamil Nadu: Manufacturing Outlook...,2024-04-22 19:30:43,https://www.india-briefing.com/news/investing-...,India Briefing profiles India’s southern state...
0,india-briefing,India,eng,India Identifies List of 30 Critical Minerals:...,2024-04-17 11:25:55,https://www.india-briefing.com/news/india-iden...,India has released a list of 30 critical miner...
0,india-briefing,India,eng,India-Peru Trade Agreement: Bilateral Negotiat...,2024-04-15 20:11:10,https://www.india-briefing.com/news/india-peru...,India and Peru are negotiating a bilateral tra...
0,india-briefing,India,eng,India-ASEAN Trade Pact: Tariff Anomalies on Go...,2024-04-12 20:31:28,https://www.india-briefing.com/news/india-asea...,India has begun reviewing its trade agreement ...
0,india-briefing,India,eng,India’s Regulation of Solar PV Modules: Approv...,2024-04-08 21:32:53,https://www.india-briefing.com/news/india-appr...,While India has relaxed regulations concerning...


In [312]:
cols = ['source', 'country', 'language', 'title', 'date', 'href', 'text']

name = 'vietnam-briefing'
country = 'Vietnam'
language = 'eng'

data = pd.read_excel(f'{name}.xlsx', index_col=0)
data = data[data['title']!='']
data = data[data['href']!='']
data = data[data['text']!='']

data = data[~data.title.isna()]
data = data[~data.href.isna()]
data = data[~data.text.isna()]
data['country'] = country
data['language'] = language

data['date'] = pd.to_datetime(data['date'])
data['date'] = data['date'].dt.tz_localize(None)
data.text = data.text.apply(lambda value: unicodedata.normalize("NFKD", value).replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())
data.title = data.title.apply(lambda value: unicodedata.normalize("NFKD", value).replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())

data = data[cols]

data_all = pd.concat([data_all, data], axis=0)

data.head()

Unnamed: 0,source,country,language,title,date,href,text
0,vietnam-briefing,Vietnam,eng,Vietnam Exports to the US in Q1 2024 Reach US$...,2024-04-24 21:35:25,https://www.vietnam-briefing.com/news/vietnam-...,Vietnam recorded a significant surge in export...
0,vietnam-briefing,Vietnam,eng,Unlocking Investment Opportunities: Vietnam’s ...,2024-04-23 16:40:06,https://www.vietnam-briefing.com/news/vietnam-...,We note the increased business activity seen i...
0,vietnam-briefing,Vietnam,eng,ESG Focus Among Foreign Businesses in Vietnam:...,2024-04-18 14:48:00,https://www.vietnam-briefing.com/news/esg-focu...,"ESG (environmental, social, and governance) ma..."
0,vietnam-briefing,Vietnam,eng,Vietnam-Spain Bilateral Relations: Trade and I...,2024-04-16 22:47:42,https://www.vietnam-briefing.com/news/vietnam-...,Vietnam and Spain have developed relatively cl...
0,vietnam-briefing,Vietnam,eng,Why Cambodia’s Funan Techo Canal Project is Wo...,2024-04-09 19:05:32,https://www.vietnam-briefing.com/news/why-camb...,Despite the positive outcomes predicted by the...


In [313]:
cols = ['source', 'country', 'language', 'title', 'date', 'href', 'text']

name = 'middleeastbriefing'
country = 'Middle East'
language = 'eng'

data = pd.read_excel(f'{name}.xlsx', index_col=0)
data = data[data['title']!='']
data = data[data['href']!='']
data = data[data['text']!='']

data = data[~data.title.isna()]
data = data[~data.href.isna()]
data = data[~data.text.isna()]
data['country'] = country
data['language'] = language

data['date'] = pd.to_datetime(data['date'])
data['date'] = data['date'].dt.tz_localize(None)
data.text = data.text.apply(lambda value: unicodedata.normalize("NFKD", value).replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())
data.title = data.title.apply(lambda value: unicodedata.normalize("NFKD", value).replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())

data = data[cols]

data_all = pd.concat([data_all, data], axis=0)

data.head()

Unnamed: 0,source,country,language,title,date,href,text
0,middleeastbriefing,Middle East,eng,"Over 15,000 Indian Companies Joined Dubai Cham...",2024-04-24 14:08:43,https://www.middleeastbriefing.com/news/over-1...,Dubai continues to be a magnet for Indian busi...
0,middleeastbriefing,Middle East,eng,Understanding the Dubai Unified Licence: Scope...,2024-04-16 08:39:24,https://www.middleeastbriefing.com/news/dubai-...,The Dubai Unified Licence streamlines business...
0,middleeastbriefing,Middle East,eng,Unveiling Opportunities: The United Arab Emira...,2024-04-16 07:57:54,https://www.middleeastbriefing.com/news/unveil...,An attractive tax and regulatory environment a...
0,middleeastbriefing,Middle East,eng,Saudi Arabia Gives Status Update on NEOM Proje...,2024-04-15 12:45:06,https://www.middleeastbriefing.com/news/saudi-...,"NEOM, Saudi Arabia’s urban megaproject, has re..."
0,middleeastbriefing,Middle East,eng,Non-Oil Sector Driving Bahrain’s GDP Growth: D...,2024-04-12 07:07:34,https://www.middleeastbriefing.com/news/non-oi...,Key non-oil sectors poised to drive GDP growth...


In [314]:
cols = ['source', 'country', 'language', 'title', 'date', 'href', 'text']

name = 'indianstartupnews'
country = 'India'
language = 'eng'

data = pd.read_excel(f'{name}.xlsx', index_col=0)
data = data[data['title']!='']
data = data[data['href']!='']
data = data[data['text']!='']

data = data[~data.title.isna()]
data = data[~data.href.isna()]
data = data[~data.text.isna()]
data['country'] = country
data['language'] = language

data['date'] = pd.to_datetime(data['date'])
data.text = data.text.apply(lambda value: unicodedata.normalize("NFKD", value).replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())
data.title = data.title.apply(lambda value: unicodedata.normalize("NFKD", value).replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())

data = data[cols]

data_all = pd.concat([data_all, data], axis=0)

data.head()

Unnamed: 0,source,country,language,title,date,href,text
0,indianstartupnews,India,eng,YouTuber Gaurav Taneja launches his ghee brand...,2024-04-12,/news/youtuber-gaurav-taneja-launches-his-ghee...,"Gaurav Taneja, also known as 'Flying Beast', i..."
0,indianstartupnews,India,eng,Agora partners With EZDRM to bring content pro...,2024-04-13,/news/agora-partners-with-ezdrm-to-bring-conte...,"Agora, a platform for real-time engagement API..."
0,indianstartupnews,India,eng,Tech distribution and logistics platform Rippl...,2024-04-13,/news/tech-distribution-and-logistics-platform...,Bengaluru-based Ripplr showcased an extraordin...
0,indianstartupnews,India,eng,Mark Zuckerberg's Meta starts testing AI chatb...,2024-04-13,/news/meta-starts-testing-ai-chatbot-on-whatsa...,Meta has initiated testing of its AI-powered c...
0,indianstartupnews,India,eng,India embraces affordable 4G and 5G phones for...,2024-04-13,/news/india-embraces-affordable-4g-and-5g-phon...,A recent report by CyberMedia Research (CMR) h...


In [315]:
cols = ['source', 'country', 'language', 'title', 'date', 'href', 'text']

name = 'mea-markets'
country = 'UAE'
language = 'eng'

data = pd.read_excel(f'{name}.xlsx', index_col=0)
data = data[data['title']!='']
data = data[data['href']!='']
data = data[data['text']!='']

data = data[~data.title.isna()]
data = data[~data.href.isna()]
data = data[~data.text.isna()]
data['country'] = country
data['language'] = language

data['date'] = data.date.apply(lambda value: pd.to_datetime(' '.join(value.replace(',', '').split(' ')[2:])))
data.text = data.text.apply(lambda value: unicodedata.normalize("NFKD", value).replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())
data.title = data.title.apply(lambda value: unicodedata.normalize("NFKD", value).replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())

data = data[cols]

data_all = pd.concat([data_all, data], axis=0)

data.head()

Unnamed: 0,source,country,language,title,date,href,text
0,mea-markets,UAE,eng,Deloitte Launches Kiyadat to Advance GCC Natio...,2024-04-18,https://www.mea-markets.com/deloitte-launches-...,"In Arabic, “Kiyadat” translates to “Leaders”, ..."
0,mea-markets,UAE,eng,United Nations Leaders Call for More Action to...,2024-04-17,https://www.mea-markets.com/united-nations-lea...,UN Secretary-General António Guterres celebra...
0,mea-markets,UAE,eng,Red Sea Crisis Underlines the Need for Greater...,2024-04-09,https://www.mea-markets.com/red-sea-crisis-und...,The speed with which shipping companies have r...
0,mea-markets,UAE,eng,Local SMEs Upskilled Through Enterprise Develo...,2024-03-26,https://www.mea-markets.com/local-smes-upskill...,"The programme, a collaborative effort between ..."
0,mea-markets,UAE,eng,The Sky’s The Limit For Domestic and Inbound T...,2024-03-20,https://www.mea-markets.com/the-skys-the-limit...,"While location and facilities are important, e..."


In [316]:
cols = ['source', 'country', 'language', 'title', 'date', 'href', 'text']

name = 'agbi'
country = 'Arabian'
language = 'eng'

data = pd.read_excel(f'{name}.xlsx', index_col=0)
data = data[data['title']!='']
data = data[data['href']!='']
data = data[data['text']!='']

data = data[~data.title.isna()]
data = data[~data.href.isna()]
data = data[~data.text.isna()]
data['country'] = country
data['language'] = language

data['date'] = data.date.apply(lambda value: pd.to_datetime(value.replace('\t', '').replace('\n', '')))
data.text = data.text.apply(lambda value: unicodedata.normalize("NFKD", value).replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())
data.title = data.title.apply(lambda value: unicodedata.normalize("NFKD", value).replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())

data = data[cols]

data_all = pd.concat([data_all, data], axis=0)

data.head()

Unnamed: 0,source,country,language,title,date,href,text
0,agbi,Arabian,eng,What Egypt needs to do to please its $60bn bac...,2024-03-24,https://www.agbi.com/analysis/economy/2024/03/...,A fast-paced four weeks may have transformed E...
0,agbi,Arabian,eng,Egypt must deal with its fundamental challenges,2024-03-21,https://www.agbi.com/opinion/economy/2024/03/a...,Abu Dhabi’s announcement that it would invest ...
0,agbi,Arabian,eng,Abu Dhabi’s futuristic vision of an Egyptian m...,2024-03-22,https://www.agbi.com/analysis/tourism/2024/03/...,"By 2052, Egypt’s Ras El Hekma will be “competi..."
0,agbi,Arabian,eng,Currency bond traders flow into Egypt after ba...,2024-03-14,https://www.agbi.com/analysis/economy/2024/03/...,JPMorgan Chase & Co has issued a circular urgi...
0,agbi,Arabian,eng,Egypt’s $8bn rescue package brings new challenges,2024-03-07,https://www.agbi.com/opinion/economy/2024/03/s...,Egypt has finally secured a hefty Internationa...


In [317]:
cols = ['source', 'country', 'language', 'title', 'date', 'href', 'text']

name = 'chinadaily'
country = 'China'
language = 'eng'

data = pd.read_excel(f'{name}.xlsx', index_col=0)
data = data[data['title']!='']
data = data[data['href']!='']
data = data[data['text']!='']

data = data[~data.title.isna()]
data = data[~data.href.isna()]
data = data[~data.text.isna()]
data['country'] = country
data['language'] = language

data['date']  = data['date'].apply(lambda value: value.split('Updated: ')[1].replace('\n', '').lstrip().rstrip())
data['date'] = pd.to_datetime(data['date'])
data.text = data.text.apply(lambda value: unicodedata.normalize("NFKD", value).replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())
data.title = data.title.apply(lambda value: unicodedata.normalize("NFKD", value).replace('\r', '').replace('\n', '').replace('\t', '').rstrip().lstrip())

data = data[cols]

data_all = pd.concat([data_all, data], axis=0)

data.head()

Unnamed: 0,source,country,language,title,date,href,text
0,chinadaily,China,eng,Huawei unveiled upgraded SUV amid competition,2024-04-23 20:28:00,https://www.chinadaily.com.cn/a/202404/23/WS66...,Huawei Technologies Co on Tuesday unveiled its...
0,chinadaily,China,eng,HNBR plant starts construction in Changzhou,2024-04-23 19:49:00,https://www.chinadaily.com.cn/a/202404/23/WS66...,Construction of a hydrogenated nitrile butadie...
0,chinadaily,China,eng,Two China and US leading cloud-based solution ...,2024-04-23 19:32:00,https://www.chinadaily.com.cn/a/202404/23/WS66...,"Cloopen, a leading cloud-based communications ..."
0,chinadaily,China,eng,L'Oréal continues to chalk up sales in Chines...,2024-04-23 17:09:00,https://www.chinadaily.com.cn/a/202404/23/WS66...,"L'Oréal China continues to lead the market, w..."
0,chinadaily,China,eng,Air China prepares to resume four more interna...,2024-04-23 16:38:00,https://www.chinadaily.com.cn/a/202404/23/WS66...,"Air China, one of China's largest airline comp..."


In [318]:
data_all.to_excel('data_all.xlsx')

### Перевод

In [325]:
data_all = pd.read_excel('data_all.xlsx', index_col=0)

In [327]:
%%time

data_all['title'] = data_all.apply(lambda row: en_ru(row['title']) if row['language']=='eng' else row['title'], axis=1)

data_all['text'] = data_all.apply(lambda row: en_ru(row['text']) if row['language']=='eng' else row['text'], axis=1)

CPU times: total: 9min 12s
Wall time: 20min 47s


In [328]:
data_all

Unnamed: 0,source,country,language,title,date,href,text
0,silkwaychina,China,ru,Важная информация - в Китае сняли все каранти...,2022-12-23 00:00:00,https://silkwaychina.ru/news/news/novosti/vazh...,На данный момент в Китае сняли все карантинные...
0,silkwaychina,China,ru,Важная информация - праздничные выходные! 2023,2022-12-20 00:00:00,https://silkwaychina.ru/news/news/novosti/vazh...,"Уважаемые клиенты, впереди нас ждёт череда пра..."
0,silkwaychina,China,ru,Складские хранений - популярная услуга в Китае,2022-12-19 00:00:00,https://silkwaychina.ru/news/news/statya/sklad...,Китайская сторона заинтересована в рыночном па...
0,silkwaychina,China,ru,Скоро Китайский Новый год 2023,2022-12-13 00:00:00,https://silkwaychina.ru/news/news/novosti/uspe...,"Друзья, хотим напомнить что скоро Китайский Но..."
0,silkwaychina,China,ru,Перевозки продуктов питания из Китая в РФ,2022-12-11 00:00:00,https://silkwaychina.ru/news/news/statya/perev...,Продукты питанияв настоящий момент не так попу...
...,...,...,...,...,...,...,...
0,chinadaily,China,eng,Новый центр расширит охват китайских фирм,2023-12-14 09:27:00,https://www.chinadaily.com.cn/a/202312/14/WS65...,Инновационный центр Amazon в Шэньчжэне поможет...
0,chinadaily,China,eng,Выпущен новый умный дрон для сельского хозяйства,2023-12-13 20:44:00,https://www.chinadaily.com.cn/a/202312/13/WS65...,"Компания XAG Co Ltd, один из крупнейших произв..."
0,chinadaily,China,eng,"Действительно, развлекательные компании активи...",2023-12-13 16:53:00,https://www.chinadaily.com.cn/a/202312/13/WS65...,С ростом потребления и повышением осведомленно...
0,chinadaily,China,eng,Генеральный директор Fortescue Metals: энергет...,2023-12-08 11:02:00,https://www.chinadaily.com.cn/a/202312/08/WS65...,Энергетический переход в Китае предоставляет т...


In [332]:
data_all['year'] = data_all['date'].dt.year

data_all['year'].value_counts()

year
2023.0    2040
2024.0    1684
2022.0     373
2021.0     217
2016.0      47
2020.0      28
2015.0      19
2019.0      17
2018.0       9
2017.0       6
Name: count, dtype: int64

In [329]:
data_all.to_excel('data_all_with_translate.xlsx')