In [1]:
#!pip install yahoo_fin

## Importing Library

In [2]:
# importing Library
import pandas as pd
import numpy as np
import yfinance as yf
from yahoo_fin.stock_info import get_data
from yahoo_fin import news as yahoo_news
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

In [38]:
# get the Dataset
data=pd.read_csv('China_compnanies.csv')

In [39]:
data.head()

Unnamed: 0,"Company Name,Abbreviation"
0,"Alibaba Group Holding Limited,BABA"
1,"Tencent Holdings Limited,TCEHY"
2,"China Mobile Communications Corporation,CHL"
3,"Industrial and Commercial Bank of China,IDCBY"
4,"China Construction Bank Corporation,CICHY"


In [40]:
# getting company name from the given Dataset
data[['Company_Name', 'Abbreviation']] = data['Company Name,Abbreviation'].str.split(',',n=1, expand=True)

In [41]:
# change the column name of the dataset
data.drop(columns=['Company Name,Abbreviation'],inplace=True)

In [42]:
# copy Data on the Root dataset
data.head()

Unnamed: 0,Company_Name,Abbreviation
0,Alibaba Group Holding Limited,BABA
1,Tencent Holdings Limited,TCEHY
2,China Mobile Communications Corporation,CHL
3,Industrial and Commercial Bank of China,IDCBY
4,China Construction Bank Corporation,CICHY


## The Dataset of Top 50 companies of the China are created

In [43]:
def get_news2(com_name, number_of_news=30):
    try:

    
        news_of_company = yahoo_news.get_yf_rss(com_name) # The news of the company get and store in news of company
        news_of_company = news_of_company[:number_of_news] # the number of news fetch from the api
        news_data = [(article['title'], article['summary'], article['published']) for article in news_of_company]
        dataset_news = pd.DataFrame(news_data, columns=['Title', 'Content','published_date']) # creating the datsset
        dataset_news['Company'] = com_name # store the abbreivation
        return dataset_news
    except Exception as e:
       
        return None


In [44]:
# crating the dataset of ever company 
news_dataset = []
for company_abrevation in data['Abbreviation']:
    company_news = get_news2(company_abrevation)
    if company_news is not None:
        news_dataset.append(company_news)
        

all_news_df = pd.concat(news_dataset)




In [45]:
#The dataset is form  
all_news_df

Unnamed: 0,Title,Content,published_date,Company
0,Chinese start-up Moonshot AI raises US$1 billi...,Chinese artificial intelligence start-up Moons...,"Tue, 20 Feb 2024 09:30:00 +0000",BABA
1,Alibaba Stock Falls as Chinese Markets Disappo...,Shanghai stocks advanced on Monday but posted ...,"Mon, 19 Feb 2024 11:42:00 +0000",BABA
2,Big Pension Bought Up NYCB Stock Right Before ...,A New York pension bought up shares of New Yor...,"Mon, 19 Feb 2024 08:15:00 +0000",BABA
3,25 Most Valuable Tech Companies Outside The US,"In this article, we will take a look at the 25...","Fri, 16 Feb 2024 20:47:36 +0000",BABA
4,No Clear Sign of a Turnaround for Alibaba,Taobao and TMall Group's troubles overshadow g...,"Fri, 16 Feb 2024 13:00:03 +0000",BABA
...,...,...,...,...
15,Country Garden to Make CNY3.07 Billion Disposa...,Chinese property giant Country Garden will sel...,"Thu, 14 Dec 2023 01:16:00 +0000",2007.HK
16,Country Garden sells stake in Dalian Wanda uni...,HONG KONG (Reuters) -China's embattled Country...,"Thu, 14 Dec 2023 00:37:15 +0000",2007.HK
17,UPDATE 1-Country Garden has remitted funds to ...,Embattled Chinese developer Country Garden Hol...,"Wed, 13 Dec 2023 12:00:25 +0000",2007.HK
18,Country Garden repays US$111 million bond in f...,Embattled property developer Country Garden Ho...,"Wed, 13 Dec 2023 09:30:00 +0000",2007.HK


In [46]:
# Convert the 'date_str' column to datetime format
all_news_df['date'] = pd.to_datetime(all_news_df['published_date'], format='%a, %d %b %Y %H:%M:%S %z').dt.date

In [47]:
# drop the column published date
all_news_df.drop(columns='published_date',inplace=True)

In [48]:
# Data set after converting the date
all_news_df.head()

Unnamed: 0,Title,Content,Company,date
0,Chinese start-up Moonshot AI raises US$1 billi...,Chinese artificial intelligence start-up Moons...,BABA,2024-02-20
1,Alibaba Stock Falls as Chinese Markets Disappo...,Shanghai stocks advanced on Monday but posted ...,BABA,2024-02-19
2,Big Pension Bought Up NYCB Stock Right Before ...,A New York pension bought up shares of New Yor...,BABA,2024-02-19
3,25 Most Valuable Tech Companies Outside The US,"In this article, we will take a look at the 25...",BABA,2024-02-16
4,No Clear Sign of a Turnaround for Alibaba,Taobao and TMall Group's troubles overshadow g...,BABA,2024-02-16


In [49]:
all_news_df.rename(columns={'Company':'Abbreviation'},inplace=True)

In [50]:
merge_data=pd.merge(all_news_df,data,on='Abbreviation')

##  NLP

In [51]:
lemma=WordNetLemmatizer()

def text_pre_preocessing(text):
    
    token=word_tokenize(text) # convert the sentences into token
    token_without_stop=[word for word in token if word.lower() not in stopwords.words('english')] # removing the stop words

    
    
    #lemmatization
    
    lem_token=[lemma.lemmatize(token) for token in token_without_stop] # getting the real meaning word
    
    return lem_token

In [52]:
# titel preprocesing
merge_data['Titel_preprosses']=merge_data['Title'].apply(text_pre_preocessing)

In [53]:
# content preprocessing
merge_data['Content_preprosses']=merge_data['Content'].apply(text_pre_preocessing)

In [54]:
merge_data

Unnamed: 0,Title,Content,Abbreviation,date,Company_Name,Titel_preprosses,Content_preprosses
0,Chinese start-up Moonshot AI raises US$1 billi...,Chinese artificial intelligence start-up Moons...,BABA,2024-02-20,Alibaba Group Holding Limited,"[Chinese, start-up, Moonshot, AI, raise, US, $...","[Chinese, artificial, intelligence, start-up, ..."
1,Alibaba Stock Falls as Chinese Markets Disappo...,Shanghai stocks advanced on Monday but posted ...,BABA,2024-02-19,Alibaba Group Holding Limited,"[Alibaba, Stock, Falls, Chinese, Markets, Disa...","[Shanghai, stock, advanced, Monday, posted, re..."
2,Big Pension Bought Up NYCB Stock Right Before ...,A New York pension bought up shares of New Yor...,BABA,2024-02-19,Alibaba Group Holding Limited,"[Big, Pension, Bought, NYCB, Stock, Right, Tum...","[New, York, pension, bought, share, New, York,..."
3,25 Most Valuable Tech Companies Outside The US,"In this article, we will take a look at the 25...",BABA,2024-02-16,Alibaba Group Holding Limited,"[25, Valuable, Tech, Companies, Outside, US]","[article, ,, take, look, 25, valuable, tech, c..."
4,No Clear Sign of a Turnaround for Alibaba,Taobao and TMall Group's troubles overshadow g...,BABA,2024-02-16,Alibaba Group Holding Limited,"[Clear, Sign, Turnaround, Alibaba]","[Taobao, TMall, Group, 's, trouble, overshadow..."
...,...,...,...,...,...,...,...
221,Country Garden to Make CNY3.07 Billion Disposa...,Chinese property giant Country Garden will sel...,2007.HK,2023-12-14,Country Garden Holdings Company Limited,"[Country, Garden, Make, CNY3.07, Billion, Disp...","[Chinese, property, giant, Country, Garden, se..."
222,Country Garden sells stake in Dalian Wanda uni...,HONG KONG (Reuters) -China's embattled Country...,2007.HK,2023-12-14,Country Garden Holdings Company Limited,"[Country, Garden, sell, stake, Dalian, Wanda, ...","[HONG, KONG, (, Reuters, ), -China, 's, embatt..."
223,UPDATE 1-Country Garden has remitted funds to ...,Embattled Chinese developer Country Garden Hol...,2007.HK,2023-12-13,Country Garden Holdings Company Limited,"[UPDATE, 1-Country, Garden, remitted, fund, re...","[Embattled, Chinese, developer, Country, Garde..."
224,Country Garden repays US$111 million bond in f...,Embattled property developer Country Garden Ho...,2007.HK,2023-12-13,Country Garden Holdings Company Limited,"[Country, Garden, repays, US, $, 111, million,...","[Embattled, property, developer, Country, Gard..."


In [55]:
# sentiment

from textblob import TextBlob

def Sentiment_Analyze_com(preprossesd_token):
    
    
    text=" ".join(preprossesd_token)
    
    blob=TextBlob(text)
    # polarity of the text
    polarity=blob.sentiment.polarity
    
    if polarity>0:
        return 1
    elif polarity<0:
        return -1
    else:
        return 0

In [56]:
# titel sentiment
merge_data['titel_Sentiment']=merge_data['Titel_preprosses'].apply(Sentiment_Analyze_com)

In [57]:
# content sentiment
merge_data['Content_Sentiment']=merge_data['Content_preprosses'].apply(Sentiment_Analyze_com)

In [58]:
merge_data

Unnamed: 0,Title,Content,Abbreviation,date,Company_Name,Titel_preprosses,Content_preprosses,titel_Sentiment,Content_Sentiment
0,Chinese start-up Moonshot AI raises US$1 billi...,Chinese artificial intelligence start-up Moons...,BABA,2024-02-20,Alibaba Group Holding Limited,"[Chinese, start-up, Moonshot, AI, raise, US, $...","[Chinese, artificial, intelligence, start-up, ...",1,1
1,Alibaba Stock Falls as Chinese Markets Disappo...,Shanghai stocks advanced on Monday but posted ...,BABA,2024-02-19,Alibaba Group Holding Limited,"[Alibaba, Stock, Falls, Chinese, Markets, Disa...","[Shanghai, stock, advanced, Monday, posted, re...",1,1
2,Big Pension Bought Up NYCB Stock Right Before ...,A New York pension bought up shares of New Yor...,BABA,2024-02-19,Alibaba Group Holding Limited,"[Big, Pension, Bought, NYCB, Stock, Right, Tum...","[New, York, pension, bought, share, New, York,...",1,1
3,25 Most Valuable Tech Companies Outside The US,"In this article, we will take a look at the 25...",BABA,2024-02-16,Alibaba Group Holding Limited,"[25, Valuable, Tech, Companies, Outside, US]","[article, ,, take, look, 25, valuable, tech, c...",0,1
4,No Clear Sign of a Turnaround for Alibaba,Taobao and TMall Group's troubles overshadow g...,BABA,2024-02-16,Alibaba Group Holding Limited,"[Clear, Sign, Turnaround, Alibaba]","[Taobao, TMall, Group, 's, trouble, overshadow...",1,-1
...,...,...,...,...,...,...,...,...,...
221,Country Garden to Make CNY3.07 Billion Disposa...,Chinese property giant Country Garden will sel...,2007.HK,2023-12-14,Country Garden Holdings Company Limited,"[Country, Garden, Make, CNY3.07, Billion, Disp...","[Chinese, property, giant, Country, Garden, se...",0,-1
222,Country Garden sells stake in Dalian Wanda uni...,HONG KONG (Reuters) -China's embattled Country...,2007.HK,2023-12-14,Country Garden Holdings Company Limited,"[Country, Garden, sell, stake, Dalian, Wanda, ...","[HONG, KONG, (, Reuters, ), -China, 's, embatt...",0,-1
223,UPDATE 1-Country Garden has remitted funds to ...,Embattled Chinese developer Country Garden Hol...,2007.HK,2023-12-13,Country Garden Holdings Company Limited,"[UPDATE, 1-Country, Garden, remitted, fund, re...","[Embattled, Chinese, developer, Country, Garde...",1,-1
224,Country Garden repays US$111 million bond in f...,Embattled property developer Country Garden Ho...,2007.HK,2023-12-13,Country Garden Holdings Company Limited,"[Country, Garden, repays, US, $, 111, million,...","[Embattled, property, developer, Country, Gard...",1,1


In [59]:
merge_data.drop(columns=['Titel_preprosses','Content_preprosses'],inplace=True)

In [60]:
# Exporting the dataset
merge_data.to_csv('China_dataset1.csv')

In [61]:
merge_data

Unnamed: 0,Title,Content,Abbreviation,date,Company_Name,titel_Sentiment,Content_Sentiment
0,Chinese start-up Moonshot AI raises US$1 billi...,Chinese artificial intelligence start-up Moons...,BABA,2024-02-20,Alibaba Group Holding Limited,1,1
1,Alibaba Stock Falls as Chinese Markets Disappo...,Shanghai stocks advanced on Monday but posted ...,BABA,2024-02-19,Alibaba Group Holding Limited,1,1
2,Big Pension Bought Up NYCB Stock Right Before ...,A New York pension bought up shares of New Yor...,BABA,2024-02-19,Alibaba Group Holding Limited,1,1
3,25 Most Valuable Tech Companies Outside The US,"In this article, we will take a look at the 25...",BABA,2024-02-16,Alibaba Group Holding Limited,0,1
4,No Clear Sign of a Turnaround for Alibaba,Taobao and TMall Group's troubles overshadow g...,BABA,2024-02-16,Alibaba Group Holding Limited,1,-1
...,...,...,...,...,...,...,...
221,Country Garden to Make CNY3.07 Billion Disposa...,Chinese property giant Country Garden will sel...,2007.HK,2023-12-14,Country Garden Holdings Company Limited,0,-1
222,Country Garden sells stake in Dalian Wanda uni...,HONG KONG (Reuters) -China's embattled Country...,2007.HK,2023-12-14,Country Garden Holdings Company Limited,0,-1
223,UPDATE 1-Country Garden has remitted funds to ...,Embattled Chinese developer Country Garden Hol...,2007.HK,2023-12-13,Country Garden Holdings Company Limited,1,-1
224,Country Garden repays US$111 million bond in f...,Embattled property developer Country Garden Ho...,2007.HK,2023-12-13,Country Garden Holdings Company Limited,1,1


In [62]:
remaining_company_names.count()

Company_Name    33
Abbreviation    33
dtype: int64

In [63]:
remaining_company_names = data[~data['Company_Name'].isin(merge_data['Company_Name'])]
remaining_company_names

Unnamed: 0,Company_Name,Abbreviation
2,China Mobile Communications Corporation,CHL
5,Ping An Insurance (Group) Company of China,"Ltd.,PNGAY"
6,Huawei Technologies Co.,"Ltd.,002502.SZ"
7,JD.com,"Inc.,JD"
8,China National Petroleum Corporation,SNP
9,Sinopec Group,SHI
10,China State Construction Engineering Corporation,601668.SS
11,China Life Insurance Company Limited,LFC
12,China Merchants Bank Co.,"Ltd.,600036.SS"
14,China Resources,00291.HK


In [64]:
log(3/2)

NameError: name 'log' is not defined