In [1]:
from pprint import pprint
from collections import Counter
import os
import re
import logging
import string
import pickle
import numpy as np
import pandas as pd
import smart_open
import multiprocessing
from time import time  # To time our operations
from collections import defaultdict  # For word frequency

# Gensim
import gensim
from gensim.models import Word2Vec
from gensim.utils import simple_preprocess
from gensim.models.phrases import Phrases, Phraser

# NLTK
import nltk
from nltk.corpus import stopwords
'''nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')'''
from nltk import sent_tokenize, word_tokenize
from nltk.stem import WordNetLemmatizer
wordnet_lemmatizer = WordNetLemmatizer()

# Spacy
import spacy

# Plotting
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool, ColumnDataSource, value

# Clustering
from sklearn.cluster import KMeans
from sklearn.neighbors import KDTree
from sklearn.manifold import TSNE

# Suppressing warnings
import warnings
warnings.simplefilter("ignore", DeprecationWarning)



In [2]:

def clean_text(text):
    # Normalize tabs and remove newlines
    no_tabs = text.replace('\t', ' ').replace('\n', '').replace('Belt and Road', 'BRI').replace('- ', '').replace(
        'PR Newswire', '').replace('The Straits Times', '').replace('Singapore Press Holdings Limited', '').replace(
        'All Rights Reserved', '').replace('globaltimes.com.cn', '').replace('PRNewswire','');
    # Remove all characters except A-Z and a dot.
    no_url = re.sub('Online: ((www\.[^\s]+)|(https?://[^\s]+))', '', no_tabs)
    alphas_only = re.sub("[^a-zA-Z\.]", " ", no_url);
    # Normalize spaces to 1
    multi_spaces = re.sub(" +", " ", alphas_only);
    # Strip trailing and leading spaces
    no_spaces = multi_spaces.strip();

    return no_spaces


def sentence_tokenize(text):
    sentence_doc = sent_tokenize(text)
    sentences = [gensim.utils.simple_preprocess(str(doc), deacc=True) for doc in
                 sentence_doc]  # deacc=True removes punctuations
    stop = set(stopwords.words('english') + ['factiva', 'asianreview', 'viewpoint', 'sourceupdate', 'stimes', 'prn', 'st'])
    no_stop = [[word for word in sentence if word not in stop] for sentence in sentences]

    return no_stop


def lemmatization(texts, allowed_postags=['NOUN']):
    """https://spacy.io/api/annotation"""
    texts_out = [[token.lemma_ for token in text if token.pos_ in allowed_postags] for text in texts]
    return texts_out

In [3]:
import os

In [4]:

path=os.listdir(r"C:\Users\Li\Desktop\中国（txt）")
datalist=[]

for i in path:
    domain= r"C:\\Users\\Li\\Desktop\\中国（txt）\\"+i
    #print(domain)
    with open(domain,"r",encoding="utf-8") as f:
        data=f.read()
        datalist.append(data)

In [5]:
text_li=[clean_text(i) for i in datalist]

In [7]:
com_sent_li = [sentence_tokenize(text) for text in text_li]
com_sent_li[0][0]

['china',
 'seeks',
 'pragmatic',
 'cooperation',
 'despite',
 'us',
 'indo',
 'pacific',
 'strategythe',
 'divergences',
 'china',
 'us',
 'going',
 'time',
 'west',
 'pacificregion',
 'throes',
 'tension',
 'two',
 'major',
 'powers']

In [8]:
sent_li = []
for sentence in com_sent_li:
    for tokens in sentence:
        sent_li.append(tokens)


In [121]:
sent_li = [tokens for sentence in com_sent_li for tokens in sentence]
sent_li

[['china',
  'seeks',
  'pragmatic',
  'cooperation',
  'despite',
  'us',
  'indo',
  'pacific',
  'strategythe',
  'divergences',
  'china',
  'us',
  'going',
  'time',
  'west',
  'pacificregion',
  'throes',
  'tension',
  'two',
  'major',
  'powers'],
 ['china',
  'us',
  'us',
  'becoming',
  'increasingly',
  'anxious',
  'global',
  'hegemony',
  'us',
  'labeled',
  'china',
  'revisionist',
  'power',
  'strategic',
  'competitor'],
 ['americanpeople',
  'deep',
  'belief',
  'thucydides',
  'trap',
  'leads',
  'increasing',
  'us',
  'strategic',
  'suspicion',
  'toward',
  'china',
  'addition',
  'us',
  'believes',
  'china',
  'theory',
  'peaceful',
  'development',
  'logicallyvalid',
  'many',
  'countries',
  'versions',
  'called',
  'indo',
  'pacific',
  'strategy',
  'asaustralia',
  'japan',
  'india',
  'us'],
 ['less',
  'aim',
  'countering',
  'coordinating',
  'againstthe',
  'china',
  'proposed',
  'bri',
  'initiative',
  'bri',
  'reflects',
  'anxi

In [122]:
bigram = Phrases(sent_li, min_count=5, threshold=80)
trigram = Phrases(bigram[sent_li], threshold=80)  
bigram_mod = Phraser(bigram)
trigram_mod = Phraser(trigram)
trigrams = [trigram_mod[bigram_mod[sentence]] for sentence in sent_li]

In [124]:
num_features = 30        # Word vector dimensionality (how many features each word will be given)
min_word_count = 2        # Minimum word count to be taken into account
num_workers = 8       # Number of threads to run in parallel (equal to your amount of cores)
context = 5              # Context window size
downsampling = 0 #1e-2    # Downsample setting for frequent words
seed_n = 1                # Seed for the random number generator (to create reproducible results) 
sg_n = 1                  # Skip-gram = 1, CBOW = 0

model = Word2Vec(trigrams, workers=num_workers, \
            vector_size=num_features, min_count = min_word_count, \
            window = context, sample = downsampling, seed=seed_n, sg=sg_n)

In [103]:
from gensim.models import KeyedVectors

model.save("word2vec.vec")

model = Word2Vec.load("word2vec.vec")
#model = KeyedVectors.load("word2vec.vec")


In [95]:
#print('{:,} terms in the vocabulary.'.format(len(model.wv)))
#print(type(model.wv.key_to_index))     <class 'dict'>
#print(model.wv.index_to_key)
#print(model.wv)


import numpy as np

for key in model.wv.index_to_key:
    vec=model.wv.word_vec(key)
    ve=vec.tolist()
    dict[key]=ve
dict['china']

np=[]

#vec=model.wv.word_vec('china')
#vec.tolist()

[-0.006829362828284502,
 -0.41194427013397217,
 0.16235896944999695,
 0.7360480427742004,
 0.10764424502849579,
 -0.7745802402496338,
 0.8226320743560791,
 0.9299185872077942,
 -0.2434241771697998,
 1.2656981945037842,
 0.821933925151825,
 -0.24500641226768494,
 0.26630643010139465,
 -0.256115585565567,
 -1.6612622737884521,
 0.17267177999019623,
 -1.0878853797912598,
 -0.461504727602005,
 -0.6038709282875061,
 -0.23654767870903015,
 -0.3068745732307434,
 1.2953227758407593,
 -0.3908064365386963,
 -0.13675065338611603,
 -0.33174166083335876,
 0.6663362979888916,
 -0.451118528842926,
 0.759854793548584,
 -0.5581774711608887,
 -0.8537824153900146]

In [116]:
# build a list of the terms, integer indices, and term counts from the word2vec model vocabulary
#ordered_vocab = [(term, voc.index, voc.count) for term, voc in model.wv.vocab.items()]

    

# sort by the term counts, so the most common terms appear first
#ordered_vocab.sort(key = lambda x: x[2])  

# unzip the terms, integer indices, and counts into separate lists
#ordered_terms, term_indices, term_counts = zip(*ordered_vocab)

# create a DataFrame with the vectors as data, and the terms as row labels
#word_vectors = pd.DataFrame(model.wv.syn0norm[term_indices, :], index=ordered_terms)
word_vectors = pd.DataFrame(dict)
#word_vectors.head()
word_vectors.values

array([[-0.08887488, -0.00682936, -0.49185786, ..., -0.06418832,
        -0.05794762, -0.05521619],
       [-0.13042285, -0.41194427, -0.59202433, ..., -0.12239486,
        -0.15514809, -0.12179872],
       [ 0.22738865,  0.16235897,  0.92098963, ...,  0.19038519,
         0.24113241,  0.23117705],
       ...,
       [ 0.23425552,  0.75985479, -0.63256282, ...,  0.25502014,
         0.26306891,  0.22173329],
       [-0.08528728, -0.55817747, -1.45015097, ..., -0.12311564,
        -0.13694431, -0.14032674],
       [-0.13730435, -0.85378242,  0.34991029, ..., -0.15586439,
        -0.20258024, -0.15567994]])

In [105]:
tsne = TSNE()
tsne_vectors = tsne.fit_transform(word_vectors.values)

In [106]:
tsne_vectors = pd.DataFrame(tsne_vectors,
                            index=pd.Index(word_vectors.index),
                            columns=['x_coord', 'y_coord'])

In [111]:
tsne_vectors.head(10)

Unnamed: 0,x_coord,y_coord,word
0,-22.064526,65.333084,0
1,221.429672,105.448265,1
2,84.157242,231.869354,2
3,-234.525787,36.516891,3
4,-122.570724,-203.238831,4
5,11.661753,-17.106663,5
6,-138.688126,197.984039,6
7,202.18161,-115.921364,7
8,-159.291336,-24.583752,8
9,-76.799011,129.948364,9


In [113]:
tsne_vectors['word'] = tsne_vectors.index

In [114]:
output_notebook()

In [115]:
# add our DataFrame as a ColumnDataSource for Bokeh
plot_data = ColumnDataSource(tsne_vectors)

# create the plot and configure the
# title, dimensions, and tools
tsne_plot = figure(title='t-SNE Word Embeddings',
                   plot_width = 800,
                   plot_height = 800,
                   tools= ('pan, wheel_zoom, box_zoom, box_select, reset, reset'),
                   active_scroll='wheel_zoom')

# add a hover tool to display words on roll-over
tsne_plot.add_tools(HoverTool(tooltips = '@word'))

# draw the words as circles on the plot
tsne_plot.circle('x_coord', 'y_coord', source=plot_data,
                 color='blue', line_alpha=0.2, fill_alpha=0.1,
                 size=10, hover_line_color='black')

# configure visual elements of the plot
#tsne_plot.title.text_font_size = value('14pt')
tsne_plot.xaxis.visible = False
tsne_plot.yaxis.visible = False
tsne_plot.grid.grid_line_color = None
tsne_plot.outline_line_color = None

# show the plot
show(tsne_plot);



In [101]:
def clustering_on_wordvecs(word_vectors, num_clusters):
    # Initalize a k-means object and use it to extract centroids
    kmeans_clustering = KMeans(n_clusters = num_clusters, init='k-means++');
    idx = kmeans_clustering.fit_predict(word_vectors);
    
    return kmeans_clustering.cluster_centers_, idx;

Z = model.syn0

AttributeError: 'Word2Vec' object has no attribute 'syn0'

In [98]:
centers, clusters = clustering_on_wordvecs(Z, 10);
centroid_map = dict(zip(model.wv.index2word, clusters));

NameError: name 'Z' is not defined

In [91]:
def get_top_words(index2word, k, centers, wordvecs):
    tree = KDTree(wordvecs);
    # Use closest points for each cluster center to query closest 20 points to it
    closest_points = [tree.query(np.reshape(x, (1, -1)), k=k) for x in centers];
    closest_words_idxs = [x[1] for x in closest_points];
    # Query Word Index  for each position in the above array, and added to a Dictionary
    closest_words = {};
    for i in range(0, len(closest_words_idxs)):
        closest_words['Cluster #' + str(i)] = [index2word[j] for j in closest_words_idxs[i][0]]
    # Create DataFrame from dictionary
    df = pd.DataFrame(closest_words);
    df.index = df.index+1
    return df

In [93]:
top_words = get_top_words(model.wv.index_to_key, 5000, centers, Z);

NameError: name 'centers' is not defined

In [117]:
from sklearn.decomposition import PCA
from matplotlib import pyplot as plt

In [118]:
model.wv.save_word2vec_format('word_model2.txt', binary=False) 

f = open("word_model.txt","r")
new=[]
for line in f:
    new.append(line)
new[0]='\n'
f.close()

f = open("word_model.txt","w")
for n in new:
    f.write(n)
f.close()

import csv
with open('data.csv', 'w', newline='') as csvfile:      ##data.csv是用来存放词向量的csv文件
    writer = csv.writer(csvfile)
    data = open('word_model.txt')
    for each_line in data:
        a = each_line.split()
        writer.writerow(a)
        
import numpy as np
from sklearn.decomposition import PCA
from matplotlib import pyplot as plt
l = []
words=[]
with open('data.csv', 'r') as fd:
    line = fd.readline()
    line=fd.readline()
    while line:
        if line == "":
            continue
        line = line.strip()
        word = line.split(",")
        words.append(word[0])
        l.append(word[1:])
        line = fd.readline()
        
X = np.array(l)  #导入数据，维度为300
pca = PCA(n_components=2)   #降到2维
pca.fit(X)                  #训练
newX=pca.fit_transform(X)   #降维后的数据存放在newX列表中

dict={}
for i in range(len(words)):
    word_=words[i]
    dict[word_]=newX[i]
for j in range(len(words)):
    print(words[j]+':',end='')
    print(dict[words[j]])

said:[-0.16906241 -0.31901364]
cpec:[-0.63895954 -0.11899636]
economic:[-0.72718692 -0.27622063]
chinese:[ 0.24023866 -0.15387759]
countries:[-0.63233222 -0.46933011]
cooperation:[-0.50698414 -0.7805454 ]
bri:[-0.27880757 -0.43560054]
development:[-0.69666215 -0.26218135]
also:[-0.03123591  0.01882387]
people:[-0.44686405 -0.49054149]
initiative:[-0.54441062 -0.47542454]
would:[-0.74666387  0.02767006]
two:[-0.23805932 -0.57571394]
international:[ 0.16085773 -0.38245411]
projects:[-0.67935446  0.5973111 ]
world:[-0.31575934 -0.33748717]
corridor:[-0.62462217 -0.09428611]
project:[-0.58903115  0.50956279]
government:[ 0.0023483  -0.10846031]
added:[-0.49666015 -0.13547976]
trade:[-0.78569568  0.06448441]
pakistani:[0.01962296 0.14829455]
country:[-0.58145259  0.09939121]
regional:[-0.8098789  -1.12433842]
foreign:[ 0.34983507 -0.62292255]
relations:[-0.23108118 -1.3471084 ]
minister:[ 0.78332231 -0.63283348]
new:[-0.71249731  0.17825637]
investment:[-0.52487248  0.44882234]
region:[-0.7

commercial:[-0.16038732  0.55118502]
possible:[-0.70071973 -0.05931452]
free_trade:[-0.59202919  0.1987928 ]
believe:[-0.99561805 -0.85931739]
included:[0.57081253 0.45788992]
much:[-0.81319813  0.11513488]
closer:[-0.74491437 -1.09978159]
offered:[-0.2767898   0.27347311]
goods:[-0.68221552  1.32901545]
met:[ 1.08943364 -0.90040756]
complete:[-0.47567057 -0.29225919]
poverty_alleviation:[-0.6561209   0.24176678]
economies:[-1.30281522  0.28904654]
meet:[-0.2077983   0.00968027]
achievements:[-0.60990569 -0.49231411]
increasing:[-0.83541374  0.23073004]
film:[0.52909141 0.46890285]
stage:[-0.54649027  0.33045101]
urged:[-0.23206429 -0.40946048]
built:[-0.19962133  0.93472577]
lahore:[0.98681039 0.78928362]
asean:[-0.07879958 -0.2235455 ]
electricity:[-0.88738868  1.60956482]
participate:[ 0.76973952 -0.13710166]
crisis:[-0.89235116  0.17466846]
decades:[-0.64687065 -0.41989405]
significance:[-0.41973238 -0.63781137]
society:[-0.12952849 -0.01710449]
welcomed:[ 0.56336614 -0.84984296]
s

enjoy:[-0.34179798 -0.67996851]
moreover:[-0.43230091  0.50559488]
made_remarks:[ 0.65332143 -0.69389637]
student:[0.98414723 0.70286306]
workers:[0.00533732 0.87260248]
brotherly:[-0.11883551 -1.21718252]
adb:[ 0.26212022 -0.02723014]
visiting:[ 1.10905374 -0.19485346]
epidemic:[-0.5418828  -0.00626022]
ideas:[-0.63061335 -0.34575345]
milestone:[-0.23121032 -0.39364039]
responsibility:[-0.43902372 -0.49050287]
implement:[-0.78525033 -0.45818863]
lauded:[ 0.23900613 -0.6305941 ]
non:[-0.16892742 -0.151749  ]
ministers:[ 1.20973007 -0.70841011]
legal:[-0.05922279  0.29379562]
biggest:[-0.48212275  0.51678632]
unprecedented:[-0.70788768 -0.18859803]
trading:[-0.41570891  0.65756262]
collective:[-0.94666652 -1.26163259]
problem:[-0.90334259  0.19463971]
secretary_general:[ 1.27546771 -0.83316946]
highlighting:[-0.32115765 -0.35636785]
administration:[0.66501615 0.23359048]
acknowledged:[-0.09896001 -0.74740472]
tax:[-0.3003137   0.97667116]
round:[-0.01110167 -0.95963358]
yao_jing:[ 0.491

released:[ 0.57861425 -0.02122637]
efficiency:[-0.79880616  1.07927621]
executive:[ 1.41572816 -0.19610541]
zhao:[ 1.00493367 -0.12480254]
experiences:[-0.59345327 -0.50821212]
identified:[-0.44009127  0.1147686 ]
indeed:[-0.27815618 -0.31678308]
partnerships:[-1.01707999 -0.47658471]
offering:[-0.38536381  0.07762161]
studying:[0.6775113  1.03698908]
amount:[-0.5824995   0.83949076]
article:[ 0.07074995 -0.27074098]
followed:[ 0.21029105 -0.1168578 ]
workshop:[1.02232081 0.122198  ]
pipeline:[-0.45010094  1.16182967]
chair:[ 1.14088982 -0.85365228]
hangzhou:[ 0.92205355 -0.13427791]
depth:[-0.48799977 -0.73836385]
weather_friendship:[-0.44656615 -1.00782884]
socioeconomic:[-0.98460504  0.14601239]
covering:[0.0241944  1.08715281]
transparency:[-0.83406112 -0.01429984]
state_councilor:[ 0.8332794  -1.43103051]
negative:[-0.64471681 -0.14181853]
kind:[-0.48328436 -0.26585481]
outcomes:[-0.58012423 -0.41783443]
nationals:[0.10753939 0.0632348 ]
customs:[-0.33763381  0.88300506]
shipping:

pm_imran_khan:[ 0.30744773 -0.7577549 ]
import_expo:[ 0.90714651 -0.23599082]
territory:[ 0.06175445 -0.38229507]
ecological:[-0.26573886  0.52865006]
immediate:[-0.44902256 -0.23956924]
giant:[-0.26261699  0.26313918]
steady:[-1.15530981 -0.25021116]
drawn:[-0.15532422 -0.57133142]
way_forward:[-0.28217412 -0.74531877]
avoid:[-0.52744102 -0.60040644]
join_hands:[-0.5226554  -0.81912123]
symposium:[ 0.49993596 -0.34671111]
healthy:[-0.93049178 -0.04995671]
establishment_diplomatic:[ 0.03386483 -0.85382954]
celebrated:[ 0.82421868 -0.33576823]
underway:[-0.68312535  0.5041077 ]
great_hall:[ 1.1400134  -1.30829437]
body:[ 0.70516467 -0.08850132]
rejected:[-0.17332822 -0.30908119]
xinjiang_uygur:[0.35153677 0.73513749]
funds:[-0.32999327  0.51021101]
fpcci:[ 0.84970471 -0.0642345 ]
weather_strategic_cooperative_partners:[-0.32748864 -1.42298538]
reduced:[-0.88905318  0.83243341]
variety:[-0.08082494  0.46336706]
assembly:[0.81928617 0.15959142]
proved:[-0.58858137 -0.53475136]
referring:[

atmosphere:[ 0.10102992 -0.6594587 ]
features:[-0.23123129 -0.20844627]
state_owned:[0.26845942 0.7487185 ]
weather_strategic:[-0.09232087 -1.29803446]
article_published:[ 0.29435536 -0.14401469]
mw_electricity:[-0.65624188  1.35056766]
shanghai_spirit:[-0.73498441 -1.13838134]
shahbaz_sharif:[ 1.0578226 -0.1598801]
late:[ 0.01465167 -0.1118615 ]
chance:[-0.34875473 -0.05954938]
consider:[-0.32894729 -0.31864584]
characteristics:[ 0.15927365 -0.17718156]
regime:[-0.16075974 -0.32252033]
figures:[0.28515394 0.29871557]
natural_resources:[-0.66223253  0.51776674]
high_speed:[0.24241422 1.2876959 ]
settlement:[-0.53090871 -0.55967403]
adopt:[-0.67245166  0.03583953]
zhejiang:[1.00827337 0.08825651]
kashmir_dispute:[-0.30362907 -0.61006824]
faisalabad:[0.54274876 1.12192631]
spanning:[-0.41108718  0.04304722]
instance:[-0.44290884  0.87878448]
affect:[-0.40361239 -0.56437761]
rs_billion:[-0.2479653   1.34989134]
output:[-0.58992199  0.94042073]
emerge:[-0.76294208 -0.13004473]
spheres:[-0.

faces:[-0.53139934 -0.07410662]
magazine:[0.54248893 0.10895033]
reasons:[-0.30945047 -0.21674993]
textile_industry:[0.57751834 0.56621075]
covid_vaccine:[-0.15990741 -0.38857601]
cppcc:[ 1.28008328 -0.6424748 ]
internal_affairs:[-0.17383694 -0.73257298]
psx:[-0.18587402  0.84400237]
milk:[0.7795973  0.99415427]
responding:[-0.10486789 -0.38533816]
contain:[-0.0825407  -0.32161203]
seeds:[0.08669198 1.00380425]
suffered:[-0.50767352 -0.24521481]
thing:[-0.44055858 -0.06606379]
livestock:[0.4254907  0.36767307]
eager:[-0.35488126  0.17407462]
accept:[-0.17761461  0.06795095]
prime_minister_office:[ 0.91577425 -0.21408163]
entering:[-0.66786282  0.23208733]
usman:[0.86644887 0.52609022]
concessional:[-0.10636877  0.79753278]
primarily:[-0.61154663 -0.11738143]
maintain_close:[-0.39462809 -1.2796171 ]
assistant:[ 1.2121489  -0.61305618]
billions_dollars:[-0.29985802  0.34059179]
evident:[-0.57297076 -0.2538648 ]
power_shortage:[-0.62369493  0.66078646]
considers:[ 0.1687009 -0.3675505]
al

architecture:[-0.26733417  0.19061614]
constructive_role:[-0.39597816 -1.01433663]
acknowledging:[-0.07056693 -0.66254907]
thereby:[-0.62119242 -0.12457068]
roadmap:[-0.5537429  -0.14551907]
pleased:[-0.03081033 -0.23263074]
informal:[ 0.10127293 -0.29911546]
reform_commission:[0.29013199 0.02312493]
legitimate:[-0.28256298 -0.72912128]
roadshow:[0.45101325 0.21238145]
medicines:[0.49660034 0.26627098]
anyone:[-0.37798782 -0.18793039]
promise:[-0.47453808 -0.3501718 ]
arab:[ 0.32116043 -0.32659066]
leverage:[-0.53025702  0.13256881]
fashion:[0.42074435 0.42517508]
figure:[-0.178145    0.60943271]
opening_remarks:[ 0.65710991 -0.3699759 ]
generally:[-0.4190235  -0.12597571]
farooq:[ 1.4368426  -0.05071847]
issi:[ 0.76416382 -0.49909175]
bank_aiib:[-0.48821263  0.26607296]
fight_pandemic:[-0.52796309 -0.76555482]
stood_test:[-0.18877512 -0.90894236]
exchange_visits:[ 0.29612854 -0.48044376]
wb:[ 0.10646701 -0.03505062]
criticism:[-0.5496391  -0.33624196]
gas_pipeline:[-0.17599049  0.6029

opens:[-0.31168061 -0.29943193]
united_kingdom:[0.53869223 0.18872276]
successful_conclusion:[ 0.10333814 -0.53660616]
choose:[-0.21782981  0.10889526]
good_wishes:[ 0.68269639 -0.83303536]
allama_iqbal:[0.16135817 0.45765305]
sufficient:[-0.39931988  0.72929767]
usd:[-0.15324721  0.71880397]
considerable:[-0.57286904  0.40928051]
featuring:[-0.25205734 -0.40501445]
minded:[-0.55402042 -0.40242796]
negotiate:[0.07128036 0.07461291]
manzoor:[1.16463604 0.10037228]
tahir:[ 0.63573537 -0.06213405]
peshawar_karachi:[-0.20266623  0.95459791]
trump_administration:[ 0.20145062 -0.3011333 ]
adopting:[-0.25431643  0.42028555]
incident:[ 0.27036798 -0.32555792]
short_span:[-0.74082166  0.07417601]
naveed:[ 0.9529918 -0.2078281]
builders:[-0.05384002  0.21666398]
hassan_daud_butt:[ 0.15776596 -0.09251521]
ban:[0.58848165 0.36856391]
signal:[-0.0849782  -0.29944053]
kpezdmc:[0.04438275 0.77382531]
translating:[ 0.11877225 -0.50701403]
fo:[ 0.32217756 -0.63291818]
japan_south_korea:[0.04630035 0.24

issuance:[0.11009176 0.06136191]
eea:[0.34856276 0.1924265 ]
restraint:[ 0.03212749 -0.30322291]
debts:[-0.04793422  0.24881731]
munir:[ 0.430836   -0.50858082]
territorial_integrity:[-0.15289409 -0.79057656]
modi:[-0.03849394 -0.4405964 ]
worried:[ 0.05326382 -0.37321474]
substantive:[-0.73123127 -0.20942918]
quaid_azam_university:[ 0.96498069 -0.1086799 ]
corner:[-0.08088413  0.21021073]
smaller:[-0.12510942 -0.14819914]
par:[-0.58896053 -0.19377063]
putin:[ 0.11030838 -0.52223319]
natural_disasters:[-0.10055448  0.52913315]
optimum:[-0.77031169  0.18840879]
grants:[-0.27561128  0.85840686]
nafees_zakaria:[0.78319027 0.22961932]
injected:[-0.75280288 -0.2100493 ]
handle:[-0.41992548  0.00858788]
li_keqiang:[ 0.62709289 -0.72570572]
reservations:[-0.05385474 -0.35944501]
graduated:[0.48110067 0.73441686]
new_avenues:[-0.39106712 -0.68952432]
frameworks:[-0.07536779 -0.21338724]
easing:[-0.52261442  0.05854746]
holistic:[-0.76810352  0.20319044]
ecosf:[ 0.56196101 -0.12150689]
double_t

motherland:[ 0.0923974  -0.08693189]
capture:[0.00440367 0.33436784]
henan:[0.33892511 0.30748529]
lose:[-0.21094448 -0.08311182]
excited:[0.00226582 0.15049508]
arif:[0.77525859 0.30559442]
entertainment:[-0.25571649 -0.02854232]
mcc:[0.76478808 0.49151544]
bulk:[-0.25334148  0.80348675]
cooperative_partnership:[-0.10311319 -0.31072329]
determine:[-0.28794731 -0.19299362]
ancient_civilizations:[-0.30023717 -0.33710171]
traction:[-0.23527045  0.17659284]
central_eastern:[-0.38150796 -0.05241372]
outreach:[-0.53637063 -0.05279964]
thankful:[-0.1471371  -0.02392449]
network_highways:[-0.50878787  0.51834569]
inspire:[-0.25810804 -0.09885362]
turmoil:[-0.2413138  -0.16961436]
sha:[ 0.53628213 -0.46700988]
falls:[-0.23670754  0.16392315]
regimes:[-0.07703096 -0.05054695]
prime_minister_task_force:[0.99028303 0.27382854]
unleashed:[-0.34211393 -0.12729363]
best_wishes:[ 0.48700451 -0.46642495]
pathway:[-0.4944684  -0.12485326]
objection:[-0.11845354 -0.22659676]
malik_amin_aslam:[ 0.479611 

jhelum:[0.1956228  0.52787002]
net_reported:[0.2113974  0.05130955]
governmental:[-0.02199446 -0.19226987]
bakhtiar:[ 0.40963619 -0.05801681]
installation:[0.12226562 0.46962949]
advisory_council:[ 0.43750545 -0.10700008]
portal:[0.18239152 0.16094715]
convention_center:[ 0.6802206  -0.15708734]
cycle:[-0.40174174  0.16472482]
advent:[-0.33522659  0.25204912]
cai:[0.21015151 0.22252586]
clearance:[-0.35541104  0.40840692]
kenya:[ 0.47113378 -0.37933628]
pan:[ 0.17110199 -0.18644333]
muzaffarabad:[0.10197047 0.70344337]
iiui:[ 0.57866754 -0.16530296]
mahmoud:[0.01609719 0.00741948]
commemorative:[ 0.34259312 -0.18178536]
incoming:[0.20004588 0.22919711]
diaspora:[-0.04284831 -0.2436378 ]
raheel:[ 0.77254317 -0.05493882]
disabled:[-0.037605    0.25514611]
reflecting:[-0.06224352 -0.35930036]
weather_time_tested:[ 0.03067557 -0.55348785]
laboratory:[0.44502826 0.66134584]
unresolved:[-0.22759529 -0.28838013]
tehreek_insaf:[ 0.20960799 -0.03433639]
thar:[-0.24934134  1.05426628]
geo_politi

snow:[0.08778741 0.38719206]
museums:[ 0.19767151 -0.17728536]
saplings:[0.08207316 0.45149808]
cinemas:[0.44357462 0.11231414]
improves:[-0.30332916  0.38700935]
alleviate_poverty:[-0.56599046  0.10777504]
wonders:[-0.40209585 -0.01262911]
reiterates:[ 0.09495285 -0.69255402]
lpakistan:[ 0.05435074 -0.01430279]
ventilators:[0.27282058 0.50388677]
hailing:[ 0.03062082 -0.0807425 ]
integral:[-0.25250998 -0.21697849]
teacher:[0.35098628 0.26007886]
exclusively:[-0.17764761  0.17743418]
niazi:[ 0.25374639 -0.18674252]
saif:[0.78988005 0.04967068]
boao:[ 0.53672838 -0.13308326]
meaning:[-0.18972758 -0.317296  ]
defeated:[ 0.00327357 -0.18994253]
commemorate:[ 0.32580235 -0.40924205]
irrespective:[ 0.00360453 -0.41625995]
counter_negative_propaganda:[-0.35133047 -0.31728614]
appreciable:[-0.00208932 -0.10852786]
lavrov:[ 0.64209715 -0.56182945]
unionpay_cards:[0.18024383 0.08337492]
acknowledge:[-0.18930883 -0.22568727]
planet:[-0.12730279 -0.10161   ]
shifting_west_east:[-0.05010038 -0.004

jiang_jianguo:[ 0.805611   -0.35825939]
returning:[ 0.0099648  -0.06852074]
conventional:[-0.14361846  0.21981271]
muhammad_shahbaz:[0.69485886 0.28309439]
worry:[-0.1197517   0.12292883]
gokhale:[ 0.40115528 -0.08034168]
signals:[-0.1185591 -0.3946677]
malacca:[-0.29910592  0.31568349]
elite:[0.33255511 0.11697918]
mushtaq:[ 0.88143979 -0.05586183]
correctly:[-0.08067561  0.07348964]
presidency:[ 0.22389669 -0.31307836]
cooperated:[-0.31527843 -0.03966981]
demonstrating:[-0.32234075 -0.11709087]
khurram_dastgir:[ 0.34390623 -0.13951603]
mcpec:[0.07302819 0.05742494]
closing:[ 0.15768841 -0.014085  ]
middle_class:[-0.21597384  0.39170821]
zhou_qiang:[ 0.73957901 -0.27002275]
tourism_organization_unwto:[ 0.21270736 -0.02411397]
donation:[ 0.70989401 -0.06105548]
equip:[-0.00514546  0.35658324]
balance_payment:[-0.07926915  0.15420299]
logistic:[-0.14349547  0.42544131]
javed_jabbar:[0.39465672 0.01542374]
select_gathering:[0.09954462 0.02852076]
stakes:[ 0.14879726 -0.13021799]
nasser:[

buddhism:[0.18230918 0.24647761]
circulation:[-0.01345057  0.33424799]
cpaffc:[ 0.2534313  -0.39015024]
destruction:[-0.03586557  0.0613356 ]
archeological_sites:[ 0.0035671  -0.05475041]
envisions:[-0.19002888  0.05993251]
resistance:[0.01952723 0.04474638]
chiefs_staff_committee:[ 0.73937276 -0.07243157]
pesticides:[-0.01222166  0.66755057]
dissemination:[0.03512336 0.08540464]
diligence:[-0.15780186 -0.03640944]
biological_sciences:[0.66065086 0.41144866]
benchmark:[-0.3026716  -0.53281306]
jang:[0.68091842 0.14088678]
representation:[ 0.00650626 -0.1838639 ]
displaying:[0.27843365 0.06147062]
rafiq:[ 0.81797407 -0.03330864]
mainstreaming:[-0.11309677  0.19064207]
diploma:[0.24289234 0.32477651]
fullest:[-0.08013295 -0.29365622]
desires:[-0.08671137 -0.10285825]
benefitted:[ 0.02773283 -0.01192382]
likes:[0.04536967 0.19118784]
gaoli:[ 0.21686768 -0.32112097]
frame:[-0.18900721 -0.05615742]
shipyard:[-0.02418331  0.12502575]
meteoric:[ 0.24543418 -0.21469841]
normalization:[-0.19927

entirely:[-0.20195819  0.12891552]
supreme:[ 0.53422105 -0.2507062 ]
rely:[-0.09758067 -0.06807831]
accumulated:[-0.05342316  0.24454956]
assigned:[-0.02015526  0.09428999]
hearing:[0.20006754 0.0570694 ]
substantially:[-0.16787085  0.20192187]
provisions:[-0.00133402 -0.18889183]
relative:[-0.27071905  0.20509634]
jvs:[-0.41639373  0.3075174 ]
simple:[0.13402123 0.06248297]
jinnah:[0.79093345 0.16312581]
optics:[0.36824183 0.53667684]
encompasses:[-0.14969684 -0.12201305]
afforestation:[0.09693909 0.58996424]
seminal:[-0.07969936 -0.23087801]
adaptation:[0.02325025 0.29154011]
consequent:[-0.16008851  0.17301418]
conspiracy:[-0.24194171 -0.09786213]
rehana:[0.68123313 0.26424898]
removing:[0.05828121 0.06327604]
northwestern:[-0.27759078  0.50328219]
ausaf:[0.51034661 0.04229188]
extremists:[-0.00081092 -0.10610452]
hatred:[ 0.01340143 -0.28666425]
delhi:[-0.06612902 -0.16734146]
explaining:[ 0.0754715  -0.11236251]
hr:[0.27540554 0.05965226]
firdous:[ 0.42682884 -0.37293103]
limits:[