In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import re
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
stop_words = stopwords.words('english')

from sklearn.metrics.pairwise import cosine_similarity
import networkx as netx

# Enable logging for gensim - optional
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.ERROR)

import warnings
warnings.filterwarnings("ignore",category=DeprecationWarning)

In [11]:
newsarticles = pd.read_csv('news_articles.csv')
newsarticles.shape

(180, 6)

In [12]:
newsarticles.head()

Unnamed: 0.1,Unnamed: 0,Title,date,Article,Category,PageLinks
0,0,"For some, the pandemic is an opportunity to st...","September 6, 2020","By six every morning, Kinchu Lhamo Bhutia is o...",National,https://kathmandupost.com/national/2020/09/06/...
1,1,Smugglers in Dadeldhura haphazardly fell trees...,"September 6, 2020",Hundreds of trees have been felled down on the...,National,https://kathmandupost.com/sudurpaschim-provinc...
2,2,Around 40 percent government staff haven’t rec...,"September 6, 2020",The Rautahat District Treasury Comptroller’s O...,National,https://kathmandupost.com/national/2020/09/06/...
3,3,Relief distribution programme in Birgunj fails...,"September 6, 2020","On Thursday, many residents of Ranighat in Bir...",National,https://kathmandupost.com/province-no-2/2020/0...
4,4,Job schemes come into question as virus-induce...,"September 6, 2020","Last week, when a group of Nepalis were crossi...",National,https://kathmandupost.com/national/2020/09/06/...


In [13]:
newsarticles.drop('Unnamed: 0',axis=1, inplace=True)

In [23]:
# Category of news
newsarticles.Category.unique()

array(['National', 'Politics', 'Valley', 'Opinion', 'Money', 'Sports',
       'Health', 'Food', 'Science & Technology'], dtype=object)

In [14]:
data = newsarticles.copy()
data.head()

Unnamed: 0,Title,date,Article,Category,PageLinks
0,"For some, the pandemic is an opportunity to st...","September 6, 2020","By six every morning, Kinchu Lhamo Bhutia is o...",National,https://kathmandupost.com/national/2020/09/06/...
1,Smugglers in Dadeldhura haphazardly fell trees...,"September 6, 2020",Hundreds of trees have been felled down on the...,National,https://kathmandupost.com/sudurpaschim-provinc...
2,Around 40 percent government staff haven’t rec...,"September 6, 2020",The Rautahat District Treasury Comptroller’s O...,National,https://kathmandupost.com/national/2020/09/06/...
3,Relief distribution programme in Birgunj fails...,"September 6, 2020","On Thursday, many residents of Ranighat in Bir...",National,https://kathmandupost.com/province-no-2/2020/0...
4,Job schemes come into question as virus-induce...,"September 6, 2020","Last week, when a group of Nepalis were crossi...",National,https://kathmandupost.com/national/2020/09/06/...


We will be using the pre-trained **Wikipedia 2014 + Gigaword 5** GloVe vectors for word_embedding. Glove word embeddings are the vector representation of the words.

In [7]:
word_embeddings = {}
f = open('glove.6B.100d.txt', encoding='utf-8')
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    word_embeddings[word] = coefs
f.close()

In [8]:
len(word_embeddings)

400000

In [15]:
# function to remove stopwords
def remove_stopwords(sen):
    sen_new = " ".join([i for i in sen.split() if i not in stop_words])
    return sen_new

# this function is used to preprocess newsarticles
def preprocess(text):
    sentences = sent_tokenize(text)
    neat_sentences = pd.Series(sentences).str.replace("[^a-zA-Z]", " ")  # removes all special characters
    neat_sentences = [s.lower() for s in neat_sentences]
    neat_sentences = [remove_stopwords(sent) for sent in neat_sentences]
    return neat_sentences,sentences

# vectorizing each sentences in the article
def vectorize_sentences(cleaned_sentences):
    sent_vectors = []
    for i in cleaned_sentences:
        if len(i) != 0:
            vector = sum([word_embeddings.get(w, np.zeros((100,))) for w in i.split()])/(len(i.split())+0.001)
        else:
            vector = np.zeros((100,))
        sent_vectors.append(vector)
    
    return sent_vectors
        


In [16]:
from sklearn.metrics.pairwise import cosine_similarity
def similarity_matrix(sentences,sentence_vectors):
    simi_mat = np.zeros([len(sentences), len(sentences)])
    for i in range(len(sentences)):
        for j in range(len(sentences)):
            if i != j:
                simi_mat[i][j] = cosine_similarity(sentence_vectors[i].reshape(1,100), sentence_vectors[j].reshape(1,100))[0,0]
    return simi_mat

## applying pagerank algorithm 
def pageRank(sentences,simi_matrix):
    nx_graph = netx.from_numpy_array(simi_matrix)
    scores = netx.pagerank(nx_graph)
    return scores
    

In [17]:
def summarize(article_title):
    articletext = newsarticles[newsarticles.Title == article_title].Article
    list_corpus = list(articletext)
    preprocessed_sent, sentences = preprocess(list_corpus[0])
    
    sentence_vector = vectorize_sentences(preprocessed_sent)
    
    sim_matrix = similarity_matrix(preprocessed_sent,sentence_vector)
    
    scores = pageRank(sentences,sim_matrix)
    ranked_sentences = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)
    top_sentences =[]
    for i in range(7):
        top_sentences.append(ranked_sentences[i][1])
        
    summary = '.'.join(top_sentences)
    return summary  

In [18]:
def related_articles(search_word):
    df_table = pd.DataFrame(columns = ["Date","Title","ArticleSummary"])
    summaries = []
    articles_df = newsarticles[newsarticles['Category'] == search_word]
    df_table.Date = list(articles_df.date)
    df_table.Title = list(articles_df.Title)
    # create summaries
    for title in list(df_table.Title):
        summaries.append(summarize(title))
    df_table.ArticleSummary = summaries
    #df_table.drop("word_ctr", axis=1, inplace = True)
    
    return df_table

pd.options.display.max_colwidth = 1500

In [19]:
## summary of 'National' newscategory
df_national = related_articles('National')
df_national

Unnamed: 0,Date,Title,ArticleSummary
0,"September 6, 2020","For some, the pandemic is an opportunity to start new businesses","“But ever since I started helping my father with his business, I got too busy to spend much time on baking.” But that changed during the lockdown..“Friends who had tried my food have always told me to start my own food business but I never really thought I would actually start it in the midst of a pandemic,” said Bhutia..She had always wanted to start a business focusing on serving the kind of Sikkimese food that she grew up eating..“During the lockdown, our family business had to be shut, and I had ample free time to bake, and I resumed baking and shared my baked products with family, friends, and neighbours,” said Manandhar..For Tamang and Bhutia, it was when their friends became repeat customers that they knew their business was going to do well..But when the orders started increasing, I realised having home delivery services makes things much more convenient for customers,” said Manandhar..But between helping her husband’s pashmina business in Kathmandu and raising her children, she had very little time to do anything else."
1,"September 6, 2020",Smugglers in Dadeldhura haphazardly fell trees on pretext of constructing road,"“We had informed the Sub Division Office about the haphazard tree felling but the forest employees did not pay any heed.” According to the Division Forest Office in Dadeldhura, forest officials also found sal logs hidden inside the houses of consumers four days ago..“The spokesperson at the Division Forest Office in Dadeldhura will provide additional information about the tree felling in Rajani Community Forest and its vicinity.” Meanwhile, Bishnu Acharya, chief at the Dadeldhura Forest Office, said, “We found that a large number of trees were felled down in collusion with forest employees in Aalital..“Smugglers, in collusion with forest officials and local political leaders, have been smuggling timber from the Aalital area for a long time..Forest officials are collecting details on the incident.” According to Acharya, two individuals, including the chairman of the Rajani Community Forest, have been arrested to make inquiries about the incident on August 24..The incident came to light on August 22 when forest officials confiscated a large quantity of green sal logs on the road section that connects Godam bazaar in Aalital to Dola Village..Locals said they cannot openly talk about the smugglers and timber smuggling in the forest because of an incident in which an individual, who provided information to the media about timber smuggling, was burnt alive by smugglers a decade ago..However, Santosh Bhatta, chief at the Aalital Sub Division Forest Office, claims he does not k..."
2,"September 6, 2020",Around 40 percent government staff haven’t received their salaries yet,"“As provincial government offices have not submitted their financial details for the 2019-20 fiscal, which ended two months ago, and the payment orders seeking funds for the salaries, not a single penny has been released as salary as of Friday,” said Ishwori Prasad Dhakal, chief of District Treasury Comptroller Office, Rautahat..Around 40 percent of government employees at both the federal and provincial levels across the country haven’t received the salary for Shrawan, according to the Financial Comptroller General’s Office..Usually, the district treasury offices start releasing funds for salaries weeks before the end of the month based on financial details submitted by the concerned government offices..“The district treasury offices have released funds for the salaries of only 60 percent of government employees so far,” said Deputy Financial Comptroller General Bhesh Prasad Bhurtel..For staffers at a government office to receive salaries, the office chief and chief accountant need to sign a payment order requesting the district treasury office to release funds..Even Kathmandu-based district treasury offices have not released the salaries of all staffers..“For example, the district treasury office in Singh Durbar has released the salaries of around 90 of the 100 offices under its ambit and the one in Teku has released the salaries of around 80 of the 87 offices it deals with,” said Bhurtel."
3,"September 6, 2020",Relief distribution programme in Birgunj fails to cover vulnerable families,"Khatun had also reached the police office on Thursday to receive relief but was sent away since her name was not on the list of needy people prepared by the police office..They asked us to come back on Thursday but after waiting all day long, they turned us away empty-handed.” More than 150 women from Sheerpur, Murli and Chhapaiya in Birgunj queued up outside the police office for two days to receive relief..They said they were giving relief only to those whose names were on the list and that we should go to the metropolis’ office..We have used up most of our savings and if the government does not provide us with food, we will die of hunger soon,” she said..“The District Police Office had decided to distribute leftover foodstuff from an earlier distribution drive to around 60 people..The office had collected data of the needy families in the metropolis but on Thursday those whose names were not listed had also reached the police office..“It’s becoming increasingly difficult for us to survive.” According to the District Police Office, the relief distribution programme was meant for those in extremely vulnerable situations."
4,"September 6, 2020",Job schemes come into question as virus-induced unemployment hits rural youths hard,"“That Nepalis are still migrating to India and also seeking labour permits for the Gulf only shows these schemes have not changed much for the groups that need jobs.” Two years into its implementation, the Prime Minister Employment Programme, which promises a minimum 100 days of employment for unemployed registered citizens, have not shown promising results..As part of the programme, the government aimed to create jobs for 200,000 people at the local level in this fiscal year..Such a desperate situation indicates that government programmes that promised jobs, even for a temporary period, have not reached targeted groups, according to Gurung..The programme priorities those who are from highly marginalised groups even among unemployed sections.” According to Ghimire, funds are being sent first to the local level this year, and they have been asked to update the number of registered unemployed citizens..The Ministry of Labour, Employment and Social Security says that the programme generated employment opportunities for its target of 60,ooo people across the country in the fiscal year 2019-20..“But there is no visible result, as people continue to migrate for basic needs.” Rather than providing seasonal employment, the Prime Minister Employment Programme should focus on asset-building by developing skills, along with providing jobs, according to Bhattarai..“With the increase in the number of unemployed, local units have been asked to come up with programmes where more peopl..."
5,"September 5, 2020",Police suspect illegal gambling dens are thriving during Covid-19 restrictions,"According to the data provided by Metropolitan Police Office, Ranipokhari a total of 22 people were rounded up from illegal gambling joints in Kathmandu Valley in the last six months..In the fiscal year 2018/19, police detained 2,869 people, the highest number of arrests in the past three years, from across the country..According to the data of the past three fiscal years, the number of people arrested for gambling offences has risen steadily..Every year, Nepal Police crack down on hundreds of illegal gambling dens which are operated mostly in private houses across the country and confiscate millions of rupees..The number of arrests on illegal gambling charges has gone down as a result, say police who suspect that illegal gambling dens have thrived during these past six months of Covid-19 lockdown and restrictions..According to the data maintained by the Metropolitan Police Office, Rani Pokhari, more than Rs 19 million was recovered from 1,161 suspected gamblers from different parts of the Valley in the fiscal year 2018/19..“We have not conducted that many raids in private houses in the past six months, as our personnel are busy due enforcing Covid-19 restrictions,” said Senior Superintendent of Police, Sushil Kumar Yadav, spokesperson for the Metropolitan Police Office, Ranipokhari."
6,"September 5, 2020",Overstay fines become a hurdle for Nepali workers waiting to return home from the UAE,"“They requested the officials to allow them to board the flight, but nothing happened.” The Nepal Airlines flight took off, leaving behind at least 17 workers at Dubai airport.Khem said even when the flights are available, many Nepali workers are unable to return home because of hefty overstay fines..“The government could have at least requested for the general amnesty for workers whose visas have expired and could not travel home because of the lockdown.” Khem and his three friends have a rescheduled flight on Sunday..There are many Nepali workers in the UAE, including Khem and his three friends, whose visas expired after the March 1 cutoff date and now face overstay fines and thereby unable to return home unless they pay the fines..These workers could not return home soon after their visa had expired because there were no flights to bring them home..“The only solution to this problem is that either the Nepal government should pay the fines or make the employers pay.” Sherpa said this situation would not have arrived had the Nepal government agreed to the UAE government’s proposal of flying the Nepali workers to Kathmandu for free of cost..After months of wait, Khem, a Nepali migrant worker in Dubai, managed to get a flight ticket to return home..“My three friends could not board their flight because they could not pay the overstay fines,” Khem said."
7,"September 5, 2020",Repatriation quandary persists even with funds to support migrant workers’ airfare,"The guidelines state that the concerned recruiting agencies and foreign missions must first ensure the migrant workers have not received air tickets or other financial aid from their employers or host nations before they get support from the welfare fund to return home..“For instance, the Nepal Embassy in the United Arab of Emirates has estimated that 15,000 Nepali workers may need the welfare fund’s support to return home.” The government has been facing criticism for the delay in repatriation of troubled migrant workers..He argues the guidelines simply instruct the Nepali missions to contact the employers, recruiting agencies, or the host governments and check if the workers are getting any financial support for airfare so that they could return home..It’s been more than two and half months since the court decision, and the government has still not provided the air tickets to the Nepali workers languishing in foreign countries..The Supreme Court on June 15 ordered the government to repatriate the Nepali migrant workers stranded in various labour destinations by using the Foreign Employment Welfare Fund..Only this time they are supposed to make sure whether the workers have received airfare support,” said Luitel, who was also a member of the committee that drafted the guidelines..“But we are not yet sure whether those workers have already returned to Nepal or not."
8,"September 5, 2020",How politicians' chopper rides to disaster-hit areas fly in the face of spirit of federalism,"“I do not see any point in ministers and leaders from Kathmandu making rounds of every other disaster-hit areas, as provincial and local governments are already in place to respond to such events” said Khimlal Devkota, an expert on federal affairs who also writes extensively on fiscal federalism..With the three tiers of government in place for the last three years, people’s representatives and the federal government should have worked to devolve power and strengthen sub-national governments, according to analysts..“Provincial and local governments are out of business, so the federal government should make them responsible.” In the past, some provinces have complained about the federal government’s tendency to undermine their authority, but there still seems to be confusion, according to analysts..The federal government is already facing criticism for holding on to the concept of chief district officers and making them work as the Home Ministry’s liaison in the districts even in the current federal set-up..In 2017, people’s representatives were elected in 6,473 wards of 753 local bodies across the country to ensure that everyone has their representation in decision-making and that state through its various arms, like wards, will ensure service delivery, promptly respond in the times of crisis and listen to the people’s plight..While there is a worldwide practice of prime ministers or presidents visiting the places hit by a disaster or crisis of a large scale, such events a..."
9,"September 4, 2020",Race begins for new finance minister as Oli bids farewell to Khatiwada,"Since another ministry is also vacant, now a Cabinet reshuffle is also likely, according to a leader close to Prime Minister KP Sharma Oli..Yubaraj Khatiwada resigned as finance and information and communication technology minister on Friday, a day after the ruling Nepal Communist Party decided not to nominate him, again, as a member of the National Assembly, a prerequisite for him to continue in the Oli Cabinet..Leaders from the Dahal-Nepal faction said the Oli himself proposed Gautam for the Upper House as soon as the Secretariat meeting began on Thursday, sensing that annoying Gautam could push him into the minority..“It had become obvious that the prime minister would bid farewell to the finance and communication minister, which he did today,” said Subash Nembang, a Standing Committee member who is a close confidante of Oli..Nor have I expressed my desire to become a minister before any party committees,” Shrestha told the Post..Gautam himself has made public his ambitions to become a minister, but it is not clear if Oli will let him lead the Finance Ministry..Another leader said that both the ministries led by Khatiwada could be given to Gyawali."


In [21]:
df_national['ArticleSummary'][0]

'“But ever since I started helping my father with his business, I got too busy to spend much time on baking.”  But that changed during the lockdown..“Friends who had tried my food have always told me to start my own food business but I never really thought I would actually start it in the midst of a pandemic,” said Bhutia..She had always wanted to start a business focusing on serving the kind of Sikkimese food that she grew up eating..“During the lockdown, our family business had to be shut, and I had ample free time to bake, and I resumed baking and shared my baked products with family, friends, and neighbours,” said Manandhar..For Tamang and Bhutia, it was when their friends became repeat customers that they knew their business was going to do well..But when the orders started increasing, I realised having home delivery services makes things much more convenient for customers,” said Manandhar..But between helping her husband’s pashmina business in Kathmandu and raising her children, 