<a href="https://colab.research.google.com/github/Santosh3010/Advance-AI-Project/blob/main/AAI_Event_Extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#import the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.utils import resample

#sklearn package 
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn. preprocessing import LabelEncoder,StandardScaler
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import ComplementNB,MultinomialNB,GaussianNB 

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

from sklearn.ensemble import GradientBoostingClassifier

#model evaluation
from sklearn.metrics import accuracy_score,classification_report, confusion_matrix,recall_score,precision_score,f1_score


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
# Load the news-category dataset. The file is in JSON Lines format (one JSON
# record per line), so lines=True is needed; without it pandas fails with a
# "Trailing data" error.
data = pd.read_json(path_or_buf='News_Category_Dataset_v2.json', lines=True)

# Keep the column index in `column` and show it as the cell output.
column = data.columns
column

Index(['category', 'headline', 'authors', 'link', 'short_description', 'date'], dtype='object')

### A quick inspection of the data


In [None]:
# Preview the first five records.
data.head(n=5)

Unnamed: 0,category,headline,authors,link,short_description,date
0,CRIME,There Were 2 Mass Shootings In Texas Last Week...,Melissa Jeltsen,https://www.huffingtonpost.com/entry/texas-ama...,She left her husband. He killed their children...,2018-05-26
1,ENTERTAINMENT,Will Smith Joins Diplo And Nicky Jam For The 2...,Andy McDonald,https://www.huffingtonpost.com/entry/will-smit...,Of course it has a song.,2018-05-26
2,ENTERTAINMENT,Hugh Grant Marries For The First Time At Age 57,Ron Dicker,https://www.huffingtonpost.com/entry/hugh-gran...,The actor and his longtime girlfriend Anna Ebe...,2018-05-26
3,ENTERTAINMENT,Jim Carrey Blasts 'Castrato' Adam Schiff And D...,Ron Dicker,https://www.huffingtonpost.com/entry/jim-carre...,The actor gives Dems an ass-kicking for not fi...,2018-05-26
4,ENTERTAINMENT,Julianna Margulies Uses Donald Trump Poop Bags...,Ron Dicker,https://www.huffingtonpost.com/entry/julianna-...,"The ""Dietland"" actress said using the bags is ...",2018-05-26


In [None]:
# Count rows that are exact duplicates (across every column) of an earlier row.
# Nothing is removed here; this cell only reports the number.
data.duplicated(keep='first').sum()

13

In [None]:
# Other fields (e.g. the author string) can differ between copies of the same
# story, so count duplicates using only headline + short_description as the key.
dedup_keys = ['headline', 'short_description']
data.duplicated(subset=dedup_keys).sum()

488

In [None]:
# Remove rows that repeat an existing headline/short_description pair,
# keeping the last occurrence of each pair.
data = data.drop_duplicates(
    subset=['headline', 'short_description'],
    keep='last',
)

In [None]:
# Summary statistics for every column.
# include='all' is explicit on purpose: with the default arguments, recent
# pandas releases treat the datetime `date` column as numeric and would
# summarise only that column, hiding the text columns (category, headline, ...).
data.describe(include='all')

  """Entry point for launching an IPython kernel.


Unnamed: 0,category,headline,authors,link,short_description,date
count,200365,200365,200365.0,200365,200365.0,200365
unique,41,199344,27986.0,200341,178353.0,2309
top,POLITICS,Sunday Roundup,,https://www.huffingtonpost.comhttp://testkitch...,,2012-07-18 00:00:00
freq,32719,90,36514.0,2,19611.0,100
first,,,,,,2012-01-28 00:00:00
last,,,,,,2018-05-26 00:00:00


## Handling null data


In [None]:
# Number of missing (NaN/None) values per column.
data.isna().sum()

category             0
headline             0
authors              0
link                 0
short_description    0
date                 0
dtype: int64

In [None]:
# No column contains nulls, but missing text is stored as an empty string
# instead. List the rows whose headline is blank so they can be removed.
data.loc[data['headline'].eq('')]

Unnamed: 0,category,headline,authors,link,short_description,date
86893,RELIGION,,"Matthew Fox, ContributorRadical theologian Mat...",https://www.huffingtonpost.com/entry/post_9671...,Let everyone not wrapped in tired and disprove...,2015-06-30
113471,QUEER VOICES,,"Gary Snyder, ContributorWriter and Media Strat...",https://www.huffingtonpost.com/entry/beverly-h...,,2014-08-28


In [None]:
# Drop every row whose headline is an empty string, then report the new shape.
headline_blank = data['headline'].eq('')
data = data.loc[~headline_blank]
print("THe length of the datset ------>",data.shape)

THe length of the datset ------> (200363, 6)


In [None]:
# Rows with an empty short_description contribute no description text:
# report how many there are, remove them, and show the resulting shape.
description_blank = data['short_description'].eq('')
blank_rows = data.loc[description_blank]
print("the lenth of the blank description samples----->",len(blank_rows))
data = data.loc[~description_blank]
print("THe length of the datset ---------------------->",data.shape)

the lenth of the blank description samples-----> 19610
THe length of the datset ----------------------> (180753, 6)


### Balance the Category Data


In [None]:
# Per-category row counts (value_counts sorts them most frequent first);
# display just the category names in that order.
category = data['category'].value_counts()
category.index.tolist()

['POLITICS',
 'WELLNESS',
 'ENTERTAINMENT',
 'STYLE & BEAUTY',
 'TRAVEL',
 'PARENTING',
 'FOOD & DRINK',
 'QUEER VOICES',
 'HEALTHY LIVING',
 'BUSINESS',
 'COMEDY',
 'SPORTS',
 'HOME & LIVING',
 'BLACK VOICES',
 'THE WORLDPOST',
 'WEDDINGS',
 'PARENTS',
 'DIVORCE',
 'IMPACT',
 'WOMEN',
 'CRIME',
 'MEDIA',
 'WEIRD NEWS',
 'WORLD NEWS',
 'GREEN',
 'TECH',
 'TASTE',
 'RELIGION',
 'SCIENCE',
 'MONEY',
 'STYLE',
 'ARTS & CULTURE',
 'ENVIRONMENT',
 'WORLDPOST',
 'FIFTY',
 'GOOD NEWS',
 'LATINO VOICES',
 'CULTURE & ARTS',
 'COLLEGE',
 'EDUCATION',
 'ARTS']

In [None]:
# Keep only the 15 categories used for classification; rows belonging to any
# other category are dropped.
kept_categories = [
    'POLITICS', 'WELLNESS', 'ENTERTAINMENT', 'TRAVEL',
    'STYLE & BEAUTY', 'PARENTING', 'HEALTHY LIVING', 'QUEER VOICES',
    'FOOD & DRINK', 'BUSINESS', 'COMEDY', 'PARENTS',
    'SPORTS', 'HOME & LIVING', 'BLACK VOICES',
]

# Boolean mask aligned with `data`: True where the row's category is kept.
cateo_keep = data['category'].isin(kept_categories)
data = data[cateo_keep]



In [None]:
# Display the boolean keep-mask (True = the row's category is one of the retained ones).
cateo_keep

0         False
1          True
2          True
3          True
4          True
          ...  
200848    False
200849     True
200850     True
200851     True
200852     True
Name: category, Length: 180753, dtype: bool

In [None]:
# Same category filter as the earlier filtering cell. When `data` has already
# been filtered, the mask is all True and this cell leaves `data` unchanged.
kept_categories = [
    'POLITICS', 'WELLNESS', 'ENTERTAINMENT', 'TRAVEL',
    'STYLE & BEAUTY', 'PARENTING', 'HEALTHY LIVING', 'QUEER VOICES',
    'FOOD & DRINK', 'BUSINESS', 'COMEDY', 'PARENTS',
    'SPORTS', 'HOME & LIVING', 'BLACK VOICES',
]

cateo_keep = data['category'].isin(kept_categories)
data = data[cateo_keep]



In [None]:
# Balance the dataset: draw the same number of rows from every retained
# category so that no class dominates training.
SAMPLES_PER_CATEGORY = 3000   # rows drawn from each category
RESAMPLE_SEED = 123           # fixed seed -> the same rows are drawn on every run

# Order matters only for the row order of the concatenated frame.
BALANCED_CATEGORIES = [
    'POLITICS', 'WELLNESS', 'ENTERTAINMENT', 'TRAVEL', 'STYLE & BEAUTY',
    'PARENTING', 'HEALTHY LIVING', 'QUEER VOICES', 'FOOD & DRINK', 'BUSINESS',
    'COMEDY', 'PARENTS', 'SPORTS', 'HOME & LIVING', 'BLACK VOICES',
]

# One down-sampled frame per category. replace=False samples without
# replacement, so no row is duplicated; resample raises if a category has
# fewer than SAMPLES_PER_CATEGORY rows.
cato_list = [
    resample(
        data[data['category'] == category_name],
        replace=False,
        n_samples=SAMPLES_PER_CATEGORY,
        random_state=RESAMPLE_SEED,
    )
    for category_name in BALANCED_CATEGORIES
]

data = pd.concat(cato_list)

# Sanity check: every category should now have SAMPLES_PER_CATEGORY rows.
data['category'].value_counts()

POLITICS          3000
WELLNESS          3000
ENTERTAINMENT     3000
TRAVEL            3000
STYLE & BEAUTY    3000
PARENTING         3000
HEALTHY LIVING    3000
QUEER VOICES      3000
FOOD & DRINK      3000
BUSINESS          3000
COMEDY            3000
PARENTS           3000
SPORTS            3000
HOME & LIVING     3000
BLACK VOICES      3000
Name: category, dtype: int64

In [None]:
# Preview the first five rows of the balanced dataset.
data.head(n=5)

Unnamed: 0,category,headline,authors,link,short_description,date
77976,POLITICS,Republican Activists Increasingly Favor Outsid...,Mark Blumenthal and Ariel Edwards-Levy,https://www.huffingtonpost.com/entry/republica...,"Is this election an anomaly, or will the party...",2015-10-09
46953,POLITICS,A Crooked Wall Street Billionaire Is Crafting ...,Zach Carter,https://www.huffingtonpost.com/entry/a-crooked...,Hoo boy.,2016-09-26
356,POLITICS,GOP Candidate Set Up Trust That Pays His Kids ...,Mary Papenfuss,https://www.huffingtonpost.com/entry/vickers-c...,"Even his own mom calls him a bigot, but former...",2018-05-19
106292,POLITICS,"The U.S. Isn't in the Business of Bigotry, Are...","Terry O'Neill, ContributorPresident, National ...",https://www.huffingtonpost.com/entry/the-us-do...,"Birth control is life-saving, life-affirming h...",2014-11-19
67266,POLITICS,Bernie Sanders On 'SNL' Is Everything We Hoped...,Paige Lavender,https://www.huffingtonpost.com/entry/bernie-sa...,Amazing.,2016-02-07


## Combining the headline and description columns


In [None]:
# Build the model input: headline and short description joined by a hyphen.
data['text'] = data['headline'].str.cat(data['short_description'], sep='-')

In [None]:
# Check that the new `text` column was added as expected.
data.head(n=5)

Unnamed: 0,category,headline,authors,link,short_description,date,text
77976,POLITICS,Republican Activists Increasingly Favor Outsid...,Mark Blumenthal and Ariel Edwards-Levy,https://www.huffingtonpost.com/entry/republica...,"Is this election an anomaly, or will the party...",2015-10-09,Republican Activists Increasingly Favor Outsid...
46953,POLITICS,A Crooked Wall Street Billionaire Is Crafting ...,Zach Carter,https://www.huffingtonpost.com/entry/a-crooked...,Hoo boy.,2016-09-26,A Crooked Wall Street Billionaire Is Crafting ...
356,POLITICS,GOP Candidate Set Up Trust That Pays His Kids ...,Mary Papenfuss,https://www.huffingtonpost.com/entry/vickers-c...,"Even his own mom calls him a bigot, but former...",2018-05-19,GOP Candidate Set Up Trust That Pays His Kids ...
106292,POLITICS,"The U.S. Isn't in the Business of Bigotry, Are...","Terry O'Neill, ContributorPresident, National ...",https://www.huffingtonpost.com/entry/the-us-do...,"Birth control is life-saving, life-affirming h...",2014-11-19,"The U.S. Isn't in the Business of Bigotry, Are..."
67266,POLITICS,Bernie Sanders On 'SNL' Is Everything We Hoped...,Paige Lavender,https://www.huffingtonpost.com/entry/bernie-sa...,Amazing.,2016-02-07,Bernie Sanders On 'SNL' Is Everything We Hoped...


In [None]:
# headline and short_description now live in `text`, and authors is not used
# as a feature, so remove these three columns.
data = data.drop(columns=['authors', 'headline', 'short_description'])

In [None]:

# Shuffle the rows so the categories are interleaved instead of sitting in
# consecutive per-category blocks. The seed is fixed so that the row order,
# and therefore which rows land in the later train/test split, is the same
# on every run.
from sklearn.utils import shuffle

SHUFFLE_SEED = 42
data = shuffle(data, random_state=SHUFFLE_SEED).reset_index(drop=True)  # fresh 0..n-1 index
data.head(4)

Unnamed: 0,category,link,date,text
0,BLACK VOICES,https://www.huffingtonpost.com/entry/3-chicago...,2017-06-27,3 Chicago Cops Charged In Cover-Up Of Laquan M...
1,BLACK VOICES,https://www.huffingtonpost.com/entry/memphis-t...,2017-08-28,Memphis Theater Won't Play 'Gone With The Wind...
2,COMEDY,https://www.huffingtonpost.com/entry/teens-rea...,2015-07-13,Teens Reacting To Encyclopedias Will Probably ...
3,PARENTING,https://www.huffingtonpost.com/entry/friends-b...,2013-01-27,My Friends Were Becoming Grandparents And It '...


## Training the model


In [None]:
# Features: the combined headline/description text.
X = data['text']

# Targets: category names mapped to integer ids. The fitted encoder stays in
# `label` so predicted ids can be converted back to category names later.
label = LabelEncoder()
label.fit(data['category'])
y = label.transform(data['category'])

# Hold out 10% of the rows for testing; random_state fixes the split for a
# given row order.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=2
)

In [None]:
# TF-IDF vectorizer settings.
tfidf_settings = dict(
    stop_words='english',   # drop common English stop words
    max_df=0.99,            # ignore terms present in more than 99% of documents
    min_df=10,              # ignore terms present in fewer than 10 documents
    ngram_range=(1, 2),     # unigrams and bigrams
    lowercase=True,
    max_features=5000,      # cap the vocabulary at 5000 terms
)
vecto = TfidfVectorizer(**tfidf_settings)

# Learn the vocabulary and IDF weights from the training split only.
vecto.fit(X_train)

# Replace the raw text with dense TF-IDF matrices (toarray() converts the
# sparse result to a NumPy array).
X_train = vecto.transform(X_train).toarray()
X_test = vecto.transform(X_test).toarray()
X_train.shape

(40500, 5000)

In [None]:
# Wrap the training TF-IDF matrix in a DataFrame with one column per term.
# get_feature_names() no longer exists in recent scikit-learn releases, so
# prefer get_feature_names_out() and fall back to the old method only when
# the installed version predates it.
if hasattr(vecto, 'get_feature_names_out'):
    feature_names = vecto.get_feature_names_out()
else:
    feature_names = vecto.get_feature_names()

tfidf_df = pd.DataFrame(X_train, columns=feature_names)
tfidf_df.head(4)



Unnamed: 0,000,10,10 best,10 reasons,10 things,10 tips,10 ways,10 year,10 years,100,...,youtube,zealand,zen,zero,zika,zika virus,zimmerman,zoe,zone,zuckerberg
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Print the shape of each train/test array, one line per array.
shape_report = (
    ("The X_train shape----->", X_train),
    ('The X_text shape------>', X_test),
    ("THe y_train shape----->", y_train),
    ("The y_test shape------>", y_test),
)
for caption, split_array in shape_report:
    print(caption, split_array.shape)

The X_train shape-----> (40500, 5000)
The X_text shape------> (4500, 5000)
THe y_train shape-----> (40500,)
The y_test shape------> (4500,)


In [None]:

def classifier_scores(y_train,y_test, pred_train, pred_test):
    """Print accuracy, recall, precision and F1 for the train and test splits.

    Recall, precision and F1 are computed with average='macro'. For every
    metric the train value is printed first, then the test value, with a
    blank line before each metric group.

    Parameters
    ----------
    y_train, y_test : true labels of the training and test splits.
    pred_train, pred_test : predicted labels for the same splits.

    Returns
    -------
    None. The scores are only printed.
    """
    print()
    print("Train data accuracy score: ", accuracy_score(y_train, pred_train))
    print("Test data accuracy score: ", accuracy_score(y_test, pred_test))

    # (train caption, test caption, metric function) for the macro-averaged scores.
    macro_reports = (
        ("Recall score on train data: ", "Recall score on test data: ", recall_score),
        ("Precision score on train data: ", "Precision score on test data: ", precision_score),
        ("F1 score on train data: ", "F1 score on test data: ", f1_score),
    )
    for train_caption, test_caption, metric_fn in macro_reports:
        print()
        print(train_caption, metric_fn(y_train, pred_train, average='macro'))
        print(test_caption, metric_fn(y_test, pred_test, average='macro'))

In [None]:
# Multinomial Naive Bayes on the TF-IDF features.
print("Multinamial NB----------------------------------->")
multinb = MultinomialNB()
multinb.fit(X_train, y_train)

# Predict on both splits so train and test scores can be compared side by
# side (a large gap between them points to overfitting).
y_train_pred, y_test_pred = multinb.predict(X_train), multinb.predict(X_test)
classifier_scores(y_train, y_test, y_train_pred, y_test_pred)

Multinamial NB----------------------------------->

Train data accuracy score:  0.7319753086419754
Test data accuracy score:  0.6488888888888888

Recall score on train data:  0.7323129950981127
Recall score on test data:  0.6442580544769801

Precision score on train data:  0.7342138020870859
Precision score on test data:  0.6489390838441552

F1 score on train data:  0.731993602921203
F1 score on test data:  0.6448504755182449


## Predicting the category of a new document


In [None]:
new_doc = """
"Be confident in your heritage. Be confident in your blackness," President Barack Obama told graduates and their families at Howard University's 2016 Commencement Ceremony. It was one of many moments in a speech that honored the achievements of black folks — many Howard alumni — and called on graduates to get and stay politically active. His speech was met with laughter, generous applause, and largely positive reviews. Paul Holston, editor-in-chief of Howard's student newspaper The Hilltop, wrote that Obama's address was "strong, eloquent, and inspirational," and would "go down as one of the most significant moments in Howard University's history."

Where Does The 'Pull Up Your Pants' School Of Black Politics Come From?
CODE SWITCH
Where Does The 'Pull Up Your Pants' School Of Black Politics Come From?
Howard students weren't the only ones cheering over the speech. Janell Ross at The Washington Post lauded Obama's call for "empathy and [an] expanded moral imagination" as one of the few surprising and thought-provoking messages that graduates will receive this season. On Twitter, Slate writer Jamelle Bouie called the speech "a great mediation on democracy AND a celebration of black life." Mathew Rodriguez at Mic described Obama's speech as "one of the best and blackest he's given."

Melissa Harris-Perry, editor-at-large of Elle, wrote that Obama's speech was remarkable in its treatment of gender as well as race, and proved "that he is our most black, feminist president to date" by highlighting the genius of black women like Lorraine Hansberry, Harriet Tubman, Fannie Lou Hamer and Zora Neale Hurston:

"Once again, [Obama] put black women at the very center of the stories he told and the lessons he imparted. As he warmed up, he jokingly referred to 'Shonda Rhimes owning Thursday night' and 'Beyonce running the world.' They were casual references, not central themes of his talk, but even here he deployed two boss black women as representatives of black excellence and achievement."

The tone surprised some African-Americans who had been critical of what they see as the president's habit of talking down to primarily black crowds. The last time Obama spoke at an HBCU's commencement was at Morehouse College in 2013, where he was criticized for promoting a finger-wagging brand of respectability politics with remarks like these:

"Just as Morehouse has taught you to expect more of yourselves, inspire those who look up to you to expect more of themselves. We know that too many young men in our community continue to make bad choices. And I have to say, growing up, I made quite a few myself. Sometimes I wrote off my own failings as just another example of the world trying to keep a black man down. I had a tendency sometimes to make excuses for me not doing the right thing. But one of the things that all of you have learned over the last four years is there's no longer any room for excuses."

Ta-Nehisi Coates responded to that speech by calling out what he saw as the double standard Obama used in addressing African-Americans. In a piece called "How the Obama Administration Talks to Black America," Coates wrote that the president acts like someone "who sees holding African Americans to a standard of individual responsibility as part of his job. This is not a role Barack Obama undertakes with other communities."

Some observers who were worried that the president might affect the same scolding posture at Howard were pleased, if not entirely won over. Michael P. Jeffries, over at The Boston Globe, said that the Howard speech was more earnest in its depiction of structural inequality:

"...noting that a black woman is only paid 66 cents for every $1 earned by an equally qualified white man, and that mass incarceration has exploded since [Obama's] college days. In one striking passage, he reminded the audience: 'We have cousins and uncles and brothers and sisters who we remember were just as smart and just as talented as we were, but somehow got ground down by structures that are unfair and unjust.' Rather than individual failings, Obama shifted responsibility away from black families and toward the institutions that produce black suffering."

Still, Jeffries observes "how much further [Obama] has to go" to fairly depict race in America. In his speech, Obama praises Black Lives Matter activist Brittany Packnett as someone who broke with the orthodoxy of her movement to enact change. But, according to Jeffries:

"What Obama left out is that Packnett is not an anomaly among Black Lives Matter leadership. Protesters have interrupted campaign events for Hillary Clinton, Bernie Sanders, and Donald Trump, but activist DeRay McKesson certainly believes in voting: He ran for mayor of Baltimore. The Chicago-based Black Youth Project has protested mayor Rahm Emanuel and held rallies in the name of Rekia Boyd and other victims of police violence. The organization has also published research reports, and its directors have worked with several well established and likeminded groups, including the NAACP.

So, in many respects, Black Lives Matter is already living out the charge put forth by the president."

Clarence B. Jones, at HuffPost Black Voices, wrote that "the content of what President Obama said, and the way in which he spoke it were engaging; at times, powerfully moving." But he agreed with Jeffries's sentiment that the president undersold the accomplishments of the BLM movement when he chastised young people for not turning out to vote in midterm elections:

"It is not enough to patronizingly lecture that 'the perfect' 'should not be the enemy of the good or the better.' He should have not just singled out Brittany Packnett, a leader in the Black Lives Matter Movement, for praise in meeting with him and other establishment political leaders. He should have said, flat out, like 'Straight Outta Compton,' that leaders of the Movement, like Dr. King earlier, had forced America's conscience to confront the reality of successive police shootings of black men, in several circumstances where the use of non-lethal force appeared to be an available option to effect an arrest.

In effect, President Obama should have acknowledged that he AND ALL America owe a debt of gratitude to the courage and leadership of the Black Lives Matter movement in highlighting the apparent systemic racism in our criminal justice system when applied to African-Americans in several or our communities, nationwide."

BLM activists themselves had some things to say about Obama's speech. DeRay McKesson tweeted his approval of the president's remarks, but also warned about oversimplifying the message:

"Obama's commencement speech at HowardU today was important, as we both reflected on the past in blackness and our future. Obama's speech was complicated, as he noted the role of compromise in the work of progress, while maintains a commitment to one's values. Obama also rightly noted that awareness is not the win, but is the initial work that creates space for later wins. [He] also noted the work of activists like [Brittany Packnett], noting that we will have to be intentional in how we change systems/structures. In many ways, this speech echoed themes he offered when [she and I] met with him a month ago. He is becoming more explicit re: discussing blackness."

McKesson continued:

"We protest to change the world, not to continue protesting until the end of time. Awareness must lead to work focused on concrete solutions. Obama's focus on voting was not an indictment of the movement, of protest, or of organizing. Don't reduce his speech to this stale reading."

Still, others weren't blown away by the speech. Maya Rhodan at Time magazine described Obama's Howard speech as another replica from the "mold he often leans on in remarks to black audiences." She offered up the president's time-tested speech recipe: "a nod to our nation's racial history, a pit-stop on his presidency, and a call to pay it forward."

In Commencement Speech, Obama Advises Howard University Grads On Creating Change
THE TWO-WAY
In Commencement Speech, Obama Advises Howard University Grads On Creating Change
Over at The Guardian, Steven W. Thrasher reflected that "part of Obama's genius as our first black president is that he can provoke so many responses...even in the course of a single speech." But that genius is complicated. Thrasher was inspired by Obama's address until "respectability politics started to creep in," and the president began urging the crowd to empathize with "the refugee, the immigrant, the rural poor, the transgender person and yes, the middle-aged white guy who you may think has all the advantages, but over the last several decades has seen his world upended by economic and cultural and technological change and feels powerless to stop it." Here's more from Thrasher:

"Why did the nation's first black president feel the need to equate the transgender person who can't use the bathroom in North Carolina, and the unfairly maligned immigrant with that 'middle-aged white guy'?

Who feels so threatened by the 'cultural' change of living under a black president and living under conditions a little more like those black Americans have endured for hundreds of years that he's likely voting for Donald Trump?

Who isn't losing all of his white privilege, because he still has a black president telling black grads to get in his head?
"""

In [None]:
# The vectorizer expects an iterable of documents, so wrap the single text in a list.
doc_list = [new_doc]

# Reuse the already-fitted TF-IDF vocabulary, then classify the document.
doc_test = vecto.transform(doc_list)
return_pred = multinb.predict(doc_test)

# Result: map the predicted integer id back to its category name.
label.inverse_transform(return_pred)

array(['BLACK VOICES'], dtype=object)