# 1. Data Preparation

## 1.1. Web Scraping - Reddit WorldNews Headlines Data

In [1]:
!pip3 install praw
import praw

!pip3 install psaw
from psaw import PushshiftAPI
api = PushshiftAPI()

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting praw
  Downloading praw-7.6.0-py3-none-any.whl (188 kB)
[K     |████████████████████████████████| 188 kB 5.1 MB/s 
[?25hCollecting websocket-client>=0.54.0
  Downloading websocket_client-1.3.2-py3-none-any.whl (54 kB)
[K     |████████████████████████████████| 54 kB 2.7 MB/s 
[?25hCollecting prawcore<3,>=2.1
  Downloading prawcore-2.3.0-py3-none-any.whl (16 kB)
Collecting update-checker>=0.18
  Downloading update_checker-0.18.0-py3-none-any.whl (7.0 kB)
Installing collected packages: websocket-client, update-checker, prawcore, praw
Successfully installed praw-7.6.0 prawcore-2.3.0 update-checker-0.18.0 websocket-client-1.3.2
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting psaw
  Downloading psaw-0.1.0-py3-none-any.whl (15 kB)
Installing collected packages: psaw
Successfully installed psaw-0.1.0


In [2]:
import numpy as np
import pandas as pd
import datetime

In [3]:
### Before PRAW can be used to scrape data we need to authenticate ourselves. 
### For this we need to create a Reddit instance and provide it with 
### a client_id, client_secret, and an user_agent .
reddit = praw.Reddit(client_id='heqEngJNw6njXzmDGdKFDw', 
                     client_secret='XS0l1e_WVb65wkYRSbqmsJoNXfYfhA', 
                     user_agent='NewsHeadlines WebScraping',
                     check_for_async=False)

In [None]:
## Get Reddit Data of worldNews headlines year by year with PSAW
### The large amount of data would be easier to collect by 
### changing the index each year.
posted_after = int(datetime.datetime(2020, 1, 1).timestamp())
posted_before = int(datetime.datetime(2021, 1, 1).timestamp())

### To get approximative top 25 headlines ranked by changing 
### the index of 'score' and most of the time, 
### we need to download the data first and use R and Excel 
### to delete them to get the top 25 headlines for a single day.
api_request_generator = api.search_submissions(subreddit='worldnews',
                                               after=posted_after,
                                               before=posted_before,
                                               score = '>50')

year_2020_submissions = pd.DataFrame(
    [submission.d_ for submission in api_request_generator])

### add one more column to convert the unix time format to the one we can read
year_2020_submissions['date'] = pd.to_datetime(
    year_2020_submissions['created_utc'], 
    utc=True, unit='s')

### store the data as csv format year by year
year_2020_submissions.to_csv('year_2020_dataset.csv', index = False)

### have an overview of the dimension of the table
year_2020_submissions.shape




(369034, 84)

## 1.2. Get Dow Jones Industrial Average Data Preparation using R
## 1.3. Combine Year 2011 to 2021 News Headlines Data using R
## 1.4. Double Confirm to get top 25 headlines Data using R and Excel
## 1.5. Combine News Headlines data and DJIA index data with Excel

## 1.6. Load All Data

In [4]:
!pip install vaderSentiment

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 4.3 MB/s 
Installing collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [5]:
# Import library: 

from textblob import TextBlob
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

import numpy as np
import pandas as pd
import datetime

In [7]:
### Load Reddit News Healines Data
news_data = pd.read_csv('2.1_Reddit News Headlines 2011_2021.csv')
news_data.head()


Unnamed: 0,date,top,title
0,01/01/2011,top1,Wikileaks cable reveals U.S. conspired to reta...
1,01/01/2011,top2,Latin American countries recognize the Palesti...
2,01/01/2011,top3,"""China's first known stealth aircraft just eme..."
3,01/01/2011,top4,2011 arrives around the world. - The Big Picture
4,01/01/2011,top5,WikiLeaks hackers say Zim websites shut down -...


In [8]:
### Check the null object in Reddit News Healines Data
news_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95972 entries, 0 to 95971
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    95972 non-null  object
 1   top     95972 non-null  object
 2   title   95972 non-null  object
dtypes: object(3)
memory usage: 2.2+ MB


In [9]:
### Load DJIA Index Data
stock_market_data = pd.read_csv('1_DJIA_data_2011_2021.csv')
stock_market_data["date"] = pd.to_datetime(stock_market_data["date"])
stock_market_data = stock_market_data.sort_values(by = 'date', ascending = True)
stock_market_data.head()
stock_market_data.describe()

Unnamed: 0,open,high,low,close,volume,adjusted
count,2769.0,2769.0,2769.0,2769.0,2769.0,2769.0
mean,20578.421177,20685.32896,20468.155666,20583.370246,223201800.0,20583.370246
std,6642.622861,6674.622341,6611.029925,6643.124025,137676600.0,6643.124025
min,10651.44,10808.49,10404.49,10655.3,8410000.0,10655.3
25%,15496.63,15555.07,15421.75,15498.32,106910000.0,15498.32
50%,18139.1,18213.26,18064.5,18135.72,188130000.0,18135.72
75%,25678.17,25810.43,25501.45,25679.9,312770000.0,25679.9
max,36522.48,36679.44,36396.19,36488.63,915990000.0,36488.63


In [11]:
### Check the null object in DJIA Index Data
stock_market_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2769 entries, 20 to 2768
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   symbol    2769 non-null   object        
 1   date      2769 non-null   datetime64[ns]
 2   open      2769 non-null   float64       
 3   high      2769 non-null   float64       
 4   low       2769 non-null   float64       
 5   close     2769 non-null   float64       
 6   volume    2769 non-null   float64       
 7   adjusted  2769 non-null   float64       
dtypes: datetime64[ns](1), float64(6), object(1)
memory usage: 194.7+ KB


In [13]:
### Load combined Data (Reddit News Headlines + DJIA Label Data)
combined_data = pd.read_csv('3_combined_Headlines_DJIA_2011_2021.csv')
combined_data["date"] = pd.to_datetime(combined_data["date"])
combined_data.head()


Unnamed: 0,date,label,top1,top2,top3,top4,top5,top6,top7,top8,...,top16,top17,top18,top19,top20,top21,top22,top23,top24,top25
0,2011-03-01,1,"German Interior Minister: ""WikiLeaks is irrita...",Her father killed the boy and four of his brot...,An anonymous group of Palestinian students has...,"The Guardian's New Year resolutions: ""Visa, Ma...","""The diplomatic telegrams that WikiLeaks publi...","Chinese police's flight to congo to ""rescue"" 1...","Small town of Zug, Switzerland is the headquar...",'Israel Navy chasing Gaza-bound Asia 1' - Two ...,...,1000 indian farmers to travel to cuba to under...,Spanish revellers stub out cigarettes as new l...,"IDF chief: ""I am preparing the Israeli army fo...","PA official: Chile, Uruguay to recognize Pales...",Palestine: Brazil to Host First Palestinian Em...,Germans more hostile to Muslims than other Wes...,Israeli fighter jets strike Gaza Strip - Israe...,"Iran says shot down two ""western drone reconna...",Classified maps show security in Afghanistan i...,Russia-China oil pipeline opens
1,2011-04-01,1,Wikileaks releases cable of the July 1990 meet...,"""The WikiLeaks saga has a message . . . Your d...",7.1 magnitude earthquake hits Chile.,Lack of Jobs in Southern Europe Frustrates the...,The Village Where the Neo-Nazis Rule. Hitler ...,Settlers set fire to home as seven Palestinian...,Actor Pete Postlethwaite dies :(,"WikiLeaks: Israel Plans Total War on Lebanon, ...",...,Anonymous lends a hand to Tunisian protesters:...,N Korea to spend 100 million on new offices &a...,'The English language has almost doubled in si...,China boasts breakthrough in nuclear technology,Investigation closes in on German weapons comp...,Ugandan high court rules Rolling Stone newspap...,Japan Pushes Away Skilled Immigrants Despite A...,"Anticipating a WikiLeaks disclosure, Bank of A...",Greece decides to construct a fence along its ...,Israel extends a six-month ban on the immigrat...
2,2011-05-01,1,Punjab(Pakistan) governor Salman Taseer assass...,Some Christians have predicted the end of days...,"If Youre Homeless In Japan, Here Are Your Options",And so it starts...Sri Lanka considers banning...,Saudi Arabia announces that all online publish...,A 15-year-old girl is more likely to die in ch...,As Hungary takes over the rotating presidency ...,WikiLeaks: Drug Enforcement Agency Goes \r\nGl...,...,Reynaldo Dagsa was taking a picture of his fam...,Volunteer Cyber Army Emerges in Estonia.,Cartels carve bloody swath through Central Ame...,Greece plans to build a 12.5km fence at border...,Argentina's 'impressive' bank heist - Al Jaze...,Iran invites nations to nuclear plants: \r\nIr...,Japan to Propose Closer Military Ties With S. ...,"Crocodiles, snakes are danger in flooded Austr...",U.S. agents are armed with insider information...,Mountain Revolt - Bavarian Farmers Threaten Bi...
3,2011-06-01,0,Cable from the U.S. Embassy in Tel Aviv says I...,"Over 3,000 birds fall dead in AR, over 500 in ...",Assange: WikiLeaks does not have concerns for ...,Greek professor of linguistics at Cambridge Un...,"President Ahmedinejad ""had harsh words for the...",WikiLeaks: Chinese attacks on Google came from...,"""France is the country that conducts the most ...","Chinese adult children must, by law, visit par...",...,Israeli air strikes hit Gaza. Israeli warplane...,Romania's witches forced to pay income tax - a...,Police in the UK have banned ITN from a press ...,"""The Pentagon underestimated the speed at whic...",Mexico overtakes US in percentage of populatio...,BBC News - Palestinians look for int'l support...,"""In what appears to be the first diplomatic ca...",Uganda court bans media outing people as Gay f...,Mongolia is experiencing a huge resource-based...,France top culprit in industrial espionage - F...
4,2011-07-01,0,Italy becomes the first country to ban plastic...,Five Myths About the Chinese Communist Party,The little red book that swept France: The la...,Furore over 'censored' edition of Huckleberry ...,"Together, Slim, Gates, Buffett, and Ambani con...",China hiding military build-up: WikiLeaks,Population 7 Billion - By 2045 global populati...,Israelis bribed to admit U.S. goods to Gaza: W...,...,Mossad's new director says that a military att...,Increased numbers of policemen are being deplo...,Anonymous joins Tunisian activists to call for...,Ivory Coast: Gbagbo 'expels UK and Canada envoys',UK students to launch new wave of protests,Food Price Surge Puts Strain On India's Poor\r\n,WikiLeaks XXVIII: Organized Crime Squeezing th...,Pakistan's government nears collapse again,Saudi Arabia 'detains' Israeli vulture for spying,"""Taseer died for the Christians and now we are..."


In [14]:
combined_data.shape

(2503, 27)

In [15]:
# Convert date data-type
stock_market_data['date'] = pd.to_datetime(stock_market_data.date)
combined_data['date'] = pd.to_datetime(combined_data.date)

In [16]:
# Check the length of data-frame and check for null values
stock_market_data.info()
combined_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2769 entries, 20 to 2768
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   symbol    2769 non-null   object        
 1   date      2769 non-null   datetime64[ns]
 2   open      2769 non-null   float64       
 3   high      2769 non-null   float64       
 4   low       2769 non-null   float64       
 5   close     2769 non-null   float64       
 6   volume    2769 non-null   float64       
 7   adjusted  2769 non-null   float64       
dtypes: datetime64[ns](1), float64(6), object(1)
memory usage: 194.7+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2503 entries, 0 to 2502
Data columns (total 27 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    2503 non-null   datetime64[ns]
 1   label   2503 non-null   int64         
 2   top1    2503 non-null   object        
 3   top2    2503 non-null   obje

In [17]:
# to see whether we have missing data in DJIA index.
total = stock_market_data.isnull().sum().sort_values(ascending = False)

percent = (stock_market_data.isnull().sum()/ stock_market_data.isnull().count()).sort_values(ascending = False)
missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing_data

Unnamed: 0,Total,Percent
symbol,0,0.0
date,0,0.0
open,0,0.0
high,0,0.0
low,0,0.0
close,0,0.0
volume,0,0.0
adjusted,0,0.0


In [18]:
# Merge the data 1_DJIA_data_2011_2021.csv and 3_combined_Headlines_DJIA_2011_2021.csv on the date field:

index_headlines_data= pd.merge(combined_data, stock_market_data, how='left', on='date')
index_headlines_data

Unnamed: 0,date,label,top1,top2,top3,top4,top5,top6,top7,top8,...,top23,top24,top25,symbol,open,high,low,close,volume,adjusted
0,2011-03-01,1,"German Interior Minister: ""WikiLeaks is irrita...",Her father killed the boy and four of his brot...,An anonymous group of Palestinian students has...,"The Guardian's New Year resolutions: ""Visa, Ma...","""The diplomatic telegrams that WikiLeaks publi...","Chinese police's flight to congo to ""rescue"" 1...","Small town of Zug, Switzerland is the headquar...",'Israel Navy chasing Gaza-bound Asia 1' - Two ...,...,"Iran says shot down two ""western drone reconna...",Classified maps show security in Afghanistan i...,Russia-China oil pipeline opens,^DJI;,11577.43,11711.47,11577.35,11670.75,203420000.0,11670.75
1,2011-04-01,1,Wikileaks releases cable of the July 1990 meet...,"""The WikiLeaks saga has a message . . . Your d...",7.1 magnitude earthquake hits Chile.,Lack of Jobs in Southern Europe Frustrates the...,The Village Where the Neo-Nazis Rule. Hitler ...,Settlers set fire to home as seven Palestinian...,Actor Pete Postlethwaite dies :(,"WikiLeaks: Israel Plans Total War on Lebanon, ...",...,"Anticipating a WikiLeaks disclosure, Bank of A...",Greece decides to construct a fence along its ...,Israel extends a six-month ban on the immigrat...,^DJI;,11670.90,11698.22,11635.74,11691.18,178630000.0,11691.18
2,2011-05-01,1,Punjab(Pakistan) governor Salman Taseer assass...,Some Christians have predicted the end of days...,"If Youre Homeless In Japan, Here Are Your Options",And so it starts...Sri Lanka considers banning...,Saudi Arabia announces that all online publish...,A 15-year-old girl is more likely to die in ch...,As Hungary takes over the rotating presidency ...,WikiLeaks: Drug Enforcement Agency Goes \r\nGl...,...,"Crocodiles, snakes are danger in flooded Austr...",U.S. agents are armed with insider information...,Mountain Revolt - Bavarian Farmers Threaten Bi...,^DJI;,11688.61,11742.68,11652.89,11722.89,169990000.0,11722.89
3,2011-06-01,0,Cable from the U.S. Embassy in Tel Aviv says I...,"Over 3,000 birds fall dead in AR, over 500 in ...",Assange: WikiLeaks does not have concerns for ...,Greek professor of linguistics at Cambridge Un...,"President Ahmedinejad ""had harsh words for the...",WikiLeaks: Chinese attacks on Google came from...,"""France is the country that conducts the most ...","Chinese adult children must, by law, visit par...",...,Uganda court bans media outing people as Gay f...,Mongolia is experiencing a huge resource-based...,France top culprit in industrial espionage - F...,^DJI;,11716.93,11736.74,11667.46,11697.31,193080000.0,11697.31
4,2011-07-01,0,Italy becomes the first country to ban plastic...,Five Myths About the Chinese Communist Party,The little red book that swept France: The la...,Furore over 'censored' edition of Huckleberry ...,"Together, Slim, Gates, Buffett, and Ambani con...",China hiding military build-up: WikiLeaks,Population 7 Billion - By 2045 global populati...,Israelis bribed to admit U.S. goods to Gaza: W...,...,Pakistan's government nears collapse again,Saudi Arabia 'detains' Israeli vulture for spying,"""Taseer died for the Christians and now we are...",^DJI;,11696.86,11726.94,11599.68,11674.76,188720000.0,11674.76
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2498,2021-12-27,1,Windsor Castle intruder 'armed with a crossbow...,Israeli PM announces plan to double the amount...,Iran threatens nuclear explosion in Israel????...,What Putin's Russia fears most of all | The Sp...,Call It Like I See It,"Training Civilians, Ukraine Nurtures a Resista...","Training Civilians, Ukraine Nurtures a Resista...",The living Christmas Tree,...,Teresa Giudice receives new car from fianc?? L...,Colorado mom found homeless in NYC now ?????sa...,"Tom Holland, dating taller Zendaya, likes post...",^DJI;,35954.48,36306.61,35954.48,36302.38,244350000.0,36302.38
2499,2021-12-28,1,India freezes bank accounts of Mother Teresa's...,"ENTREVISTA: A MIQUEL CRUZ, ESCRITOR Y NOVELIST...",Jesus statue smashed in spate of attacks on In...,Please join me live This Now 7am US Eastern ti...,New study finds booster protection against Omi...,(STL.News) Thirty-five people were massacred o...,U.S. President Biden signs US$770 billion defe...,CDC cuts quarantine time for COVID-affected Am...,...,Nicaragua seizes former Taiwan embassy to give...,?????Karen????? caught on video slapping passe...,How Ethiopia?????s Conflict Deepened in 2021,^DJI;,36302.99,36527.26,36302.99,36398.21,239090000.0,36398.21
2500,2021-12-29,1,Biden signs $740B defense policy bill to overh...,"Afghanistan?????s Former Female Troops, Once H...","(STL.News) Last week, China made two deliverie...","NAKED JOURNALIST ""FALLS"" FROM WINDOW IN RUSSIA","Chile Rewrites Its Constitution, Confronting C...",Haile banks got invited by Milla Mac and it crazy,Evidence from the UK suggests Australia should...,Read the latest stories through PIB tweets as ...,...,BBC only just behind Hamas in respected Jewish...,?????Karen????? who slapped Delta passenger is...,China Says It Complained to UN About Musk?????...,^DJI;,36421.14,36571.55,36396.19,36488.63,213480000.0,36488.63
2501,2021-12-30,0,Locked-down residents in one of China's bigges...,REFLEXI??N: GRATITUD EN LA ADVERSIDAD,PACHI LARREA: EN SU PUNTO,Israel's Gantz Hosts Palestinian President Abb...,"Vean esta particular cena de noche buena, no l...",Hong Kong Police Raid Office of Pro-Democracy ...,this is an article,COVID-19: France hit by ?????dizzying????? dai...,...,Australian barred from leaving Israel until th...,After 2 years China still not allowing interna...,"Harry, Ron and Hermione reunite for Harry Pott...",^DJI;,36522.48,36679.44,36372.13,36398.08,205620000.0,36398.08


## 1.7. Check missing data

In [19]:
# to see whether we have missing data or not.
total = index_headlines_data.isnull().sum().sort_values(ascending = False)

percent = (index_headlines_data.isnull().sum()/ index_headlines_data.isnull().count()).sort_values(ascending = False)
missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing_data

Unnamed: 0,Total,Percent
top25,1,0.0004
top24,0,0.0
top18,0,0.0
top19,0,0.0
top20,0,0.0
top21,0,0.0
top22,0,0.0
top23,0,0.0
date,0,0.0
label,0,0.0


## 1.8 Combine news headlines as one block to feed:

In [20]:
# Combine the top news headlines:

headlines= []

for row  in range (0, len(index_headlines_data.index)):
    headlines.append(" ".join(str(x) for x in index_headlines_data.iloc[row, 2:27]))

In [21]:
# print a sample of combine headlines:

headlines[0]



In [22]:
# clean the data: remove breaks and slashes
clean_headline = []

for h in range (0, len(headlines)):
    clean_headline.append(re.sub("b[(')]", '', headlines[h])) # remove b'
    clean_headline[h] = re.sub('b[(")]', '', clean_headline[h]) # remove b"
    clean_headline[h] = re.sub("\'", '', clean_headline[h]) # remove \'

In [23]:
clean_headline[0]



In [24]:
# Add the clean headline to the merge data:
index_headlines_data['Combined_News'] = clean_headline
index_headlines_data

Unnamed: 0,date,label,top1,top2,top3,top4,top5,top6,top7,top8,...,top24,top25,symbol,open,high,low,close,volume,adjusted,Combined_News
0,2011-03-01,1,"German Interior Minister: ""WikiLeaks is irrita...",Her father killed the boy and four of his brot...,An anonymous group of Palestinian students has...,"The Guardian's New Year resolutions: ""Visa, Ma...","""The diplomatic telegrams that WikiLeaks publi...","Chinese police's flight to congo to ""rescue"" 1...","Small town of Zug, Switzerland is the headquar...",'Israel Navy chasing Gaza-bound Asia 1' - Two ...,...,Classified maps show security in Afghanistan i...,Russia-China oil pipeline opens,^DJI;,11577.43,11711.47,11577.35,11670.75,203420000.0,11670.75,"German Interior Minister: ""WikiLeaks is irrita..."
1,2011-04-01,1,Wikileaks releases cable of the July 1990 meet...,"""The WikiLeaks saga has a message . . . Your d...",7.1 magnitude earthquake hits Chile.,Lack of Jobs in Southern Europe Frustrates the...,The Village Where the Neo-Nazis Rule. Hitler ...,Settlers set fire to home as seven Palestinian...,Actor Pete Postlethwaite dies :(,"WikiLeaks: Israel Plans Total War on Lebanon, ...",...,Greece decides to construct a fence along its ...,Israel extends a six-month ban on the immigrat...,^DJI;,11670.90,11698.22,11635.74,11691.18,178630000.0,11691.18,Wikileaks releases cable of the July 1990 meet...
2,2011-05-01,1,Punjab(Pakistan) governor Salman Taseer assass...,Some Christians have predicted the end of days...,"If Youre Homeless In Japan, Here Are Your Options",And so it starts...Sri Lanka considers banning...,Saudi Arabia announces that all online publish...,A 15-year-old girl is more likely to die in ch...,As Hungary takes over the rotating presidency ...,WikiLeaks: Drug Enforcement Agency Goes \r\nGl...,...,U.S. agents are armed with insider information...,Mountain Revolt - Bavarian Farmers Threaten Bi...,^DJI;,11688.61,11742.68,11652.89,11722.89,169990000.0,11722.89,PunjaPakistan) governor Salman Taseer assassin...
3,2011-06-01,0,Cable from the U.S. Embassy in Tel Aviv says I...,"Over 3,000 birds fall dead in AR, over 500 in ...",Assange: WikiLeaks does not have concerns for ...,Greek professor of linguistics at Cambridge Un...,"President Ahmedinejad ""had harsh words for the...",WikiLeaks: Chinese attacks on Google came from...,"""France is the country that conducts the most ...","Chinese adult children must, by law, visit par...",...,Mongolia is experiencing a huge resource-based...,France top culprit in industrial espionage - F...,^DJI;,11716.93,11736.74,11667.46,11697.31,193080000.0,11697.31,Cable from the U.S. Embassy in Tel Aviv says I...
4,2011-07-01,0,Italy becomes the first country to ban plastic...,Five Myths About the Chinese Communist Party,The little red book that swept France: The la...,Furore over 'censored' edition of Huckleberry ...,"Together, Slim, Gates, Buffett, and Ambani con...",China hiding military build-up: WikiLeaks,Population 7 Billion - By 2045 global populati...,Israelis bribed to admit U.S. goods to Gaza: W...,...,Saudi Arabia 'detains' Israeli vulture for spying,"""Taseer died for the Christians and now we are...",^DJI;,11696.86,11726.94,11599.68,11674.76,188720000.0,11674.76,Italy becomes the first country to ban plastic...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2498,2021-12-27,1,Windsor Castle intruder 'armed with a crossbow...,Israeli PM announces plan to double the amount...,Iran threatens nuclear explosion in Israel????...,What Putin's Russia fears most of all | The Sp...,Call It Like I See It,"Training Civilians, Ukraine Nurtures a Resista...","Training Civilians, Ukraine Nurtures a Resista...",The living Christmas Tree,...,Colorado mom found homeless in NYC now ?????sa...,"Tom Holland, dating taller Zendaya, likes post...",^DJI;,35954.48,36306.61,35954.48,36302.38,244350000.0,36302.38,Windsor Castle intruder armed with a crossbow ...
2499,2021-12-28,1,India freezes bank accounts of Mother Teresa's...,"ENTREVISTA: A MIQUEL CRUZ, ESCRITOR Y NOVELIST...",Jesus statue smashed in spate of attacks on In...,Please join me live This Now 7am US Eastern ti...,New study finds booster protection against Omi...,(STL.News) Thirty-five people were massacred o...,U.S. President Biden signs US$770 billion defe...,CDC cuts quarantine time for COVID-affected Am...,...,?????Karen????? caught on video slapping passe...,How Ethiopia?????s Conflict Deepened in 2021,^DJI;,36302.99,36527.26,36302.99,36398.21,239090000.0,36398.21,India freezes bank accounts of Mother Teresas ...
2500,2021-12-29,1,Biden signs $740B defense policy bill to overh...,"Afghanistan?????s Former Female Troops, Once H...","(STL.News) Last week, China made two deliverie...","NAKED JOURNALIST ""FALLS"" FROM WINDOW IN RUSSIA","Chile Rewrites Its Constitution, Confronting C...",Haile banks got invited by Milla Mac and it crazy,Evidence from the UK suggests Australia should...,Read the latest stories through PIB tweets as ...,...,?????Karen????? who slapped Delta passenger is...,China Says It Complained to UN About Musk?????...,^DJI;,36421.14,36571.55,36396.19,36488.63,213480000.0,36488.63,Biden signs $740B defense policy bill to overh...
2501,2021-12-30,0,Locked-down residents in one of China's bigges...,REFLEXI??N: GRATITUD EN LA ADVERSIDAD,PACHI LARREA: EN SU PUNTO,Israel's Gantz Hosts Palestinian President Abb...,"Vean esta particular cena de noche buena, no l...",Hong Kong Police Raid Office of Pro-Democracy ...,this is an article,COVID-19: France hit by ?????dizzying????? dai...,...,After 2 years China still not allowing interna...,"Harry, Ron and Hermione reunite for Harry Pott...",^DJI;,36522.48,36679.44,36372.13,36398.08,205620000.0,36398.08,Locked-down residents in one of Chinas biggest...


In [26]:
# different authors can interpret the same news from different perspectives
# and write different headlines. thus, subjectivity and objectivity 
# play an important role in understanding the meaning behind the news

### get subjectivity:

def get_subjectivity(text):
    return TextBlob(text).sentiment.subjectivity
    
### get polarity:

def get_polarity(text):
    return TextBlob(text).sentiment.polarity

In [27]:
index_headlines_data['Subjectivity'] = index_headlines_data['Combined_News'].apply(get_subjectivity)
index_headlines_data['Polarity'] = index_headlines_data['Combined_News'].apply(get_polarity)

In [28]:
index_headlines_data

Unnamed: 0,date,label,top1,top2,top3,top4,top5,top6,top7,top8,...,symbol,open,high,low,close,volume,adjusted,Combined_News,Subjectivity,Polarity
0,2011-03-01,1,"German Interior Minister: ""WikiLeaks is irrita...",Her father killed the boy and four of his brot...,An anonymous group of Palestinian students has...,"The Guardian's New Year resolutions: ""Visa, Ma...","""The diplomatic telegrams that WikiLeaks publi...","Chinese police's flight to congo to ""rescue"" 1...","Small town of Zug, Switzerland is the headquar...",'Israel Navy chasing Gaza-bound Asia 1' - Two ...,...,^DJI;,11577.43,11711.47,11577.35,11670.75,203420000.0,11670.75,"German Interior Minister: ""WikiLeaks is irrita...",0.339644,0.007526
1,2011-04-01,1,Wikileaks releases cable of the July 1990 meet...,"""The WikiLeaks saga has a message . . . Your d...",7.1 magnitude earthquake hits Chile.,Lack of Jobs in Southern Europe Frustrates the...,The Village Where the Neo-Nazis Rule. Hitler ...,Settlers set fire to home as seven Palestinian...,Actor Pete Postlethwaite dies :(,"WikiLeaks: Israel Plans Total War on Lebanon, ...",...,^DJI;,11670.90,11698.22,11635.74,11691.18,178630000.0,11691.18,Wikileaks releases cable of the July 1990 meet...,0.416723,0.080403
2,2011-05-01,1,Punjab(Pakistan) governor Salman Taseer assass...,Some Christians have predicted the end of days...,"If Youre Homeless In Japan, Here Are Your Options",And so it starts...Sri Lanka considers banning...,Saudi Arabia announces that all online publish...,A 15-year-old girl is more likely to die in ch...,As Hungary takes over the rotating presidency ...,WikiLeaks: Drug Enforcement Agency Goes \r\nGl...,...,^DJI;,11688.61,11742.68,11652.89,11722.89,169990000.0,11722.89,PunjaPakistan) governor Salman Taseer assassin...,0.350014,0.092459
3,2011-06-01,0,Cable from the U.S. Embassy in Tel Aviv says I...,"Over 3,000 birds fall dead in AR, over 500 in ...",Assange: WikiLeaks does not have concerns for ...,Greek professor of linguistics at Cambridge Un...,"President Ahmedinejad ""had harsh words for the...",WikiLeaks: Chinese attacks on Google came from...,"""France is the country that conducts the most ...","Chinese adult children must, by law, visit par...",...,^DJI;,11716.93,11736.74,11667.46,11697.31,193080000.0,11697.31,Cable from the U.S. Embassy in Tel Aviv says I...,0.387307,0.072972
4,2011-07-01,0,Italy becomes the first country to ban plastic...,Five Myths About the Chinese Communist Party,The little red book that swept France: The la...,Furore over 'censored' edition of Huckleberry ...,"Together, Slim, Gates, Buffett, and Ambani con...",China hiding military build-up: WikiLeaks,Population 7 Billion - By 2045 global populati...,Israelis bribed to admit U.S. goods to Gaza: W...,...,^DJI;,11696.86,11726.94,11599.68,11674.76,188720000.0,11674.76,Italy becomes the first country to ban plastic...,0.414674,-0.011461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2498,2021-12-27,1,Windsor Castle intruder 'armed with a crossbow...,Israeli PM announces plan to double the amount...,Iran threatens nuclear explosion in Israel????...,What Putin's Russia fears most of all | The Sp...,Call It Like I See It,"Training Civilians, Ukraine Nurtures a Resista...","Training Civilians, Ukraine Nurtures a Resista...",The living Christmas Tree,...,^DJI;,35954.48,36306.61,35954.48,36302.38,244350000.0,36302.38,Windsor Castle intruder armed with a crossbow ...,0.415769,0.076150
2499,2021-12-28,1,India freezes bank accounts of Mother Teresa's...,"ENTREVISTA: A MIQUEL CRUZ, ESCRITOR Y NOVELIST...",Jesus statue smashed in spate of attacks on In...,Please join me live This Now 7am US Eastern ti...,New study finds booster protection against Omi...,(STL.News) Thirty-five people were massacred o...,U.S. President Biden signs US$770 billion defe...,CDC cuts quarantine time for COVID-affected Am...,...,^DJI;,36302.99,36527.26,36302.99,36398.21,239090000.0,36398.21,India freezes bank accounts of Mother Teresas ...,0.384578,0.025649
2500,2021-12-29,1,Biden signs $740B defense policy bill to overh...,"Afghanistan?????s Former Female Troops, Once H...","(STL.News) Last week, China made two deliverie...","NAKED JOURNALIST ""FALLS"" FROM WINDOW IN RUSSIA","Chile Rewrites Its Constitution, Confronting C...",Haile banks got invited by Milla Mac and it crazy,Evidence from the UK suggests Australia should...,Read the latest stories through PIB tweets as ...,...,^DJI;,36421.14,36571.55,36396.19,36488.63,213480000.0,36488.63,Biden signs $740B defense policy bill to overh...,0.372645,0.050758
2501,2021-12-30,0,Locked-down residents in one of China's bigges...,REFLEXI??N: GRATITUD EN LA ADVERSIDAD,PACHI LARREA: EN SU PUNTO,Israel's Gantz Hosts Palestinian President Abb...,"Vean esta particular cena de noche buena, no l...",Hong Kong Police Raid Office of Pro-Democracy ...,this is an article,COVID-19: France hit by ?????dizzying????? dai...,...,^DJI;,36522.48,36679.44,36372.13,36398.08,205620000.0,36398.08,Locked-down residents in one of Chinas biggest...,0.337340,0.030059


# 2. Sentiment scores using SentimentIntensityAnalyzer 

In [29]:
# create function to get the sentiment score:

def getSIA(text):
    sia = SentimentIntensityAnalyzer()
    sentiment= sia.polarity_scores(text)
    return sentiment

In [30]:
# get Sentiment score for each day:
compound=[]
neg=[]
pos=[]
neu=[]
SIA=0

for i in range (0, len(index_headlines_data['Combined_News'])):
    SIA= getSIA(index_headlines_data['Combined_News'][i])
    compound.append(SIA['compound'])
    neg.append(SIA['neg'])
    pos.append(SIA['pos'])
    neu.append(SIA['neu'])

In [31]:
index_headlines_data['compound']= compound
index_headlines_data['neg']= neg
index_headlines_data['pos']=pos
index_headlines_data['neu']=neu

index_headlines_data

Unnamed: 0,date,label,top1,top2,top3,top4,top5,top6,top7,top8,...,close,volume,adjusted,Combined_News,Subjectivity,Polarity,compound,neg,pos,neu
0,2011-03-01,1,"German Interior Minister: ""WikiLeaks is irrita...",Her father killed the boy and four of his brot...,An anonymous group of Palestinian students has...,"The Guardian's New Year resolutions: ""Visa, Ma...","""The diplomatic telegrams that WikiLeaks publi...","Chinese police's flight to congo to ""rescue"" 1...","Small town of Zug, Switzerland is the headquar...",'Israel Navy chasing Gaza-bound Asia 1' - Two ...,...,11670.75,203420000.0,11670.75,"German Interior Minister: ""WikiLeaks is irrita...",0.339644,0.007526,-0.9967,0.185,0.087,0.728
1,2011-04-01,1,Wikileaks releases cable of the July 1990 meet...,"""The WikiLeaks saga has a message . . . Your d...",7.1 magnitude earthquake hits Chile.,Lack of Jobs in Southern Europe Frustrates the...,The Village Where the Neo-Nazis Rule. Hitler ...,Settlers set fire to home as seven Palestinian...,Actor Pete Postlethwaite dies :(,"WikiLeaks: Israel Plans Total War on Lebanon, ...",...,11691.18,178630000.0,11691.18,Wikileaks releases cable of the July 1990 meet...,0.416723,0.080403,-0.9917,0.108,0.050,0.842
2,2011-05-01,1,Punjab(Pakistan) governor Salman Taseer assass...,Some Christians have predicted the end of days...,"If Youre Homeless In Japan, Here Are Your Options",And so it starts...Sri Lanka considers banning...,Saudi Arabia announces that all online publish...,A 15-year-old girl is more likely to die in ch...,As Hungary takes over the rotating presidency ...,WikiLeaks: Drug Enforcement Agency Goes \r\nGl...,...,11722.89,169990000.0,11722.89,PunjaPakistan) governor Salman Taseer assassin...,0.350014,0.092459,-0.9927,0.117,0.052,0.831
3,2011-06-01,0,Cable from the U.S. Embassy in Tel Aviv says I...,"Over 3,000 birds fall dead in AR, over 500 in ...",Assange: WikiLeaks does not have concerns for ...,Greek professor of linguistics at Cambridge Un...,"President Ahmedinejad ""had harsh words for the...",WikiLeaks: Chinese attacks on Google came from...,"""France is the country that conducts the most ...","Chinese adult children must, by law, visit par...",...,11697.31,193080000.0,11697.31,Cable from the U.S. Embassy in Tel Aviv says I...,0.387307,0.072972,-0.9973,0.170,0.069,0.761
4,2011-07-01,0,Italy becomes the first country to ban plastic...,Five Myths About the Chinese Communist Party,The little red book that swept France: The la...,Furore over 'censored' edition of Huckleberry ...,"Together, Slim, Gates, Buffett, and Ambani con...",China hiding military build-up: WikiLeaks,Population 7 Billion - By 2045 global populati...,Israelis bribed to admit U.S. goods to Gaza: W...,...,11674.76,188720000.0,11674.76,Italy becomes the first country to ban plastic...,0.414674,-0.011461,-0.9983,0.255,0.045,0.700
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2498,2021-12-27,1,Windsor Castle intruder 'armed with a crossbow...,Israeli PM announces plan to double the amount...,Iran threatens nuclear explosion in Israel????...,What Putin's Russia fears most of all | The Sp...,Call It Like I See It,"Training Civilians, Ukraine Nurtures a Resista...","Training Civilians, Ukraine Nurtures a Resista...",The living Christmas Tree,...,36302.38,244350000.0,36302.38,Windsor Castle intruder armed with a crossbow ...,0.415769,0.076150,0.6966,0.040,0.059,0.901
2499,2021-12-28,1,India freezes bank accounts of Mother Teresa's...,"ENTREVISTA: A MIQUEL CRUZ, ESCRITOR Y NOVELIST...",Jesus statue smashed in spate of attacks on In...,Please join me live This Now 7am US Eastern ti...,New study finds booster protection against Omi...,(STL.News) Thirty-five people were massacred o...,U.S. President Biden signs US$770 billion defe...,CDC cuts quarantine time for COVID-affected Am...,...,36398.21,239090000.0,36398.21,India freezes bank accounts of Mother Teresas ...,0.384578,0.025649,-0.9700,0.113,0.069,0.818
2500,2021-12-29,1,Biden signs $740B defense policy bill to overh...,"Afghanistan?????s Former Female Troops, Once H...","(STL.News) Last week, China made two deliverie...","NAKED JOURNALIST ""FALLS"" FROM WINDOW IN RUSSIA","Chile Rewrites Its Constitution, Confronting C...",Haile banks got invited by Milla Mac and it crazy,Evidence from the UK suggests Australia should...,Read the latest stories through PIB tweets as ...,...,36488.63,213480000.0,36488.63,Biden signs $740B defense policy bill to overh...,0.372645,0.050758,-0.9839,0.118,0.046,0.836
2501,2021-12-30,0,Locked-down residents in one of China's bigges...,REFLEXI??N: GRATITUD EN LA ADVERSIDAD,PACHI LARREA: EN SU PUNTO,Israel's Gantz Hosts Palestinian President Abb...,"Vean esta particular cena de noche buena, no l...",Hong Kong Police Raid Office of Pro-Democracy ...,this is an article,COVID-19: France hit by ?????dizzying????? dai...,...,36398.08,205620000.0,36398.08,Locked-down residents in one of Chinas biggest...,0.337340,0.030059,-0.9798,0.103,0.053,0.844


In [41]:
#Create new dataframe contain relevants features:

new_combined_data= index_headlines_data[['label','open', 'high', 'low','volume','Subjectivity','Polarity','compound','neg','pos','neu']]
new_combined_data

Unnamed: 0,label,open,high,low,volume,Subjectivity,Polarity,compound,neg,pos,neu
0,1,11577.43,11711.47,11577.35,203420000.0,0.339644,0.007526,-0.9967,0.185,0.087,0.728
1,1,11670.90,11698.22,11635.74,178630000.0,0.416723,0.080403,-0.9917,0.108,0.050,0.842
2,1,11688.61,11742.68,11652.89,169990000.0,0.350014,0.092459,-0.9927,0.117,0.052,0.831
3,0,11716.93,11736.74,11667.46,193080000.0,0.387307,0.072972,-0.9973,0.170,0.069,0.761
4,0,11696.86,11726.94,11599.68,188720000.0,0.414674,-0.011461,-0.9983,0.255,0.045,0.700
...,...,...,...,...,...,...,...,...,...,...,...
2498,1,35954.48,36306.61,35954.48,244350000.0,0.415769,0.076150,0.6966,0.040,0.059,0.901
2499,1,36302.99,36527.26,36302.99,239090000.0,0.384578,0.025649,-0.9700,0.113,0.069,0.818
2500,1,36421.14,36571.55,36396.19,213480000.0,0.372645,0.050758,-0.9839,0.118,0.046,0.836
2501,0,36522.48,36679.44,36372.13,205620000.0,0.337340,0.030059,-0.9798,0.103,0.053,0.844


# 3. Build a Random Forest Model with Sentiment Analysis

In [175]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

In [223]:
#create the feature data set:
x =new_combined_data
x=np.array(x.drop(['label'],1))

#create the target data set:
y= np.array(new_combined_data['label'])

# Split the data:
x_train, x_test, y_train, y_test= train_test_split(x,y, test_size=0.2, random_state= 3)

# create and train the model:
randomclassifier=RandomForestClassifier(n_estimators=250,criterion='entropy')
randomclassifier.fit(x_train,y_train)
# Random Forests Model predicton:
y_pred_rf = randomclassifier.predict(x_test)

  This is separate from the ipykernel package so we can avoid doing imports until


In [224]:
### training set result
training_result = randomclassifier.score(x_train, y_train)
print(training_result)
### testing set result
testing_result = randomclassifier.score(x_test, y_test)
print(testing_result)
## Random Forests Classification Model Report
print(classification_report(y_test, y_pred_rf))

1.0
0.5588822355289421
              precision    recall  f1-score   support

           0       0.51      0.45      0.48       225
           1       0.59      0.64      0.62       276

    accuracy                           0.56       501
   macro avg       0.55      0.55      0.55       501
weighted avg       0.55      0.56      0.56       501



# 4. Build a SVM Model with Sentiment Analysis

In [178]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score
svm = SVC()


SVC()

In [225]:
#create the feature data set:
x =new_combined_data
x=np.array(x.drop(['label'],1))

#create the target data set:
y= np.array(new_combined_data['label'])

# Split the data:
x_train, x_test, y_train, y_test= train_test_split(x,y, test_size=0.2, random_state= 3)

# create and train the model:
svm.fit(x_train, y_train)
# Random Forests Model predicton:
y_pred_svm = svm.predict(x_test)

  This is separate from the ipykernel package so we can avoid doing imports until


In [226]:
### training set result
training_result = svm.score(x_train, y_train)
print(training_result)
### testing set result
testing_result = svm.score(x_test, y_test)
print(testing_result)
## SVM Classification Model Report
print(classification_report(y_test, y_pred_svm))

0.5469530469530469
0.562874251497006
              precision    recall  f1-score   support

           0       0.71      0.04      0.08       225
           1       0.56      0.99      0.71       276

    accuracy                           0.56       501
   macro avg       0.64      0.51      0.40       501
weighted avg       0.63      0.56      0.43       501



# 5. Build a MLP Neural Networks Model with Sentiment Analysis

In [229]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score
mlp = MLPClassifier()

In [230]:
#create the feature data set:
x =new_combined_data
x=np.array(x.drop(['label'],1))

#create the target data set:
y= np.array(new_combined_data['label'])

# Split the data:
x_train, x_test, y_train, y_test= train_test_split(x,y, test_size=0.2, random_state= 3)

# create and train the model:
mlp.fit(x_train, y_train)

# MLP Model predicton:
y_pred_mlp = mlp.predict(x_test)

  This is separate from the ipykernel package so we can avoid doing imports until


In [232]:
### training set result
training_result = mlp.score(x_train, y_train)
print(training_result)
### testing set result
testing_result = mlp.score(x_test, y_test)
print(testing_result)
## MLP Classification Model Report
print(classification_report(y_test, y_pred_mlp))

0.5409590409590409
0.5508982035928144
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       225
           1       0.55      1.00      0.71       276

    accuracy                           0.55       501
   macro avg       0.28      0.50      0.36       501
weighted avg       0.30      0.55      0.39       501



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# 6. Build a XGBoost Model with Sentiment Analysis

In [186]:
from xgboost import XGBClassifier
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

In [233]:
#create the feature data set:
x =new_combined_data
x=np.array(x.drop(['label'],1))

#create the target data set:
y= np.array(new_combined_data['label'])

# Split the data:
x_train, x_test, y_train, y_test= train_test_split(x,y, test_size=0.2, random_state= 3)

# create and train the model:
xgb = XGBClassifier(random_state =1)
xgb.fit(x_train, y_train)

# XGBoost Model predicton:
y_pred_xgb = xgb.predict(x_test)

  This is separate from the ipykernel package so we can avoid doing imports until


In [235]:
### training set result
training_result = xgb.score(x_train, y_train)
print(training_result)
### testing set result
testing_result = xgb.score(x_test, y_test)
print(testing_result)
## XGBoost Classification Model Report
print(classification_report(y_test, y_pred_xgb))

0.7567432567432567
0.5588822355289421
              precision    recall  f1-score   support

           0       0.51      0.40      0.45       225
           1       0.59      0.68      0.63       276

    accuracy                           0.56       501
   macro avg       0.55      0.54      0.54       501
weighted avg       0.55      0.56      0.55       501



# 7. Build a Naive Bayes Model with Sentiment Analysis

In [210]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score


In [236]:
#create the feature data set:
x =new_combined_data
x=np.array(x.drop(['label'],1))

#create the target data set:
y= np.array(new_combined_data['label'])

# Split the data:
x_train, x_test, y_train, y_test= train_test_split(x,y, test_size=0.2, random_state= 3)

# create and train the model:
naive = GaussianNB()
naive.fit(x_train, y_train)

# Naive Bayes Model predicton:
y_pred_naive = naive.predict(x_test)

  This is separate from the ipykernel package so we can avoid doing imports until


In [237]:
### training set result
training_result = naive.score(x_train, y_train)
print(training_result)
### testing set result
testing_result = naive.score(x_test, y_test)
print(testing_result)
## SVM Classification Model Report
print(classification_report(y_test, y_pred_naive))

0.5474525474525475
0.5508982035928144
              precision    recall  f1-score   support

           0       0.50      0.06      0.11       225
           1       0.55      0.95      0.70       276

    accuracy                           0.55       501
   macro avg       0.53      0.51      0.41       501
weighted avg       0.53      0.55      0.44       501



# 8. Build a Linear Discriminant Analysis Model with Sentiment Analysis

In [220]:
#create the feature data set:
x =new_combined_data
x=np.array(x.drop(['label'],1))

#create the target data set:
y= np.array(new_combined_data['label'])

# Split the data:
x_train, x_test, y_train, y_test= train_test_split(x,y, test_size=0.2, random_state= 3)

# create and train the model:
lda= LinearDiscriminantAnalysis().fit(x_train, y_train)
# LDA Model predicton:
y_pred= lda.predict(x_test)

  This is separate from the ipykernel package so we can avoid doing imports until


In [221]:
### training set result
training_result = lda.score(x_train, y_train)
print(training_result)
### testing set result
testing_result = lda.score(x_test, y_test)
print(testing_result)
## LDA Classification Model Report
print(classification_report(y_test, y_pred))

0.8506493506493507
0.8562874251497006
              precision    recall  f1-score   support

           0       0.88      0.78      0.83       225
           1       0.84      0.92      0.88       276

    accuracy                           0.86       501
   macro avg       0.86      0.85      0.85       501
weighted avg       0.86      0.86      0.86       501

