## Stock Sentiment Analysis using News Headlines

In [2]:
import pandas as pd

In [3]:
df=pd.read_csv("C:/Users/nehar/Desktop/My research paper/RedditNews.csv", encoding = "ISO-8859-1")

In [11]:
df.head(15)

Unnamed: 0,Date,News
0,2016-07-01,A 117-year-old woman in Mexico City finally re...
1,2016-07-01,IMF chief backs Athens as permanent Olympic host
2,2016-07-01,"The president of France says if Brexit won, so..."
3,2016-07-01,British Man Who Must Give Police 24 Hours' Not...
4,2016-07-01,100+ Nobel laureates urge Greenpeace to stop o...
5,2016-07-01,Brazil: Huge spike in number of police killing...
6,2016-07-01,Austria's highest court annuls presidential el...
7,2016-07-01,"Facebook wins privacy case, can track any Belg..."
8,2016-07-01,Switzerland denies Muslim girls citizenship af...
9,2016-07-01,China kills millions of innocent meditators fo...


In [6]:
#Checking if it is imbalanced or balanced 
label_counts = df['Date'].value_counts()
print(label_counts)

2008-10-26    50
2009-10-25    50
2016-07-01    25
2011-02-17    25
2011-02-25    25
              ..
2009-12-24    23
2009-09-26    23
2011-04-21    23
2009-09-19    23
2009-09-15    22
Name: Date, Length: 2943, dtype: int64


In [17]:
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# import SentimentIntensityAnalyzer class
# from vaderSentiment.vaderSentiment module.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Initialize the VADER sentiment analyzer
sid = SentimentIntensityAnalyzer()

# Calculate sentiment scores and categorize sentiment for each news article
df['Sentiment_Score'] = df['News'].apply(lambda x: sid.polarity_scores(x)['compound'])
df['Sentiment'] = df['Sentiment_Score'].apply(lambda score: 'Positive' if score > 0 else ('Negative' if score < 0 else 'Neutral'))

# Print the results
print(df[['News', 'Sentiment']])

                                                    News Sentiment
0      A 117-year-old woman in Mexico City finally re...  Negative
1       IMF chief backs Athens as permanent Olympic host  Negative
2      The president of France says if Brexit won, so...  Positive
3      British Man Who Must Give Police 24 Hours' Not...  Negative
4      100+ Nobel laureates urge Greenpeace to stop o...  Negative
...                                                  ...       ...
73603  b'Man goes berzerk in Akihabara and stabs ever...  Negative
73604  b'Threat of world AIDS pandemic among heterose...  Positive
73605  b'Angst in Ankara: Turkey Steers into a Danger...  Negative
73606  b"UK: Identity cards 'could be used to spy on ...  Negative
73607  b'Marriage, they said, was reduced to the stat...  Negative

[73608 rows x 2 columns]


In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Split the data into train and test sets
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

# Create TF-IDF features
tfidf_vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
train_tfidf = tfidf_vectorizer.fit_transform(train_data['News'])
test_tfidf = tfidf_vectorizer.transform(test_data['News'])

# Train a Random Forest classifier
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(train_tfidf, train_data['Sentiment'])

# Predict on the test set
predictions = rf_classifier.predict(test_tfidf)

# Evaluate the model
report = classification_report(test_data['Sentiment'], predictions)

print(report)


              precision    recall  f1-score   support

    Negative       0.81      0.78      0.79      8259
     Neutral       0.54      0.75      0.63      3438
    Positive       0.71      0.47      0.56      3025

    accuracy                           0.71     14722
   macro avg       0.69      0.67      0.66     14722
weighted avg       0.73      0.71      0.71     14722



In [19]:
import spacy
# Load spaCy language model
nlp = spacy.load("en_core_web_sm")

# Function to perform NER on a text
def perform_ner(text):
    doc = nlp(text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    return entities

# Perform NER on each news article
df['Named_Entities'] = df['News'].apply(perform_ner)

# Display the results
for index, row in df.iterrows():
    print(f"News {index + 1}: {row['News']}")
    print("Named Entities:", row['Named_Entities'])
    print("-" * 40)

News 1: A 117-year-old woman in Mexico City finally received her birth certificate, and died a few hours later. Trinidad Alvarez Lira had waited years for proof that she had been born in 1898.
Named Entities: [('117-year-old', 'DATE'), ('Mexico City', 'GPE'), ('a few hours later', 'TIME'), ('Trinidad Alvarez Lira', 'PERSON'), ('years', 'DATE'), ('1898', 'DATE')]
----------------------------------------
News 2: IMF chief backs Athens as permanent Olympic host
Named Entities: [('IMF', 'ORG'), ('Athens', 'GPE'), ('Olympic', 'PERSON')]
----------------------------------------
News 3: The president of France says if Brexit won, so can Donald Trump
Named Entities: [('France', 'GPE'), ('Brexit', 'PERSON'), ('Donald Trump', 'PERSON')]
----------------------------------------
News 4: British Man Who Must Give Police 24 Hours' Notice of Sex Threatens Hunger Strike: The man is the subject of a sexual risk order despite having never been convicted of a crime.
Named Entities: [('British', 'NORP'), 

Named Entities: [('CIA', 'ORG')]
----------------------------------------
News 2365: "Saudi Arabia's largest dairy company will soon be unable to farm alfalfa in its own parched country to feed its 170,000 cows. So it's turning to an unlikely place to grow the water-chugging crop -- the drought-stricken American Southwest."
Named Entities: [("Saudi Arabia's", 'GPE'), ('170,000', 'CARDINAL'), ('American Southwest', 'ORG')]
----------------------------------------
News 2366: China editor resigns over media censorship
Named Entities: [('China', 'GPE')]
----------------------------------------
News 2367: Russian weapons exports expected to receive $6bn-$7bn boost as result of marketing effect of bombing campaign in Syria
Named Entities: [('Russian', 'NORP'), ('6bn-$7bn', 'MONEY'), ('Syria', 'GPE')]
----------------------------------------
News 2368: Putin Faces Information Attack on Wealth, Friends, Russia Says:The Kremlin accused a group of international journalists of preparing an inform

News 4563: Giant squid that swam into Japanese bay guided back out to sea by diver - Toyama Bay dive shop owner joins 3.7-metre-long creature in the water, where it was lively, spurting ink and trying to entangle his tentacles around me
Named Entities: [('Japanese', 'NORP'), ('Toyama Bay', 'LOC'), ('3.7-metre-long', 'QUANTITY')]
----------------------------------------
News 4564: In Tanzania, a Horrific Fishing Tactic Destroys All Sea Life - It's the only country in Africa where fishermen widely use homemade bombs to increase their catch. 'Blast fishing' popular off the coast, where its been outlawed since 1970.
Named Entities: [('Tanzania', 'GPE'), ('a Horrific Fishing Tactic Destroys All Sea Life -', 'ORG'), ('Africa', 'LOC'), ('1970', 'DATE')]
----------------------------------------
News 4565: Paris Scales Back NYE Event Amid Terror Fears
Named Entities: [('Paris', 'GPE')]
----------------------------------------
News 4566: Ramadi residents fleeing ISIS: 'They wanted to use us as h

News 7045: The Israeli secret service is suspected of being behind a series of mysterious yet highly sophisticated cyber-spying attacks on decisive negotiations over Irans nuclear programme held at luxury hotels across Europe earlier this year, Swiss media has reported.
Named Entities: [('Israeli', 'NORP'), ('Irans', 'NORP'), ('Europe', 'LOC'), ('earlier this year', 'DATE'), ('Swiss', 'NORP')]
----------------------------------------
News 7046: Canadians eating less meat, taking a bite out of food industrys margins
Named Entities: [('Canadians', 'NORP')]
----------------------------------------
News 7047: David Cameron says Europe must get better at sending migrants home
Named Entities: [('David Cameron', 'PERSON'), ('Europe', 'LOC')]
----------------------------------------
News 7048: Eight Million Tons of Plastic Dumped in Ocean Every Year
Named Entities: [('Eight Million Tons', 'QUANTITY')]
----------------------------------------
News 7049: A civil lawsuit against Volkswagen was fi

News 9343: Chiles largest city shuts down as smog causes environmental emergency Authorities have recommended Santiagos 7 million residents avoid outdoor activity, closed 1,300 businesses and ordered 1.7m vehicles off the streets
Named Entities: [('Authorities', 'ORG'), ('7 million', 'CARDINAL'), ('1,300', 'CARDINAL'), ('1.7', 'CARDINAL')]
----------------------------------------
News 9344: The mighty lion, reclusive cave crabs and the world's rarest sea lion are among nearly 23,000 species at risk of dying out, a top conservation body warned
Named Entities: [('nearly 23,000', 'CARDINAL')]
----------------------------------------
News 9345: Nearly 700 people have died over past 4 days as a result of Karachi's heatwave
Named Entities: [('Nearly 700', 'CARDINAL'), ('over past 4 days', 'DATE'), ('Karachi', 'ORG')]
----------------------------------------
News 9346: 60 million kids live without their parents in China.
Named Entities: [('60 million', 'CARDINAL'), ('China', 'GPE')]
---------

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




----------------------------------------
News 35635: Mass grave in London reveals how volcano caused global catastrophe - 
Scientists search for the explosive source of a disaster that wiped out almost 1/3 of Londoners in 1258
Named Entities: [('London', 'GPE'), ('almost 1/3', 'CARDINAL'), ('1258', 'DATE')]
----------------------------------------
News 35636: Robert Fisk: Syria's ancient treasures pulverised 
Named Entities: [('Robert Fisk', 'PERSON'), ('Syria', 'GPE')]
----------------------------------------
News 35637: Riot Leaves an Egyptian Village Without Christians
Named Entities: [('Riot Leaves', 'PERSON'), ('Egyptian', 'NORP'), ('Christians', 'NORP')]
----------------------------------------
News 35638: Faced with an MHRA crackdown on unlicensed medicines, one of Britain's leading manufacturers of homeopathic remedies has indicated it would be prepared to relabel its products 'confectionery' to circumvent regulation
Named Entities: [('Britain', 'GPE')]
----------------------

News 38074: Former Mossad chief backs Shin Bet counterpart over criticism of Netanyahu, Barak
Named Entities: [('Mossad', 'ORG'), ('Shin Bet', 'PERSON'), ('Netanyahu', 'PERSON'), ('Barak', 'PERSON')]
----------------------------------------
News 38075: Russia's xenophobia problem:  Putin vows to tighten immigration laws to curb Russia's large waves of foreigners, but critics worry this will only make life more dangerous for illegal migrants.

Named Entities: [('Russia', 'GPE'), ('Putin', 'PERSON'), ('Russia', 'GPE')]
----------------------------------------
News 38076: Canada announces that foreign workers can now be paid 15% less than Canadians.
Named Entities: [('Canada', 'GPE'), ('15%', 'PERCENT'), ('Canadians', 'NORP')]
----------------------------------------
News 38077: Major UK Supermarket Chain Boycotts Israeli Settlement Goods
Named Entities: [('UK', 'GPE')]
----------------------------------------
News 38078: Egypt Islamists upend race by backing liberal:  "'There is no comp

Named Entities: [('Kim Jong Un', 'PERSON')]
----------------------------------------
News 40254: At least 40 dead in Egypt Football violence
Named Entities: [('At least 40', 'CARDINAL'), ('Egypt', 'GPE')]
----------------------------------------
News 40255: Famous Egyptian actor sentenced to three months in jail for defaming Islam
Named Entities: [('Egyptian', 'NORP'), ('three months', 'DATE'), ('Islam', 'NORP')]
----------------------------------------
News 40256: Why Britain and Argentina are tussling, again, over the Falklands 
Named Entities: [('Britain', 'GPE'), ('Argentina', 'GPE'), ('Falklands', 'ORG')]
----------------------------------------
News 40257: Seized cash triggers political furor in Mexico -- The discovery in Mexico of almost $2 million cash in the luggage of a state government official has triggered a round of mud-slinging in the run-up to the presidential election in July.
Named Entities: [('Mexico', 'GPE'), ('Mexico', 'GPE'), ('almost $2 million', 'MONEY'), ('July

News 42404: A group of Russian scientists came out of an isolation chamber after over 500 days, proving that sending humans to mars may soon be feasible 
Named Entities: [('Russian', 'NORP'), ('over 500 days', 'DATE')]
----------------------------------------
News 42405: WikiLeaks founder 'abandoned' by Australian govt - KEVIN Rudd and his foreign affairs department have been accused of all but ignoring pleas from Julian Assange's legal team to protect the WikiLeaks founder from a possible death penalty in the US.
Named Entities: [('Australian', 'NORP'), ("Julian Assange's", 'ORG'), ('WikiLeaks', 'PRODUCT'), ('US', 'GPE')]
----------------------------------------
News 42406: The Hajj and Eid al-Adha 2011 [pics]
Named Entities: [('Hajj', 'PERSON')]
----------------------------------------
News 42407: The Sicilian Mafia: Seven year old jailed, tortured and thrown into acid
Named Entities: [('Sicilian', 'NORP'), ('Seven year old', 'DATE')]
----------------------------------------
News 424

News 44654: The YouTube man who was mugged by rioters pretending to help him is Asyraf Haziq, a Malaysian student
Named Entities: [('YouTube', 'ORG'), ('Asyraf Haziq', 'PERSON'), ('Malaysian', 'NORP')]
----------------------------------------
News 44655: Al Jazeera English has quashed several planned rebroadcasts of Shouting in the Dark, an hourlong documentary about Bahrains crackdown on pro-democracy protesters that had its debut last week and brought complaints from Bahraini authorities.
Named Entities: [('Al Jazeera', 'ORG'), ('English', 'LANGUAGE'), ('Shouting in the Dark', 'ORG'), ('Bahrains', 'ORG'), ('last week', 'DATE'), ('Bahraini', 'GPE')]
----------------------------------------
News 44656: Drug dealers may have wiped out "uncontacted" Amazon tribe
Named Entities: [('Amazon', 'ORG')]
----------------------------------------
News 44657: London Riots: 'Bleeding, I Called 999. A Tired Man Told Me To Go Home' --- "Unlike Los Angeles or Paris, the riots are not happening in ghet

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 [('Bombay', 'GPE')]
----------------------------------------
News 69137: b'The Daily Show on the Mumbai Tragedy: "There have always been motherfuckers, there will always be motherfuckers, but what we can\'t do, is let them control our motherfucking lives."'
Named Entities: []
----------------------------------------
News 69138: b'The Vatican on gays: "Better dead than married!"'
Named Entities: [('Vatican', 'FAC')]
----------------------------------------
News 69139: b'People over 60s are the fastest-growing group of criminals in Japan, with more arrests than teenagers in many areas'
Named Entities: [('Japan', 'GPE')]
----------------------------------------
News 69140: b'Live Earth Concert Cancelled Due to Terrorist Attacks'
Named Entities: []
----------------------------------------
News 69141: b'UK government sneaking in mandatory ID cards'
Named Entities: []
----------------------------------------
News 69142: b'Until last week, no one in the U.K. had dared search Parliament since

News 71624: b'Philly-area activists released by China; Jeff said he was slapped around and threatend, with them saying, Do you want your head cut off or do you want to be shot?'
Named Entities: [('China', 'GPE'), ('Jeff', 'PERSON')]
----------------------------------------
News 71625: b"Vatican describes Hindu attack on Christian orphanage as a 'sin against God'"
Named Entities: [('Hindu', 'NORP'), ('Christian', 'NORP')]
----------------------------------------
News 71626: b'Protesters tell tales of Beijing detention- sleep deprivation, threats oh my'
Named Entities: [('Beijing', 'GPE')]
----------------------------------------
News 71627: b'Python kills zookeeper'
Named Entities: []
----------------------------------------
News 71628: b'Kelly was Murdered Says UK Intelligence Insider'
Named Entities: [('UK', 'GPE')]
----------------------------------------
News 71629: b"Fury as image of Myra Hindley appears in 'Best of British' film at 2012 Olympics party"
Named Entities: [('Myra Hind