# Sentiment Analysis for Mergers and Acquisitions

### Part 1: Getting the Data and Saving It as a CSV File

In [1]:
# Initial imports and notebook-wide setup

import pandas as pd
import numpy as np
from pathlib import Path

import os
from dotenv import load_dotenv
# Load variables from a .env file (python-dotenv) before the os.getenv lookup below.
load_dotenv()

import warnings
# NOTE(review): this silences every warning for the rest of the notebook,
# including deprecation notices from pandas -- consider narrowing the filter.
warnings.filterwarnings("ignore")

## Setting Up the News API Client and Creating a DataFrame

In [2]:
# Create a newsapi client
from newsapi import NewsApiClient

# The API key is read from the NEWS environment variable.
api_key = os.getenv("NEWS")
if not api_key:
    # Fail here with a clear message rather than building a client without a
    # key and hitting an opaque API error on the first request.
    raise RuntimeError(
        "The NEWS environment variable is not set; "
        "define NEWS=<your NewsAPI key> in the environment or in a .env file."
    )
newsapi = NewsApiClient(api_key=api_key)

In [3]:
# Create a DataFrame
from datetime import datetime

def create_df(news):
    news =  pd.DataFrame(articles)
    news = news.filter(items=["publishedAt", 'title', 'content' , 'description'])
    news = news.rename(columns={"publishedAt": 'date', 'content' : 'text'})
    news['date'] = pd.to_datetime(news['date']).dt.date

    news.dropna(inplace=True)

    return news


## Getting the News Data

In [4]:
articles = []
keyword = 'disney fox merger'

for i in range(1,20,1):
    news1 = newsapi.get_everything(
        q= keyword,
        language = 'en', 
        sort_by = 'relevancy',
        page_size = 100,
        page = i,
        from_param = '2018-01-01', 
        to = '2019-07-01'
    )
    articles.extend(news1['articles'])

print(f" Total news articles Disney-Fox merger & acquisition : {news1['totalResults']}")

 Total news articles Disney-Fox merger & acquisition : 5701


In [5]:
articles = []
keyword = 'visa plaid '

for i in range(1,20,1):
    news2 = newsapi.get_everything(
        q= keyword,
        language = 'en', 
        sort_by = 'relevancy',
        page_size = 100,
        page = i,
        from_param = '2020-01-01', 
        to = '2020-03-01'
    )
    articles.extend(news2['articles'])

print(f" Total news articles Visa-Plaid merger & acquisition : {news2['totalResults']}")

 Total news articles Visa-Plaid merger & acquisition : 317


In [6]:
articles3 = []
keyword = 'intel apple'

for i in range(1,20,1):
    news3 = newsapi.get_everything(
        q= keyword,
        language = 'en', 
        sort_by = 'relevancy',
        page_size = 100,
        page = i,
        from_param = '2019-03-01', 
        to = '2021-01-01'
    )
    articles.extend(news3['articles'])

print(f" Total news articles Apple-Intel merger & acquisition : {news3['totalResults']}")

 Total news articles Apple-Intel merger & acquisition : 34330


In [7]:
# Build one DataFrame per query response, in query order.
news1df, news2df, news3df = [
    create_df(response) for response in (news1, news2, news3)
]

In [8]:
# Preview the last five rows of the second query's frame.
news2df.tail(n=5)

Unnamed: 0,date,title,text,description
2212,2019-09-26,'Start Here': White House releases Ukraine cal...,"It's Thursday, Sept. 26, 2019. Let's start her...","It's Thursday, Sept. 26, 2019. Here's what you..."
2213,2020-08-13,95% of Chinese iPhone owners would abandon App...,A huge Chinese survey with more than 1.2 milli...,A huge Chinese survey with more than 1.2 milli...
2214,2020-08-05,Apple Updates 27-Inch iMac With 10th Generatio...,Apple has just released a new 27-inch model fo...,Apple has just released a new 27-inch model fo...
2215,2020-07-22,Apple issues third macOS Big Sur beta to devel...,Apple on Wednesday released a third beta versi...,Apple on Wednesday released a third beta versi...
2216,2019-05-22,Google's problems in China are bigger than Hua...,Google has long had a complicated relationship...,President Trump's ban on the world's second-la...


In [9]:
# Stack the three per-query frames vertically into one table.
per_query_frames = (news1df, news2df, news3df)
all_articles = pd.concat(per_query_frames, axis=0)

In [11]:
# Newest articles first, then summarise the columns and non-null counts.
all_articles = all_articles.sort_values(by=['date'], ascending=False)
all_articles.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6639 entries, 1438 to 383
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   date         6639 non-null   object
 1   title        6639 non-null   object
 2   text         6639 non-null   object
 3   description  6639 non-null   object
dtypes: object(4)
memory usage: 259.3+ KB


## Save the Data in CSV Format

In [12]:
# Write the combined articles to articles.csv in the working directory.
# The index is left out; 'utf-8-sig' prefixes the file with a UTF-8 BOM.
file_path = Path("articles.csv")
all_articles.to_csv(path_or_buf=file_path, encoding='utf-8-sig', index=False)