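# 2. Data Merging

Joins the AAPL stock price table with the US equities news table on their shared `date` column and saves the merged result as a CSV file.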

In [1]:
import pandas as pd

In [34]:
# Input: AAPL stock price CSV, loaded into df_aapl in section 2.1.1.
STOCK_DATASET_FILE: str = "../data/aapl.csv"
# Input: US equities news CSV, loaded into df_us_equities_news in section 2.1.2.
NEWS_DATASET_FILE: str = "../data/us_equities_news.csv"
# Output: destination of the merged stock + news table written in section 2.5.
STOCK_NEWS_DATASET_FILE: str = "../data/aapl_us_equities_news.csv"

## 2.1 Load data

### 2.1.1 Load stock data

In [3]:
%%time
df_aapl = pd.read_csv(STOCK_DATASET_FILE)

CPU times: user 4.43 ms, sys: 765 µs, total: 5.2 ms
Wall time: 4.51 ms


### 2.1.2 Load news data

In [4]:
%%time
df_us_equities_news = pd.read_csv(NEWS_DATASET_FILE)

CPU times: user 4.57 s, sys: 314 ms, total: 4.89 s
Wall time: 4.89 s


## 2.2 Prepare data

### 2.2.1 Prepare stock data

In [5]:
# Replace the CSV's capitalised headers with snake_case names so the stock
# table exposes a lowercase "date" column like the rest of its fields.
df_aapl = df_aapl.rename(
    {
        "Date": "date",
        "Open": "open",
        "High": "high",
        "Low": "low",
        "Close": "close",
        "Adj Close": "adj_close",
        "Volume": "volume",
    },
    axis="columns",
)

In [6]:
# Parse the trading date and pin every numeric column to an explicit 64-bit
# dtype in a single expression, so re-running the cell is idempotent.
#
# `astype(int)` is deliberately avoided: with NumPy < 2.0 on Windows it
# resolves to int32, which cannot hold share volumes above 2**31 - 1.
df_aapl = df_aapl.assign(date=pd.to_datetime(df_aapl["date"])).astype({
    "open": "float64",
    "high": "float64",
    "low": "float64",
    "close": "float64",
    "adj_close": "float64",
    "volume": "int64",
})

### 2.2.2 Prepare news data

In [7]:
# Name the publication date "date", the key the join in section 2.3 uses.
df_us_equities_news = df_us_equities_news.rename({"release_date": "date"}, axis="columns")

In [8]:
# Parse the publication date and give every news column an explicit dtype in
# a single expression, so re-running the cell is idempotent.
#
# Identifier columns use "int64" rather than `int`, whose width depends on the
# platform (int32 with NumPy < 2.0 on Windows).
#
# NOTE(review): astype(str) converts missing values into the literal text
# "nan"; switch to the nullable "string" dtype if missing titles/contents/urls
# must stay distinguishable from real text.
df_us_equities_news = df_us_equities_news.assign(
    date=pd.to_datetime(df_us_equities_news["date"]),
).astype({
    "id": "int64",
    "ticker": "category",
    "title": str,
    "category": "category",
    "content": str,
    "provider": "category",
    "url": str,
    "article_id": "int64",
})

## 2.3 Join data

In [30]:
# Attach the AAPL prices of the matching date to every news article, then
# order the result chronologically.
#
# * how="inner" drops articles whose date has no row in df_aapl.
# * validate="many_to_one" raises pandas.errors.MergeError if df_aapl holds
#   more than one row for a date, instead of silently duplicating articles.
# * No ticker filter is applied, so articles about every ticker are paired
#   with AAPL prices. NOTE(review): confirm this is intended.
df_appl_us_equities_news = df_us_equities_news.merge(
    df_aapl,
    on="date",
    how="inner",
    validate="many_to_one",
).sort_values("date")

## 2.4 Inspect data

In [33]:
# Preview the first ten rows of the merged table (earliest dates first).
df_appl_us_equities_news.head(n=10)

Unnamed: 0,id,ticker,title,category,content,date,provider,url,article_id,open,high,low,close,adj_close,volume
163975,292541,BAC,UPDATE 4 Rouble at record low as Russia faces ...,news,Updates with day s interventions and futures ...,2009-01-15,Reuters,https://www.investing.com/news/forex-news/upda...,20872,2.8775,3.004286,2.858929,2.977857,2.542589,1831634000
163986,388968,BBY,UPDATE 4 DSG to cut more costs as sales fall w...,news,Xmas lfl sales down 10 percent gross margin...,2009-01-15,Reuters,https://www.investing.com/news/forex-news/upda...,20724,2.8775,3.004286,2.858929,2.977857,2.542589,1831634000
163985,342945,NYT,Lack of fame aids Geithner over US tax mistake,news,By Matthew Bigg\n ATLANTA Jan 15 Reuters ...,2009-01-15,Reuters,https://www.investing.com/news/forex-news/lack...,20995,2.8775,3.004286,2.858929,2.977857,2.542589,1831634000
163984,326167,TGT,CORRECTED Depression ahead prepare for stoc...,news,Corrects figure in paragraph 3 \n LONDON Jan...,2009-01-15,Reuters,https://www.investing.com/news/forex-news/corr...,20779,2.8775,3.004286,2.858929,2.977857,2.542589,1831634000
163983,326166,TGT,Euro area CPI Falls Down To 1 6 In December,news,Release Explanation The CPI measures the aver...,2009-01-15,LFB Forex,https://www.investing.com/news/forex-news/euro...,20745,2.8775,3.004286,2.858929,2.977857,2.542589,1831634000
163982,326165,TGT,German Final CPI Rose In December As Expected,news,Release Explanation The CPI measures the aver...,2009-01-15,LFB Forex,https://www.investing.com/news/forex-news/germ...,20694,2.8775,3.004286,2.858929,2.977857,2.542589,1831634000
163981,326120,TGT,UPDATE 2 Fed s Evans U S in midst of seriou...,news,Updates with comments from Q A \n By Kristina...,2009-01-15,Reuters,https://www.investing.com/news/forex-news/upda...,20979,2.8775,3.004286,2.858929,2.977857,2.542589,1831634000
163980,326119,TGT,Fed s Yellen US must act aggressively on eco...,news,SAN FRANCISCO Jan 15 Reuters The United S...,2009-01-15,Reuters,https://www.investing.com/news/forex-news/fed'...,20976,2.8775,3.004286,2.858929,2.977857,2.542589,1831634000
163967,227446,KNMCY,INTERVIEW UPDATE 2 Square Enix enjoys robust y...,news,Year end demand healthy despite economy\n ...,2009-01-15,Reuters,https://www.investing.com/news/forex-news/inte...,20706,2.8775,3.004286,2.858929,2.977857,2.542589,1831634000
163968,254443,TM,TOPWRAP 4 Bank crisis deepens anew ECB poised...,news,European Central Bank seen cutting interest ...,2009-01-15,Reuters,https://www.investing.com/news/forex-news/topw...,20748,2.8775,3.004286,2.858929,2.977857,2.542589,1831634000


## 2.5 Save data

In [37]:
# Persist the merged table; the row index is not written to the file.
df_appl_us_equities_news.to_csv(path_or_buf=STOCK_NEWS_DATASET_FILE, index=False)

# 99. Scratch