In [2]:
# Attach Google Drive to the Colab runtime at /content/drive.
# Running this cell opens the interactive authorization flow.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [9]:
# Locations of the two input CSVs on the mounted Drive:
# the headlines file first, then the price file.
news_file, price_file = (
    "/content/drive/MyDrive/MCD_headlines.csv",
    "/content/drive/MyDrive/MCD_price.csv",
)

In [10]:
import pandas as pd


# Build the (date, concatenated headlines, close two price rows ahead) dataset
# and save it as MCD_headlines_price_t_plus_2.csv.

# Load the raw inputs from the paths configured in the previous cell.
news_df   = pd.read_csv(news_file)
prices_df = pd.read_csv(price_file)

# Rename the news columns to the lowercase names used in the rest of the cell.
news_df = news_df.rename(columns={
    "Date": "date",
    "Article_title": "headline"
})

# Parse timestamps, drop any timezone, and truncate to midnight so both frames
# are keyed by calendar day. Without the truncation, a headline stamped with a
# time of day forms its own group and never matches a price row in the merge.
news_df['date'] = (
    pd.to_datetime(news_df['date']).dt.tz_localize(None).dt.normalize()
)
prices_df['date'] = (
    pd.to_datetime(prices_df['date']).dt.tz_localize(None).dt.normalize()
)

# A missing title is read as float NaN, which would make str.join raise
# TypeError; drop those rows before concatenating.
news_df = news_df.dropna(subset=['headline'])

# One row per day: all of that day's headlines joined with " [SEP] ".
daily_news = (
    news_df.groupby("date")['headline']
    .agg(lambda titles: " [SEP] ".join(titles.astype(str)))
    .reset_index()
)

# Target = the close found two rows later in date order
# (presumably two trading sessions ahead if the price file has one row per
# session -- TODO confirm).
prices_df = prices_df.sort_values('date').reset_index(drop=True)
prices_df['close_t_plus_2'] = prices_df['close'].shift(-2)

# Merge on date. shift(-2) leaves the last two price rows without a target, so
# drop them here rather than writing unlabeled rows to the dataset.
dataset = pd.merge(
    daily_news,
    prices_df[['date', 'close_t_plus_2']].dropna(subset=['close_t_plus_2']),
    on='date',
    how='inner'
)

# Keep only the five years leading up to the most recent merged date.
max_date = dataset['date'].max()
min_date = max_date - pd.DateOffset(years=5)
dataset = dataset[dataset['date'] >= min_date]

# Persist the result and print a quick summary.
dataset.to_csv("MCD_headlines_price_t_plus_2.csv", index=False)
print("Saved dataset with shape:", dataset.shape)
print(dataset.head())

Saved dataset with shape: (656, 3)
          date                                           headline  \
224 2015-05-27  Stifel, Morningstar Analysts React To McDonald...   
225 2015-05-28  Deutsche Bank Makes McDonald's A Top Pick [SEP...   
226 2015-05-29  Look Out For The Restaurant Industry, Deutsche...   
227 2015-06-03  Exclusive: Boston Market CEO Talks 'Chicken Wa...   
228 2015-06-04  What To Expect From Tomorrow's Jobs Report [SE...   

     close_t_plus_2  
224       95.930000  
225       96.220001  
226       96.290001  
227       95.540001  
228       95.320000  


In [11]:
# Quick sanity check of the merged frame: .info() prints column dtypes and
# non-null counts; .describe() is the cell's last expression, so its summary
# table is rendered as the cell output.
dataset.info()
dataset.describe()

<class 'pandas.core.frame.DataFrame'>
Index: 656 entries, 224 to 879
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   date            656 non-null    datetime64[ns]
 1   headline        656 non-null    object        
 2   close_t_plus_2  656 non-null    float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 20.5+ KB


Unnamed: 0,date,close_t_plus_2
count,656,656.0
mean,2017-10-26 07:10:14.634146304,150.939131
min,2015-05-27 00:00:00,91.209999
25%,2016-06-21 18:00:00,119.447498
50%,2017-07-20 12:00:00,153.979996
75%,2019-04-19 12:00:00,181.575001
max,2020-05-27 00:00:00,219.729996
std,,35.96509


In [13]:
import pandas as pd

# Reload the dataset saved by the build cell. parse_dates restores the
# datetime64 dtype of 'date'; without it read_csv loads the column as plain
# strings, unlike the in-memory frame this replaces.
dataset = pd.read_csv("MCD_headlines_price_t_plus_2.csv", parse_dates=["date"])
print(dataset.head())
print(dataset.tail())
dataset

         date                                           headline  \
0  2015-05-27  Stifel, Morningstar Analysts React To McDonald...   
1  2015-05-28  Deutsche Bank Makes McDonald's A Top Pick [SEP...   
2  2015-05-29  Look Out For The Restaurant Industry, Deutsche...   
3  2015-06-03  Exclusive: Boston Market CEO Talks 'Chicken Wa...   
4  2015-06-04  What To Expect From Tomorrow's Jobs Report [SE...   

   close_t_plus_2  
0       95.930000  
1       96.220001  
2       96.290001  
3       95.540001  
4       95.320000  
           date                                           headline  \
651  2020-05-20  Krasney Financial LLC Buys SPDR Portfolio Shor...   
652  2020-05-21  Why Record Corporate Debt Might Not Be So Bad:...   
653  2020-05-22       McDonald's Announces Quarterly Cash Dividend   
654  2020-05-26  Verus Capital Partners, Llc Buys Microsoft Cor...   
655  2020-05-27           Lessons on When to Sell From Terry Smith   

     close_t_plus_2  
651      184.410004  
652   

Unnamed: 0,date,headline,close_t_plus_2
0,2015-05-27,"Stifel, Morningstar Analysts React To McDonald...",95.930000
1,2015-05-28,Deutsche Bank Makes McDonald's A Top Pick [SEP...,96.220001
2,2015-05-29,"Look Out For The Restaurant Industry, Deutsche...",96.290001
3,2015-06-03,Exclusive: Boston Market CEO Talks 'Chicken Wa...,95.540001
4,2015-06-04,What To Expect From Tomorrow's Jobs Report [SE...,95.320000
...,...,...,...
651,2020-05-20,Krasney Financial LLC Buys SPDR Portfolio Shor...,184.410004
652,2020-05-21,Why Record Corporate Debt Might Not Be So Bad:...,184.839996
653,2020-05-22,McDonald's Announces Quarterly Cash Dividend,187.720001
654,2020-05-26,"Verus Capital Partners, Llc Buys Microsoft Cor...",188.729996
