# Import the necessary libraries

In [1]:
import os
import time
from datetime import datetime

import pandas as pd
import yfinance as yf # Daily trading data
from dateutil.relativedelta import relativedelta
from prophet import Prophet # For Forecast
from serpapi import GoogleSearch # For News report


# SET UP

## Initialize the variables

In [2]:
# SET UP

# Company display name -> ticker symbol used for the download.
tickers = {
    "Meta": "META",
    "Amazon": "AMZN",
    "Netflix": "NFLX",
    "Google": "GOOGL",
    "NVIDIA": "NVDA"
}

# Date range of the price history: fixed start date through today.
start_date = "2018-08-01"
end_date = datetime.today().strftime('%Y-%m-%d')

# Dates (YYYY-MM-DD) for which a forecast value is printed.
forecast_dates = [
    '2025-08-31',
    '2025-12-31',
    '2026-08-31',
    '2026-12-31',
    '2027-08-31',
    '2027-12-31'
]

# SerpAPI key for the Google News search. Read it from the environment so the
# secret never has to be pasted into (and saved with) the notebook; falls back
# to an empty string when the variable is not set.
SERPAPI_KEY = os.environ.get("SERPAPI_KEY", "")

## Define Functions

### Forecast

In [3]:

def forecast_stock(name, ticker, df, target_dates=None, periods=900):
    """
    Fit a Prophet model on a company's closing prices and print the forecast
    for a set of target dates.

    Parameters:
        name         : Name of the company (used in the printed header only).
        ticker       : Ticker symbol of the company (used in the printed header only).
        df           : DataFrame with at least 'Date' and 'Close' columns.
                       It is not modified.
        target_dates : Iterable of 'YYYY-MM-DD' strings to report. When None,
                       the module-level `forecast_dates` list is used.
        periods      : Number of future periods passed to
                       `make_future_dataframe` (default 900).

    Returns:
        pd.DataFrame: Prophet's forecast over the whole period, with the 'ds'
        column converted to 'YYYY-MM-DD' strings; or None when fewer than
        30 usable rows remain after cleaning.
    """
    print(f"\n📈 Forecasting for {name} ({ticker})")

    if target_dates is None:
        target_dates = forecast_dates

    # Prophet expects the columns to be called 'ds' (date) and 'y' (value).
    history = (
        df[['Date', 'Close']]
        .dropna()
        .rename(columns={'Date': 'ds', 'Close': 'y'})
    )
    # Non-numeric prices become NaN and are dropped instead of failing the fit.
    history = history.assign(
        y=pd.to_numeric(history['y'], errors='coerce')
    ).dropna(subset=['y'])

    # Check for sufficient data
    if history.shape[0] < 30:
        print("❌ Not enough data for forecasting.")
        return None

    # Fit Prophet
    model = Prophet(daily_seasonality=True)
    model.fit(history)

    # Extend the history by `periods` future rows and predict over all of it.
    future = model.make_future_dataframe(periods=periods)
    forecast = model.predict(future)

    # Dates are compared as strings below, so normalise 'ds' to YYYY-MM-DD.
    forecast['ds'] = pd.to_datetime(forecast['ds']).dt.strftime('%Y-%m-%d')

    for target_date in target_dates:
        row = forecast[forecast['ds'] == target_date]
        if not row.empty:
            print(f"{target_date}: ${row['yhat'].values[0]:.2f}")
        else:
            print(f"{target_date}: ❌ No forecast (outside prediction range)")
    return forecast

### Spike Detector

In [4]:
# Spike Detection Function

def detect_price_spikes(df, threshold = 0.05):
    """
    Return the rows whose day-over-day change in closing price exceeds a
    threshold in either direction.

    Parameters:
        df (pd.DataFrame): DataFrame with 'Date' and 'Close' columns, ordered
            so that consecutive rows are consecutive observations. It is not
            modified.
        threshold (float): Absolute fractional change that counts as a spike
            (e.g., 0.05 = 5%).

    Returns:
        pd.DataFrame: Columns 'Date', 'Close' and 'pctchange' for the spike
        rows, keeping the index labels of `df`.
    """
    pct_change = df['Close'].pct_change()
    # The first row has no previous close, so its change is NaN and never matches.
    is_spike = pct_change.abs() > threshold

    # Build the result on a copy so the caller's frame gets no extra column.
    spikes = df.loc[is_spike, ['Date', 'Close']].copy()
    spikes['pctchange'] = pct_change[is_spike]
    return spikes

### Scraper: FAANG stock spike headlines

In [5]:
# Scraper with a cap on the number of headlines and a keyword relevance filter

def get_news_headlines(company, date, limit = 5, keywords = None):
    """
    Query the SerpAPI Google News engine for a company and keep the headlines
    whose title contains at least one relevance keyword.

    Parameters:
        company  : str  - Name of the company; used in the search query and
                          copied into every returned record.
        date     : str  - Date (YYYY-MM-DD) sent as both 'from' and 'to' and
                          copied into every returned record.
        limit    : int  - Max number of headlines to return. A value <= 0
                          returns an empty list without calling the API.
        keywords : list - Keywords matched case-insensitively against the
                          title. Defaults to a small finance-related list.

    Returns:
        list[dict]: One dict per kept headline with the keys
        'company', 'date', 'title', 'link' and 'source'.
    """
    if limit <= 0:
        return []

    if keywords is None:
        keywords = ["stock", "price", "earnings", "forecast", "revenue", "market"]
    # Lowercase once instead of once per article.
    lowered_keywords = [kw.lower() for kw in keywords]

    # NOTE(review): confirm that the google_news engine honours 'from'/'to';
    # if it ignores them, the results are not restricted to `date`.
    params = {
        "engine": "google_news",
        "q": f"{company} stock price OR earnings",
        "api_key": SERPAPI_KEY,
        "hl": "en",
        "gl": "us",
        "from": date,
        "to": date
    }

    search = GoogleSearch(params)
    results = search.get_dict()
    # A response without 'news_results' is treated as "no articles".
    articles = results.get("news_results", [])

    headlines = []
    for article in articles:
        # Guard against a present-but-null title.
        title = article.get("title") or ""
        lowered_title = title.lower()
        if any(kw in lowered_title for kw in lowered_keywords):
            headlines.append({
                "company": company,
                "date": date,
                "title": title,
                "link": article.get("link", ""),
                "source": article.get("source", ""),
            })
            if len(headlines) >= limit:
                break

    return headlines

# Get the Data

In [6]:
# Download the daily price history of every company and store it as <name>.csv.
for name, ticker in tickers.items():
    df = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True).reset_index()

    # Flatten tuple column labels to single strings, then keep only the part
    # before the first underscore, which strips the ticker from the name.
    flattened = [
        '_'.join(label).strip() if isinstance(label, tuple) else label
        for label in df.columns
    ]
    df.columns = [label.split('_')[0] for label in flattened]

    filename = f'{name}.csv'
    df.to_csv(filename, index=True)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


# Forecast

In [7]:
# Reload each company's saved price history and run the forecast on it.
for name, ticker in tickers.items():
    filename = f'{name}.csv'
    # The first CSV column is the row index that was written on save.
    df = pd.read_csv(filename, index_col=0)
    forecast_stock(name=name, ticker=ticker, df=df)


📈 Forecasting for Meta (META)


15:21:08 - cmdstanpy - INFO - Chain [1] start processing
15:21:09 - cmdstanpy - INFO - Chain [1] done processing


2025-08-31: $724.67
2025-12-31: $761.17
2026-08-31: $895.27
2026-12-31: $934.49
2027-08-31: $1068.72
2027-12-31: $1107.72

📈 Forecasting for Amazon (AMZN)


15:21:10 - cmdstanpy - INFO - Chain [1] start processing
15:21:10 - cmdstanpy - INFO - Chain [1] done processing


2025-08-31: $230.04
2025-12-31: $235.66
2026-08-31: $264.73
2026-12-31: $270.56
2027-08-31: $299.68
2027-12-31: $305.37

📈 Forecasting for Netflix (NFLX)


15:21:11 - cmdstanpy - INFO - Chain [1] start processing
15:21:12 - cmdstanpy - INFO - Chain [1] done processing


2025-08-31: $1221.53
2025-12-31: $1390.23
2026-08-31: $1704.47
2026-12-31: $1871.52
2027-08-31: $2186.77
2027-12-31: $2351.02

📈 Forecasting for Google (GOOGL)


15:21:13 - cmdstanpy - INFO - Chain [1] start processing
15:21:14 - cmdstanpy - INFO - Chain [1] done processing


2025-08-31: $186.83
2025-12-31: $189.43
2026-08-31: $203.84
2026-12-31: $207.20
2027-08-31: $221.64
2027-12-31: $225.10

📈 Forecasting for NVIDIA (NVDA)


15:21:15 - cmdstanpy - INFO - Chain [1] start processing
15:21:16 - cmdstanpy - INFO - Chain [1] done processing


2025-08-31: $158.88
2025-12-31: $169.96
2026-08-31: $201.94
2026-12-31: $213.34
2027-08-31: $245.20
2027-12-31: $256.46


# Stock Spike Detection and News Headlines

In [8]:

# Only the most recent year of prices is scanned for spikes.
one_year_ago = (datetime.today()-relativedelta(years=1)).strftime('%Y-%m-%d')
all_news = []

for name, ticker in tickers.items():
    filename = f'{name}.csv'
    df = pd.read_csv(filename,index_col=0)

    # Keep the rows from one year ago onwards. 'YYYY-MM-DD' strings sort
    # chronologically, so ">=" works on them and, unlike an exact match, does
    # not fail when the cutoff falls on a day with no trading data.
    newdf = df[df.Date >= one_year_ago].reset_index(drop=True)

    # Run spikes detector for each company
    spikes = detect_price_spikes(newdf, threshold=0.05)  # daily move > 5%

    print(f"🔍 Searching news for {name}")
    if spikes.empty:
        # Nothing to save or search; skipping also prevents reusing the
        # previous company's spike dates.
        continue

    spikefile = f'spike_{name}.csv'
    spike_results = spikes[['Date','pctchange']]
    spike_results.to_csv(spikefile,index=True)

    # Run scraper for all spikes
    for _,row in spike_results.iterrows():
        date = row['Date']
        print(f"⚠️ on {date} (spike: {row['pctchange']:.2%})...")
        headlines = get_news_headlines(name, date)
        all_news.extend(headlines)
        time.sleep(2)  # avoid hitting rate limit

# Save the headlines to csv file
newsdf = pd.DataFrame(all_news)
newsdf.to_csv("faang_spike_news.csv", index=False)
print("✅ News saved to faang_spike_news.csv")

🔍 Searching news for Meta
⚠️ on 2025-04-03 (spike: -8.96%)...
⚠️ on 2025-04-04 (spike: -5.06%)...
⚠️ on 2025-04-09 (spike: 14.76%)...
⚠️ on 2025-04-10 (spike: -6.74%)...
⚠️ on 2025-05-12 (spike: 7.92%)...
⚠️ on 2025-07-31 (spike: 11.25%)...
🔍 Searching news for Amazon
⚠️ on 2024-08-02 (spike: -8.78%)...
⚠️ on 2024-11-01 (spike: 6.19%)...
⚠️ on 2025-04-03 (spike: -8.98%)...
⚠️ on 2025-04-09 (spike: 11.98%)...
⚠️ on 2025-04-10 (spike: -5.17%)...
⚠️ on 2025-05-12 (spike: 8.07%)...
🔍 Searching news for Netflix
⚠️ on 2024-10-18 (spike: 11.09%)...
⚠️ on 2025-01-22 (spike: 9.69%)...
⚠️ on 2025-03-06 (spike: -8.53%)...
⚠️ on 2025-04-04 (spike: -6.67%)...
⚠️ on 2025-04-09 (spike: 8.62%)...
⚠️ on 2025-04-22 (spike: 5.31%)...
⚠️ on 2025-07-18 (spike: -5.10%)...
🔍 Searching news for Google
⚠️ on 2024-12-10 (spike: 5.59%)...
⚠️ on 2024-12-11 (spike: 5.52%)...
⚠️ on 2025-02-05 (spike: -7.29%)...
⚠️ on 2025-04-09 (spike: 9.68%)...
⚠️ on 2025-05-07 (spike: -7.26%)...
🔍 Searching news for NVIDIA
⚠️ on 