# Single Stock Summary Generation
## Get Data

In [1]:
import os
import numpy as np
import pandas as pd
# Progress bar
from tqdm.notebook import tqdm as pbar
from tqdm.auto import tqdm as pdbar

# Pandas tqdm initialization
pdbar.pandas()

# Get rid of warnings 
import warnings

def set_index_no_warnings(df):
    """Build an array of calendar dates from the string index of ``df``.

    Every index label has the "—" character and the "EST" marker removed and
    is stripped of surrounding whitespace before being parsed with
    ``pd.to_datetime(..., format="mixed")``. ``UserWarning`` and
    ``FutureWarning`` emitted while doing so are silenced.

    Despite the name, ``df`` itself is not modified: the function only returns
    the ``.date`` values of the parsed index, which the caller can hand to
    ``DataFrame.set_index``.
    """
    with warnings.catch_warnings():
        # Silence both warning categories for the duration of the parsing only
        for noisy_category in (UserWarning, FutureWarning):
            warnings.simplefilter("ignore", category=noisy_category)

        # Strip the decorations from the raw labels, one substring at a time
        cleaned_labels = df.index
        for junk in ("—", "EST"):
            cleaned_labels = cleaned_labels.str.replace(junk, "")
        cleaned_labels = cleaned_labels.str.strip()

        parsed = pd.to_datetime(cleaned_labels, format="mixed")
        return parsed.date


# Ticker symbol of the stock being processed
tick = "AAPL"

# Input data directory
raw = "../news_raw"

# Load data: read the ticker's CSV keeping only the date and text columns,
# rename the text column, discard rows whose date label is 0 / "0", then
# replace the string labels with parsed dates
df = (
    pd.read_csv(
        os.path.join(raw, f"{tick.lower()}.csv"),
        usecols=["Date", "Text"],
        index_col="Date",
    )
    .rename(columns={"Text": "Article"})
    .query("index != 0 and index != '0'")
    .pipe(lambda frame: frame.set_index(set_index_no_warnings(frame)))
)

df

Unnamed: 0,Article
2024-01-25,"For Immediate Release\nChicago, IL – January 2..."
2024-02-03,Thanks to its lineup of incredibly popular har...
2024-02-03,"In this podcast, Motley Fool host Dylan Lewis ..."
2024-02-03,Shares in Advanced Micro Devices (NASDAQ: AMD)...
2024-02-03,Investing in the stock market is one of the mo...
...,...
2024-02-03,"In this podcast, Motley Fool host Dylan Lewis ..."
2024-02-03,Shares in Advanced Micro Devices (NASDAQ: AMD)...
2024-02-03,Investing in the stock market is one of the mo...
2024-02-03,"Since its price cratered 65% in 2022, Bitcoin ..."


## Summarization

In [2]:
# In the environment, run nltk.download("punkt") once if error is thrown
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.summarizers.text_rank import TextRankSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer

# Number of sentences requested from every summarizer
summary_len = 13

# One summarizer per algorithm, keyed by the name of the column it fills;
# each one is constructed with its own English stemmer
summarizers = {
    column: algorithm(Stemmer("english"))
    for column, algorithm in (
        ("Lsa_summary", LsaSummarizer),
        ("Luhn_summary", LuhnSummarizer),
        ("Textrank_summary", TextRankSummarizer),
        ("Lexrank_summary", LexRankSummarizer),
    )
}

def summarize(text, date):
    """Summarize one article with every configured summarizer.

    ``text`` is parsed as plain text with an English tokenizer, then each
    entry of the module-level ``summarizers`` mapping is called on the parsed
    document and asked for ``summary_len`` sentences.

    Returns a dict with one entry per summarizer name holding that
    summarizer's output, plus a ``"Date"`` entry holding ``date``.
    """
    parser = PlaintextParser.from_string(text, Tokenizer("english"))

    summaries = {}
    for name, bot in summarizers.items():
        summaries[name] = bot(parser.document, summary_len)

    # Tag the result so it can be matched back to its article's date
    summaries["Date"] = date
    return summaries

In [3]:
# For parallel processing
from concurrent.futures import ThreadPoolExecutor

# Column names, one per summarizer
summ_bots = ["Lsa_summary", "Luhn_summary", "Textrank_summary", "Lexrank_summary"]


def helper(text, date):
    """Summarize one article without letting a failure abort the whole batch.

    Parameters
    ----------
    text
        Article text, forwarded to ``summarize``.
    date
        Date label of the article, forwarded to ``summarize``.

    Returns
    -------
    dict
        Whatever ``summarize(text, date)`` returns. If that call raises, the
        actual error is printed and a placeholder dict is returned instead:
        ``None`` for every name in ``summ_bots`` plus the article's ``"Date"``,
        so the failed row still carries its date when the results are turned
        into a date-indexed frame.
    """
    try:
        return summarize(text, date)
    except Exception as exc:
        print("ERROR ERROR ERROR")
        # Report the caught error itself rather than the Exception class
        print(f"{date}: {exc!r}")
        fallback = {name: None for name in summ_bots}
        # Without this key the row would lose its date label downstream
        fallback["Date"] = date
        return fallback

# Summarize the articles on a pool of four threads; every call receives one
# article together with its date label
with ThreadPoolExecutor(max_workers=4) as executor:
    results = list(executor.map(helper, df["Article"], df.index))

# Keep only the non-empty results, turn them into a date-indexed frame and
# place it next to the original frame with its article column dropped
cleaned = list(filter(None, results))
df = pd.concat(
    [
        df.drop(columns=["Article"]),
        pd.DataFrame(cleaned).set_index("Date"),
    ],
    axis=1,
)

df

Unnamed: 0,Lsa_summary,Luhn_summary,Textrank_summary,Lexrank_summary
2024-01-25,"(For Immediate Release Chicago, IL – January 2...","(For Immediate Release Chicago, IL – January 2...","(For Immediate Release Chicago, IL – January 2...","(For Immediate Release Chicago, IL – January 2..."
2024-02-03,(Maybe its turnaround plans could get a boost ...,(Thanks to its lineup of incredibly popular ha...,(Thanks to its lineup of incredibly popular ha...,(Thanks to its lineup of incredibly popular ha...
2024-02-03,"(Dylan Lewis: Bill, I love checking in on Micr...",(It was a long road to becoming a $3 trillion ...,(It was a long road to becoming a $3 trillion ...,"(Andy Cross: Well, first on the news Dylan, an..."
2024-02-03,"(Meanwhile, its free cash flow has plunged 47%...",(Shares in Advanced Micro Devices (NASDAQ: AMD...,(The launch of OpenAI's ChatGPT in November 20...,(Shares in Advanced Micro Devices (NASDAQ: AMD...
2024-02-03,(The S&P 500 recently reached a new all-time h...,(Investing in the stock market is one of the m...,(Investing in the stock market is one of the m...,(Exchange-traded funds (ETFs) can be fantastic...
...,...,...,...,...
2024-02-03,"(Dylan Lewis: Bill, I love checking in on Micr...",(It was a long road to becoming a $3 trillion ...,(It was a long road to becoming a $3 trillion ...,"(Andy Cross: Well, first on the news Dylan, an..."
2024-02-03,"(Meanwhile, its free cash flow has plunged 47%...",(Shares in Advanced Micro Devices (NASDAQ: AMD...,(The launch of OpenAI's ChatGPT in November 20...,(Shares in Advanced Micro Devices (NASDAQ: AMD...
2024-02-03,(The S&P 500 recently reached a new all-time h...,(Investing in the stock market is one of the m...,(Investing in the stock market is one of the m...,(Exchange-traded funds (ETFs) can be fantastic...
2024-02-03,"(Since its price cratered 65% in 2022, Bitcoin...",(Bullish investors are sure that there is a lo...,(But it stands out among the tens of thousands...,"(At a price of just over $43,000 as of Feb. 1,..."


In [4]:
# Select the Lsa summaries of every row labelled with the date 2024-01-25
df.loc[pd.to_datetime("2024-01-25").date(), "Lsa_summary"]

2024-01-25    (For Immediate Release Chicago, IL – January 2...
2024-01-25    (Given this, I can't help but remain bullish o...
2024-01-25    (But retail investors should pay close attenti...
2024-01-25    (Monster Beverage stock (NASDAQ:MNST) has prod...
2024-01-25    (The NASDAQ 100 After Hours Indicator is down ...
2024-01-25    ((RTTNews) - Apple Inc. (AAPL) introduced sign...
2024-01-25    ((RTTNews) - On Wednesday, Microsoft (MSFT) be...
2024-01-25    (Though consumer spending habits can shift sud...
2024-01-25    (Even prominent Tesla bull Dan Ives of Wedbush...
2024-01-25    (Alphabet Inc. Price and EPS Surprise Alphabet...
2024-01-25    (Comparing units outstanding versus one week a...
2024-01-25    (Apple (AAPL) is expected to deliver a year-ov...
2024-01-25    (With Apple (NASDAQ: AAPL) shares rising more ...
2024-01-25    (It will be remembered as the year artificial ...
2024-01-25    (Earnings are arguably the most important sing...
2024-01-25    (Bull of the Day: Apple st

In [5]:
# Sort rows from the most recent date label to the oldest. The result is
# reassigned instead of using inplace=True, which keeps the step chainable
# and avoids mutating a frame that an earlier cell already displayed.
df = df.sort_index(ascending=False)
df

Unnamed: 0,Lsa_summary,Luhn_summary,Textrank_summary,Lexrank_summary
2024-02-03,(China stocks got hit hard last month as a com...,(China stocks got hit hard last month as a com...,(China stocks got hit hard last month as a com...,(China stocks got hit hard last month as a com...
2024-02-03,(Maybe its turnaround plans could get a boost ...,(Thanks to its lineup of incredibly popular ha...,(Thanks to its lineup of incredibly popular ha...,(Thanks to its lineup of incredibly popular ha...
2024-02-03,"(Since its price cratered 65% in 2022, Bitcoin...",(Bullish investors are sure that there is a lo...,(But it stands out among the tens of thousands...,"(At a price of just over $43,000 as of Feb. 1,..."
2024-02-03,(The S&P 500 recently reached a new all-time h...,(Investing in the stock market is one of the m...,(Investing in the stock market is one of the m...,(Exchange-traded funds (ETFs) can be fantastic...
2024-02-03,"(Meanwhile, its free cash flow has plunged 47%...",(Shares in Advanced Micro Devices (NASDAQ: AMD...,(The launch of OpenAI's ChatGPT in November 20...,(Shares in Advanced Micro Devices (NASDAQ: AMD...
...,...,...,...,...
2023-12-19,(Each of the major market indexes has gained m...,(Here's a look at my six largest holdings head...,"(Perhaps more importantly, the S&P 500 and the...",(Here's a look at my six largest holdings head...
2023-12-19,(Among the underlying components of the Russel...,(Among the underlying components of the Russel...,(Among the underlying components of the Russel...,(Among the underlying components of the Russel...
2023-12-19,"(Of the funds launched in 2023, 76% were activ...","(In addition to active ETFs, popular new ETFs ...","(In addition to active ETFs, popular new ETFs ...","(Of the funds launched in 2023, 76% were activ..."
2023-12-19,(Shiba Inu is still up just 25% for the year a...,(More than two years after the peak of meme co...,(More than two years after the peak of meme co...,(More than two years after the peak of meme co...


In [6]:
# Inspect the value stored in the first row, first column before the
# sentences are joined
df.iloc[0, 0]

(<Sentence: China stocks got hit hard last month as a combination of weak economic data, interventions by the Chinese government against selling stocks, and ongoing regulatory concerns pushed the sector down broadly.>,
 <Sentence: JD data by YCharts China stocks continue to struggle Chinese stocks started off the month on the wrong foot as China reported GDP growth of just 5.2% for 2023.>,
 <Sentence: While that would be a good pace for any country, it marked China's slowest yearly growth in 30 years, and its GDP growth slowed to just 4.1% in the fourth quarter.>,
 <Sentence: Investors also seemed spooked that Beijing told some investors not to sell Chinese stock amid a rotation among some money managers from China to Japan.>,
 <Sentence: That was the latest sign of weakness in China's real estate sector.>,
 <Sentence: Those news items all pressured China stocks further, while JD, PDD, and Baidu faced their own challenges.>,
 <Sentence: It has lost market share to PDD Holdings' Pinduod

In [7]:
def _join_sentences(cell):
    """Flatten one summary cell into a single space-separated string.

    Cells that are already strings are returned untouched, so re-running this
    cell does not split a joined summary into individual characters. Missing
    cells (``None``, the placeholder ``helper`` returns for a failed article,
    or a float NaN) are passed through instead of raising ``TypeError``.
    """
    if cell is None or isinstance(cell, (str, float)):
        return cell
    return " ".join(str(sent) for sent in cell)


df[summ_bots] = df[summ_bots].map(_join_sentences)
df

Unnamed: 0,Lsa_summary,Luhn_summary,Textrank_summary,Lexrank_summary
2024-02-03,China stocks got hit hard last month as a comb...,China stocks got hit hard last month as a comb...,China stocks got hit hard last month as a comb...,China stocks got hit hard last month as a comb...
2024-02-03,Maybe its turnaround plans could get a boost w...,Thanks to its lineup of incredibly popular har...,Thanks to its lineup of incredibly popular har...,Thanks to its lineup of incredibly popular har...
2024-02-03,"Since its price cratered 65% in 2022, Bitcoin ...",Bullish investors are sure that there is a lot...,But it stands out among the tens of thousands ...,"At a price of just over $43,000 as of Feb. 1, ..."
2024-02-03,The S&P 500 recently reached a new all-time hi...,Investing in the stock market is one of the mo...,Investing in the stock market is one of the mo...,Exchange-traded funds (ETFs) can be fantastic ...
2024-02-03,"Meanwhile, its free cash flow has plunged 47% ...",Shares in Advanced Micro Devices (NASDAQ: AMD)...,The launch of OpenAI's ChatGPT in November 202...,Shares in Advanced Micro Devices (NASDAQ: AMD)...
...,...,...,...,...
2023-12-19,Each of the major market indexes has gained mo...,Here's a look at my six largest holdings headi...,"Perhaps more importantly, the S&P 500 and the ...",Here's a look at my six largest holdings headi...
2023-12-19,Among the underlying components of the Russell...,Among the underlying components of the Russell...,Among the underlying components of the Russell...,Among the underlying components of the Russell...
2023-12-19,"Of the funds launched in 2023, 76% were active...","In addition to active ETFs, popular new ETFs i...","In addition to active ETFs, popular new ETFs i...","Of the funds launched in 2023, 76% were active..."
2023-12-19,Shiba Inu is still up just 25% for the year am...,More than two years after the peak of meme coi...,More than two years after the peak of meme coi...,More than two years after the peak of meme coi...


In [8]:
# Inspect the first row, first column again after the sentences were joined
df.iloc[0, 0]

"China stocks got hit hard last month as a combination of weak economic data, interventions by the Chinese government against selling stocks, and ongoing regulatory concerns pushed the sector down broadly. JD data by YCharts China stocks continue to struggle Chinese stocks started off the month on the wrong foot as China reported GDP growth of just 5.2% for 2023. While that would be a good pace for any country, it marked China's slowest yearly growth in 30 years, and its GDP growth slowed to just 4.1% in the fourth quarter. Investors also seemed spooked that Beijing told some investors not to sell Chinese stock amid a rotation among some money managers from China to Japan. That was the latest sign of weakness in China's real estate sector. Those news items all pressured China stocks further, while JD, PDD, and Baidu faced their own challenges. It has lost market share to PDD Holdings' Pinduoduo, which has been growing rapidly through its social commerce model, which offers discounted p

## Export

In [9]:
# Make output directory: build the path once and reuse it, so the directory
# that gets created is always the one later cells refer to through out_dir
out_dir = f"../{tick.lower()}_data"
os.makedirs(out_dir, exist_ok=True)

In [10]:
# Export to csv: write the summary frame to summaries.csv inside out_dir
df.to_csv(os.path.join(out_dir, "summaries.csv"))

In [11]:
# Display the frame that was just written to disk
df

Unnamed: 0,Lsa_summary,Luhn_summary,Textrank_summary,Lexrank_summary
2024-02-03,China stocks got hit hard last month as a comb...,China stocks got hit hard last month as a comb...,China stocks got hit hard last month as a comb...,China stocks got hit hard last month as a comb...
2024-02-03,Maybe its turnaround plans could get a boost w...,Thanks to its lineup of incredibly popular har...,Thanks to its lineup of incredibly popular har...,Thanks to its lineup of incredibly popular har...
2024-02-03,"Since its price cratered 65% in 2022, Bitcoin ...",Bullish investors are sure that there is a lot...,But it stands out among the tens of thousands ...,"At a price of just over $43,000 as of Feb. 1, ..."
2024-02-03,The S&P 500 recently reached a new all-time hi...,Investing in the stock market is one of the mo...,Investing in the stock market is one of the mo...,Exchange-traded funds (ETFs) can be fantastic ...
2024-02-03,"Meanwhile, its free cash flow has plunged 47% ...",Shares in Advanced Micro Devices (NASDAQ: AMD)...,The launch of OpenAI's ChatGPT in November 202...,Shares in Advanced Micro Devices (NASDAQ: AMD)...
...,...,...,...,...
2023-12-19,Each of the major market indexes has gained mo...,Here's a look at my six largest holdings headi...,"Perhaps more importantly, the S&P 500 and the ...",Here's a look at my six largest holdings headi...
2023-12-19,Among the underlying components of the Russell...,Among the underlying components of the Russell...,Among the underlying components of the Russell...,Among the underlying components of the Russell...
2023-12-19,"Of the funds launched in 2023, 76% were active...","In addition to active ETFs, popular new ETFs i...","In addition to active ETFs, popular new ETFs i...","Of the funds launched in 2023, 76% were active..."
2023-12-19,Shiba Inu is still up just 25% for the year am...,More than two years after the peak of meme coi...,More than two years after the peak of meme coi...,More than two years after the peak of meme coi...
