<img width="8%" alt="YahooFinance.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/YahooFinance.png" style="border-radius: 15%">

# YahooFinance - Chat about Amgen trends and predictions
<a href="https://bit.ly/3JyWIk6">Give Feedback</a> | <a href="https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=bug&template=bug_report.md&title=YahooFinance+-+Chat+about+Amgen+trends+and+predictions:+Error+short+description">Bug report</a>

**Tags:** #yahoo #finance #ai #chat #plugin #python #amgn

**Author:** [Jeremy Ravenel](https://www.linkedin.com/in/jeremyravenel/)

**Last update:** 2023-08-31 (Created: 2023-08-31)

**Description:** This notebook generates a Naas Chat plugin for YahooFinance about Amgen. It uses Python to query the YahooFinance API and NewsAPI, builds one big table with actual values, predictions, news, and sentiment, and outputs a plugin that can be used to answer questions about the stock performance.

**References:**
- [YahooFinance Naas driver](https://pypi.org/project/naas-drivers/)
- [NLTK for sentiment analysis](https://github.com/cjhutto/vaderSentiment)
- [NewsAPI for latest news query](https://newsapi.org/docs)
- [OpenAI API](https://platform.openai.com/docs/introduction)

## Input

### Import libraries

In [None]:
import os
from os import path
import naas
import pandas as pd
from naas_drivers import prediction, yahoofinance, plotly, newsapi
import plotly.graph_objects as go
import markdown2
from datetime import datetime
from IPython.core.display import display, HTML
try: 
    import nltk
except:
    !pip install nltk --user
    import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import json
try:
    import tiktoken
except:
    !pip install tiktoken --user
    import tiktoken

### Setup variables
**Mandatory**
- `ticker_name`: This is a variable that represents the full name of a specific company's stock.
- `ticker`: This variable holds the unique series of letters assigned to a security for trading purposes.
- `index_name`: This variable represents the name of a specific stock market index.
- `index`: This variable holds the unique symbol of a specific stock market index.

**Optional**
- `date_from`: This is an optional variable that indicates the starting date for a range of dates.
- `date_to`: This variable represents the end date of a range.
- `data_points`: This variable represents the number of data points that will be predicted in the future.
- `output_dir`: This variable represents the directory where the output files will be stored.
- `now`: This variable holds the current date and time in a specific format.
- `csv_output`: This variable represents the path of the output file in CSV format.
- `image_output`: This variable represents the path of the output file in image format.
- `html_output`: This variable represents the path of the output file in HTML format.
- `tracker_output`: This variable represents the path of the tracker output file in CSV format.
- `plugin_name`: This variable represents the name of the plugin.
- `plugin_output`: This variable represents the path of the plugin.

In [None]:
# Mandatory
ticker_name = "Amgen"  # company name, used for the news query and in titles
ticker = "AMGN"  # symbol of the stock
index_name = "Nasdaq"  # name of the benchmark index (in this case Nasdaq)
index = "^IXIC"  # symbol of the benchmark index (in this case Nasdaq)

# Optional
date_from = -270  # passed to yahoofinance.get; presumably days relative to today - TODO confirm
date_to = "today"
data_points = 90  # number of data points predicted in the future
output_dir = ticker
now = datetime.now().strftime("%Y-%m-%d")
csv_output = path.join(output_dir, f"{now}_{ticker}.csv")
image_output = path.join(output_dir, f"{now}_{ticker}.png")
html_output = path.join(output_dir, f"{now}_{ticker}.html")
tracker_output = path.join(output_dir, f"{now}_{ticker}_tracker.csv")
plugin_name = f"YahooFinance - Chat about {ticker_name} trends and predictions"

# Normalise the file name only. Doing it on the joined path would also rewrite
# the directory part, which then no longer matches the directory created below
# whenever output_dir contains "-", " " or lowercase characters.
plugin_file = f"{plugin_name}.json".replace(" ", "_").replace("-", "_").upper()
plugin_output = path.join(output_dir, plugin_file)

# Create output dir
os.makedirs(output_dir, exist_ok=True)

## Model

### Get dataset from Yahoo Finance

In [None]:
# Fetch the quotes of the stock and drop incomplete rows
stock_quotes = yahoofinance.get(tickers=ticker, date_from=date_from, date_to=date_to)
df_yahoo = stock_quotes.dropna().reset_index(drop=True)

# Rolling minimum and maximum of the 'Close' column over 20 rows
close_window = df_yahoo["Close"].rolling(window=20)
df_yahoo["min_rolling"] = close_window.min()
df_yahoo["max_rolling"] = close_window.max()

# Upper-case all column names
df_yahoo.columns = df_yahoo.columns.str.upper()

# Display dataframe
df_yahoo.tail(5)

### Add relative index data

In [None]:
# Get the Index data
relative_index_data = yahoofinance.get(
    tickers=index,
    date_from=date_from,
    date_to=date_to
).dropna().reset_index(drop=True)

# Look the index close up by date rather than by row position: the stock and
# the index frames each went through their own dropna(), so equal row numbers
# are not guaranteed to be the same trading day.
index_close = (
    relative_index_data.rename(columns=str.upper)
    .drop_duplicates(subset="DATE", keep="last")  # Series.map needs unique keys
    .set_index("DATE")["CLOSE"]
)

# Calculate the relative strength (stock close divided by index close)
df_yahoo['RELATIVE_INDEX'] = df_yahoo['DATE'].map(index_close)
df_yahoo['RELATIVE_STRENGTH_BASE'] = df_yahoo['CLOSE'] / df_yahoo['RELATIVE_INDEX']
# Scale the close by (1 - base ratio)
df_yahoo['RELATIVE_STRENGTH'] = df_yahoo['CLOSE'] * (1 - df_yahoo['RELATIVE_STRENGTH_BASE'])

# Display dataframe
df_yahoo.tail(5)

### Create tracker data

In [None]:
def get_variation(df, ticker):
    """Build the one-row tracker summary for the most recent close.

    Args:
        df: DataFrame with a "DATE" column and a numeric "CLOSE" column.
        ticker: Label written to the "INDICATOR" column.

    Returns:
        A one-row DataFrame with the columns ENTITY, SCENARIO, INDICATOR,
        TYPE, SOURCE, VALUE, MIX, MAX and SCORE. VALUE/MIX/MAX are dollar
        strings; SCORE places the latest close on a 0-10 scale between the
        lowest and highest close in `df` (0.0 when all closes are equal).

    Raises:
        KeyError: if `df` has no rows or lacks the "DATE"/"CLOSE" columns.
    """
    latest_first = df.sort_values("DATE", ascending=False).reset_index(drop=True)

    # Latest observation
    date_now = latest_first.loc[0, "DATE"]
    datanow = latest_first.loc[0, "CLOSE"]

    # Get min and max value
    min_value = latest_first["CLOSE"].min()
    max_value = latest_first["CLOSE"].max()

    # Calculate the score; a flat series has no range to scale against, so it
    # scores 0.0 instead of dividing by zero.
    value_range = max_value - min_value
    score = 10 * (datanow - min_value) / value_range if value_range else 0.0

    # Format result (values are rounded to one decimal before being rendered)
    datanow = "${:,.2f}".format(round(datanow, 1))
    min_value = "${:,.2f}".format(round(min_value, 1))
    max_value = "${:,.2f}".format(round(max_value, 1))

    # Create a DataFrame to hold the results
    # NOTE(review): "MIX" looks like a typo for "MIN"; the key is kept so the
    # output columns stay unchanged - confirm before renaming.
    result_df = pd.DataFrame({
        "ENTITY": "Universal Tracker",
        "SCENARIO": [date_now],
        "INDICATOR": [ticker],
        "TYPE": "Financial",
        "SOURCE": "Yahoo Finance",
        "VALUE": [datanow],
        "MIX": [min_value],
        "MAX": [max_value],
        "SCORE": [round(score, 2)]
    })

    return result_df

# Build the tracker row for the stock and preview it
df_tracker = get_variation(df=df_yahoo, ticker=ticker)
df_tracker.tail(5)

### Add prediction

In [None]:
# Forecast the close; keep the rows oldest-first while deriving the indicators
df_predict = prediction.get(
    dataset=df_yahoo,
    date_column='DATE',
    column="CLOSE",
    data_points=data_points,
    prediction_type="all"
).sort_values("DATE").reset_index(drop=True)

# Create 'TOTAL_PREDICT' column: the LINEAR value where present, else the close
df_predict['TOTAL_PREDICT'] = df_predict['LINEAR'].fillna(df_predict['CLOSE'])

# Calculate the 5, 10 and 50 row moving averages. rolling() looks back over the
# preceding rows, so the frame must be in chronological order here; on a
# newest-first frame each average would be built from the following days.
for ma_window in (5, 10, 50):
    df_predict[f'MA{ma_window:02d}'] = (
        df_predict['TOTAL_PREDICT'].rolling(window=ma_window).mean()
    )

# Back to newest-first, the order this cell has always produced
df_predict = df_predict.sort_values("DATE", ascending=False).reset_index(drop=True)

# Display dataframe
df_predict.head(int(data_points)+5)

### Get news data

In [None]:
# Query the news driver for articles about the company
news_fields = ["date", "title", "image", "link", "description"]
df_news = newsapi.connect().get(f"{ticker_name}", fields=news_fields)

# Keep the calendar day only, then discard articles with any missing field
df_news["date"] = pd.to_datetime(df_news["date"]).dt.date
df_news = df_news.dropna()

print("News:", len(df_news))
df_news.tail(5)

### Analyze sentiment per news article

In [None]:
# Download the VADER lexicon through NLTK; it is used for the sentiment analysis
nltk.download('vader_lexicon') 

# Create sentiment analysis
def analyze_sentiment(df, columns):
    """Add VADER polarity scores for each text column and sort newest-first.

    For every name in `columns`, the columns `<name>_neg`, `<name>_neu`,
    `<name>_pos` and `<name>_compound` are written to `df`. They are added to
    the caller's frame in place, as before, so the frame passed in also
    carries the score columns afterwards.

    Args:
        df: DataFrame holding the text columns and a "date" column.
        columns: Names of the text columns to score.

    Returns:
        A new DataFrame sorted by "date" (descending) with a fresh 0..n-1 index.
    """
    sid = SentimentIntensityAnalyzer()
    score_keys = ["neg", "neu", "pos", "compound"]

    for column in columns:
        # One frame per text column, built in a single pass. Naming the columns
        # explicitly maps each score by key (not by position) and still yields
        # the four columns when `df` has no rows.
        scores = pd.DataFrame(
            [sid.polarity_scores(text) for text in df[column]],
            index=df.index,
            columns=score_keys,
        )
        for key in score_keys:
            df[f'{column}_{key}'] = scores[key]

    return df.sort_values(by="date", ascending=False).reset_index(drop=True)

# Call the function
# Score the news titles and descriptions, then preview the newest articles
df_sentiment = analyze_sentiment(df=df_news, columns=['title', 'description'])
df_sentiment.head(5)

### Compute sentiments stats

In [None]:
# Calculate sentiment stats
def calculate_sentiment(df, columns):
    """Append one summary row per text column and add row-wise total columns.

    For each name in `columns` a row is appended holding the rounded sums of
    `<name>_neg/_neu/_pos/_compound` and each sum's share (in percent) of the
    four sums added together. The appended rows carry no value in any other
    column. Finally `total_neg/_neu/_pos/_compound` are added as the row-wise
    sum of the title and description scores.

    Args:
        df: DataFrame with the `title_*` and `description_*` score columns.
            It is not modified.
        columns: Names of the text columns to summarise.

    Returns:
        A new DataFrame: the input rows followed by the summary rows.
    """
    score_keys = ("neg", "neu", "pos", "compound")

    for column in columns:
        # Calculate sums (float() keeps round() working for any numeric dtype)
        sums = {
            key: round(float(df[f'{column}_{key}'].sum()), 2)
            for key in score_keys
        }

        # Calculate total
        total = sum(sums.values())

        # Sums first, then percentages; 0.0 when there is nothing to divide by
        summary = {f'{column}_{key}': value for key, value in sums.items()}
        for key, value in sums.items():
            summary[f'{column}_{key}_percent'] = (
                round(value / total * 100, 2) if total else 0.0
            )

        # DataFrame.append was removed in pandas 2.0; concat is the replacement
        df = pd.concat([df, pd.DataFrame([summary])], ignore_index=True)

    # Create global stats once all summary rows exist. Summary rows hold values
    # for a single text column only, so their totals come out as NaN.
    if len(columns) > 0:
        for key in score_keys:
            df[f'total_{key}'] = df[f'title_{key}'] + df[f'description_{key}']
    return df

# Call the function
# Append the summary rows and total columns, then preview the result
df_news_stats = calculate_sentiment(df=df_sentiment, columns=['title', 'description'])
df_news_stats.head(5)

### Sum sentiment by category

In [None]:
# Sum the article rows only. calculate_sentiment() appended one summary row per
# text column (recognisable by the missing date); adding those in as well would
# count every score twice.
article_rows = df_news_stats[df_news_stats["date"].notna()]

# Calculate sums for 'title' and 'description'
title_neg = article_rows["title_neg"].sum()
title_neu = article_rows["title_neu"].sum()
title_pos = article_rows["title_pos"].sum()
title_compound = article_rows["title_compound"].sum()

desc_neg = article_rows["description_neg"].sum()
desc_neu = article_rows["description_neu"].sum()
desc_pos = article_rows["description_pos"].sum()
desc_compound = article_rows["description_compound"].sum()

# Calculate total sums
total_neg = round(float(title_neg + desc_neg), 2)
total_neu = round(float(title_neu + desc_neu), 2)
total_pos = round(float(title_pos + desc_pos), 2)
total_compound = round(float(title_compound + desc_compound), 2)

# Calculate percentage contribution of each category
# (0.0 when the four totals add up to zero, e.g. when no news was found)
sentiment_sum = total_neg + total_neu + total_pos + total_compound
neg_percent = round(total_neg / sentiment_sum * 100, 2) if sentiment_sum else 0.0
neu_percent = round(total_neu / sentiment_sum * 100, 2) if sentiment_sum else 0.0
pos_percent = round(total_pos / sentiment_sum * 100, 2) if sentiment_sum else 0.0
compound_percent = round(total_compound / sentiment_sum * 100, 2) if sentiment_sum else 0.0

# Preview of the sentiment summary (the same figures go into the chart title)
print("Sum of news sentiment by category:")
print(
    "\n\t🔴 Negative \t",
    total_neg,
    f"({neg_percent}%)",
    "\n\t🟠 Neutral\t",
    total_neu,
    f"({neu_percent}%)",
    "\n\t🟢 Positive \t",
    total_pos,
    f"({pos_percent}%)",
    "\n\t🔵 Compound \t",
    total_compound,
    f"({compound_percent}%)",
)

### Merge the stock and news dataframes

In [None]:
# Upper-case the news column names so they match the stock frame
df_news_stats.columns = df_news_stats.columns.str.upper()

# Bring both DATE columns to pandas datetimes so they can be joined
for frame in (df_news_stats, df_predict):
    frame['DATE'] = pd.to_datetime(frame['DATE'])

# One big table: every prediction row, with the news of the same date if any
df_obt = df_predict.merge(df_news_stats, on='DATE', how='left')
df_obt

### Save master table to CSV

In [None]:
# Write the merged table to the CSV path (without the index column)
df_obt.to_csv(csv_output, index=False)

# Register the CSV as a naas asset; the returned value is kept in link_csv
link_csv = naas.asset.add(csv_output)

### Create graph with trends and predictions

In [None]:
# Columns of the master table drawn as lines on the chart
chart_series = [
    "CLOSE",
    "LINEAR",
    "RELATIVE_STRENGTH",
    "MA05",
    "MA10",
    "MA50",
    "MAX_ROLLING",
    "MIN_ROLLING",
]

# Two-line HTML title: headline, then the news sentiment summary
chart_title = f'''<b><span style='font-size: 20px;'>{ticker_name} Trends & Prediction +{str(data_points)} days</span></b>
    <br><span style='font-size: 10px;'><b>News Analysis:</b> Negative: {total_neg} ({neg_percent}%), Neutral: {total_neu} ({neu_percent}%), Positive: {total_pos} ({pos_percent}%), Compound: {total_compound} ({compound_percent}%)</span>
    '''

fig = plotly.linechart(
    df_obt,
    x="DATE",
    y=chart_series,
    showlegend=True,
    title=chart_title,
)

### Save and share graph in PNG

In [None]:
# Save your graph in PNG at the image path
fig.write_image(image_output)

# Share output with naas; the returned value is kept in link_image
link_image = naas.asset.add(image_output, params={"inline": True})

#-> Uncomment the line below to remove your asset
# naas.asset.delete(image_output)

## Output

### Create Naas Chat plugin
We used the OpenAI Playground to refine the system prompt: https://platform.openai.com/playground?mode=chat&model=gpt-4

In [None]:
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens `string` encodes to under the named tiktoken encoding."""
    tokens = tiktoken.get_encoding(encoding_name).encode(string)
    return len(tokens)

def create_plugin(
    file_path,
    name,
    model,
    temperature,
    max_tokens,
    system_prompt,
):
    """Write the plugin definition to `file_path` as JSON and return the path.

    Before writing, the system prompt is tokenised with the "cl100k_base"
    encoding and a message is printed telling whether it exceeds 20% of
    `max_tokens`. The check only prints; the file is written either way.

    Args:
        file_path: Destination of the JSON file.
        name: Value stored under "name".
        model: Value stored under "model".
        temperature: Value stored under "temperature".
        max_tokens: Value stored under "max_tokens"; also the base of the check.
        system_prompt: Text stored under "prompt".

    Returns:
        `file_path`, unchanged.
    """
    # Size check: the prompt should stay within a fifth of the token budget
    budget = max_tokens * 0.2
    recommended_max = int(budget)
    prompt_tokens = num_tokens_from_string(system_prompt, "cl100k_base")
    if prompt_tokens <= budget:
        print(f"✅ System prompt tokens count OK: {prompt_tokens} (Max recommanded: {recommended_max})")
    else:
        print(f"⚠️ Be carefull, your system prompt looks too big. Tokens: {prompt_tokens} ({recommended_max})")

    # Assemble the plugin definition
    plugin = dict(
        name=name,
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        prompt=system_prompt,
    )

    # Serialise it to the target file
    with open(file_path, "w") as f:
        f.write(json.dumps(plugin))
    return file_path

# Chat settings stored in the plugin
model = "gpt-3.5-turbo-16k"
temperature = 0
max_tokens = 16384

# Prompt template; the [PLACEHOLDER] tokens are filled in below
system_prompt = """
You are Abi, experience trader in the NY Stock Exchange. You want to analyze the stock of [TICKER]. 
Start by presenting youself.
Always display the graph in markdown about the [TICKER] trends and predictions: [LINK_IMAGE] and the CSV of all data computed to download and analyze in further details: [LINK_CSV]
Present the latest news on [TICKER].

Stock data: [STOCK]
News data: [DATA]
"""

# Substitute the placeholders one after another, in this order
prompt_values = {
    "[TICKER]": ticker,
    "[LINK_IMAGE]": link_image,
    "[LINK_CSV]": link_csv,
    "[DATA]": df_news.to_string(index=False),
    "[STOCK]": df_tracker.to_string(index=False),
}
for placeholder, replacement in prompt_values.items():
    system_prompt = system_prompt.replace(placeholder, replacement)

# Write the plugin file and report where it was saved
plugin_path = create_plugin(
    file_path=plugin_output,
    name=plugin_name,
    model=model,
    temperature=temperature,
    max_tokens=max_tokens,
    system_prompt=system_prompt,
)
print("✅ Plugin path:", plugin_path)

### Save plugin as naas asset
You can now use it in MyChatGPT by copying and pasting the URL after the command `/use`.

In [None]:
# Register the plugin JSON as a naas asset (the cell output shows the call's result)
naas.asset.add(plugin_path, params={"inline": True})