In [79]:
import streamlit as st
from pytrends.request import TrendReq
from prophet import Prophet
import pandas as pd
import plotly.graph_objects as go

# Options for the country selector, as (display label, geo code) pairs.
# The geo code is what gets passed as `geo=` when building the trends payload;
# the "Worldwide" entry uses an empty code.
country_list = list({
    "Worldwide": "",
    "United States": "US",
    "United Kingdom": "GB",
    "India": "IN",
    "Canada": "CA",
}.items())

# Current local date as a YYYY-MM-DD string; used to split actual from forecast rows.
today = pd.Timestamp.now().date().isoformat()

def fit_and_forecast(df, periods=365, freq='D'):
    """Fit a Prophet model on a history frame and predict past its end.

    Args:
        df: DataFrame with at least a ``ds`` (date) column and a ``y`` (value)
            column. Any other columns are ignored; only ``ds`` and ``y`` are
            passed to the model.
        periods: Number of future steps to append after the last history row.
            Defaults to 365, the horizon that was previously hard-coded.
        freq: Pandas frequency string for the future steps. Defaults to ``'D'``
            (daily), which matches the previous behaviour. Pass e.g. ``'W'``
            together with ``periods=52`` to forecast at a weekly step instead
            of sub-sampling a daily forecast afterwards.

    Returns:
        The DataFrame returned by ``Prophet.predict`` for the history dates
        plus the requested future dates (columns used elsewhere in this file:
        ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``).
    """
    model = Prophet()
    model.fit(df[['ds', 'y']])
    future = model.make_future_dataframe(periods=periods, freq=freq)
    return model.predict(future)

def plot_trend(fig, df, forecast, keyword, color):
    """Add the actual line, forecast line and uncertainty band for one keyword.

    Rows are split on the module-level ``today`` string: history rows with
    ``ds <= today`` are drawn as the actual series, forecast rows with
    ``ds > today`` are drawn as the forecast and its band.

    Args:
        fig: Figure to draw on; traces are appended via ``fig.add_trace``.
        df: History frame with ``ds`` and ``y`` columns.
        forecast: Forecast frame with ``ds``, ``yhat``, ``yhat_upper`` and
            ``yhat_lower`` columns.
        keyword: Search term, used in the legend labels.
        color: Mapping with the keys ``'Actual'``, ``'Forecast'`` and
            ``'ConfidenceInterval'`` holding the colours to use.

    Returns:
        The same ``fig`` object, with four traces appended.
    """
    observed = df[df['ds'] <= today]
    projected = forecast[forecast['ds'] > today]
    hover = 'Date: %{x}<br>Value: %{y}'

    # Actual
    fig.add_trace(go.Scatter(
        x=observed['ds'],
        y=observed['y'],
        mode='lines',
        name=f'Actual for {keyword}',
        line=dict(color=color['Actual']),
        hovertemplate=hover
    ))

    # Forecast
    fig.add_trace(go.Scatter(
        x=projected['ds'],
        y=projected['yhat'],
        mode='lines',
        name=f'Forecast for {keyword}',
        line=dict(color=color['Forecast']),
        hovertemplate=hover
    ))

    # Confidence interval, upper bound: an invisible anchor line only.
    # It must NOT fill: 'tonexty' fills toward the previous trace, which here
    # is the forecast line, so filling from this trace as well would shade the
    # upper half of the band twice.
    fig.add_trace(go.Scatter(
        x=projected['ds'],
        y=projected['yhat_upper'],
        mode='lines',
        line=dict(width=0),
        showlegend=False,
        hoverinfo='skip'
    ))

    # Confidence interval, lower bound: fills up to the upper-bound trace
    # added just before it, producing a single evenly shaded band.
    fig.add_trace(go.Scatter(
        x=projected['ds'],
        y=projected['yhat_lower'],
        mode='lines',
        line=dict(width=0),
        showlegend=False,
        fillcolor=color['ConfidenceInterval'],
        fill='tonexty',
        hoverinfo='skip'
    ))

    return fig

# Per-keyword colour palette for the chart. The outer key is 'Keyword<N>'
# (N = 1-based position of the keyword); each palette provides the line colour
# for actual values, the line colour for forecast values, and the translucent
# fill colour for the confidence band.
color_dict = {
    'Keyword1': {
        'Actual': '#2E86C1',
        'Forecast': '#5DADE2',
        'ConfidenceInterval': 'rgba(93, 173, 226, 0.3)',
    },
    'Keyword2': {
        'Actual': '#BFC9CA',
        'Forecast': '#D5DBDB',
        'ConfidenceInterval': 'rgba(213, 219, 219, 0.3)',
    },
    'Keyword3': {
        'Actual': '#58D68D',
        'Forecast': '#7DCEA0',
        'ConfidenceInterval': 'rgba(125, 206, 160, 0.3)',
    },
}



# Streamlit controls: keyword count and country side by side, then one text
# box per requested keyword.
col1, col2 = st.columns(2)

# Number of keywords selector
with col1:
    num_keywords = st.selectbox(
        "Number of Keywords",
        [1, 2, 3],
        format_func=lambda x: f"{x} Keyword{'s' if x > 1 else ''}",
    )

# Country selector: options are (label, geo code) tuples, only the label is shown
with col2:
    selected_country, selected_country_code = st.selectbox(
        "Country", country_list, format_func=lambda x: x[0]
    )

# Keywords
keyword1 = st.text_input("Enter first keyword", "")
keyword2, keyword3 = "", ""
if num_keywords >= 2:
    keyword2 = st.text_input("Enter second keyword", "")
if num_keywords == 3:
    keyword3 = st.text_input("Enter third keyword", "")

# Pipe-delimited text summary of the actual and forecast values, appended to
# for every processed keyword.
text_data = ""


def _join_rounded(values):
    """Return the values rounded to whole numbers and joined with commas."""
    return ','.join(str(round(value)) for value in values)


# Button
if st.button("Check Popularity"):
    # Keep only non-blank entries, trimmed, without duplicates (order kept).
    # A blank keyword would be sent to Google Trends as an empty search term,
    # and a repeated keyword would produce two identically named columns.
    keywords = list(dict.fromkeys(
        kw.strip() for kw in (keyword1, keyword2, keyword3) if kw.strip()
    ))

    if not keywords:
        st.warning("Enter at least one keyword.")
    else:
        pytrends = TrendReq(hl='en-US', tz=360)
        pytrends.build_payload(keywords, cat=0, timeframe='today 5-y', geo=selected_country_code, gprop='')
        data = pytrends.interest_over_time()

        if data.empty:
            # Nothing to fit or plot; the forecasting step needs history rows.
            st.warning("Google Trends returned no data for the selected keywords and country.")
        else:
            fig = go.Figure()
            # Computed once so every series of every keyword uses the same split date.
            cutoff = pd.Timestamp.now().strftime("%Y-%m-%d")

            for i, keyword in enumerate(keywords):
                color = color_dict[f'Keyword{i+1}']
                df = data.reset_index().rename(columns={'date': 'ds', keyword: 'y'})
                forecast = fit_and_forecast(df)

                past = df['ds'] <= cutoff
                ahead = forecast['ds'] > cutoff

                # Add to text_data with rounding to the nearest whole number and sampling every 2nd point
                text_data += f"Keyword number {i+1}|{keyword}|"
                text_data += "|Actual data from Google Trends|" + _join_rounded(df.loc[past, 'y'].iloc[::2].tolist())
                text_data += "|Forecasted data using the Prophet model|" + _join_rounded(forecast.loc[ahead, 'yhat'].iloc[::2].tolist())
                text_data += "|Upper Confidence Interval|" + _join_rounded(forecast.loc[ahead, 'yhat_upper'].iloc[::2].tolist())
                text_data += "|Lower Confidence Interval|" + _join_rounded(forecast.loc[ahead, 'yhat_lower'].iloc[::2].tolist())
                text_data += "|"

                fig = plot_trend(fig, df, forecast, keyword, color)

            # NOTE(review): `fig` is built here but nothing in this block renders
            # it; presumably an `st.plotly_chart(fig)` call belongs after the
            # loop -- confirm against the full app.

# Stand-alone run of the fetch -> forecast -> summarise steps for one fixed
# keyword, using the country code chosen in the selector above.
keyword = "nfl"
keywords = [keyword]

pytrends = TrendReq(hl='en-US', tz=360)
pytrends.build_payload(keywords, cat=0, timeframe='today 5-y', geo=selected_country_code, gprop='')
data = pytrends.interest_over_time()

df = data.reset_index().rename(columns={'date': 'ds', keyword: 'y'})
forecast = fit_and_forecast(df)

# Rows dated up to today count as actual values, later rows as forecast.
cutoff = pd.Timestamp.now().strftime("%Y-%m-%d")
past = df['ds'] <= cutoff
ahead = forecast['ds'] > cutoff

# Add to text_data with rounding to the nearest whole number and sampling every 2nd point
# NOTE(review): every 2nd row is taken from both frames; if the history is
# weekly and the forecast daily, the two lists end up on different time steps
# -- confirm this is intended.
text_data += f"Keyword number 1:{keywords[0]} "
text_data += "|Starting date|" + df['ds'][0].strftime("%Y-%m-%d")
text_data += "|Actual data from Google Trends|" + ','.join(
    str(round(value)) for value in df.loc[past, 'y'].iloc[::2].tolist()
)
for heading, column in (
    ("|Forecasted data generated using the Prophet model|", 'yhat'),
    ("|Upper Confidence Interval|", 'yhat_upper'),
    ("|Lower Confidence Interval|", 'yhat_lower'),
):
    text_data += heading + ','.join(
        str(round(value)) for value in forecast.loc[ahead, column].iloc[::2].tolist()
    )
text_data += "|"

# Stand-alone run of the fetch -> forecast -> summarise steps for a second
# fixed keyword, using the country code chosen in the selector above.
keywords2 = ["nba"]
keyword2 = "nba"

pytrends = TrendReq(hl='en-US', tz=360)
pytrends.build_payload(keywords2, cat=0, timeframe='today 5-y', geo=selected_country_code, gprop='')
data = pytrends.interest_over_time()


df = data.reset_index().rename(columns={'date': 'ds', keyword2: 'y'})
forecast = fit_and_forecast(df)


# Find the latest date from df
relevant_date = df.iloc[-1]['ds']

# Keep the forecast rows that fall a whole number of weeks after the last
# observation, so the forecast continues the 7-day step of the actual series.
# Plain `[::7]` on the rows after `relevant_date` would start one day after the
# last observation (+1, +8, +15, ... days) and drift off that weekly grid.
days_ahead = (forecast['ds'] - relevant_date).dt.days
weekly_forecast = forecast[(days_ahead > 0) & (days_ahead % 7 == 0)]

text_data += f"Keyword number 2:{keywords2[0]} "
text_data += "|Starting date|" + df['ds'][0].strftime("%Y-%m-%d")
text_data += "|Actual data from Google Trends|" + ','.join(map(lambda x: str(round(x)), df['y'].tolist()))
text_data += "|Forecasted data generated using the Prophet model|" + ','.join(map(lambda x: str(round(x)), weekly_forecast['yhat'].tolist()))
text_data += "|Upper confidence interval|" + ','.join(map(lambda x: str(round(x)), weekly_forecast['yhat_upper'].tolist()))
text_data += "|Lower confidence interval|" + ','.join(map(lambda x: str(round(x)), weekly_forecast['yhat_lower'].tolist()))
text_data += "|"


# Cell output: every second predicted value (`yhat`) from row position 265 onward.
forecast['yhat'].iloc[265::2].tolist()

23:55:23 - cmdstanpy - INFO - Chain [1] start processing
23:55:23 - cmdstanpy - INFO - Chain [1] done processing
23:55:24 - cmdstanpy - INFO - Chain [1] start processing
23:55:24 - cmdstanpy - INFO - Chain [1] done processing


[41.86246469823671,
 41.28082587596295,
 40.70190300456818,
 40.18279941926228,
 39.77099929224472,
 39.49871537838506,
 39.3789377002135,
 39.40365928529427,
 39.54452412721974,
 39.75588124265193,
 39.9799647588866,
 40.153681733227806,
 40.21630280747555,
 40.117236548447195,
 39.82303963973915,
 39.32287621852518,
 38.63178533648834,
 37.791331483443024,
 36.867477487078986,
 35.94580472272603,
 35.124482793667056,
 34.50563068354372,
 34.185888552718936,
 34.24711494714246,
 34.748127695920445,
 35.71831718759953,
 37.15378666057849,
 39.01643298279857,
 41.236097420286505,
 43.71561778780202,
 46.33833123355535,
 48.97733912134264,
 51.50567579389533,
 53.806438135273325,
 55.78194078202928,
 57.361061080070215,
 58.504117743905766,
 59.204868921108925,
 59.48949426785583,
 59.41271357642153,
 59.051462869641725,
 58.49677155283622,
 57.844640129909294,
 57.18679320965781,
 56.602171505948036,
 56.14993249381958,
 55.86456377354169,
 55.75349431887083,
 55.79733974962707,
 55.952

In [76]:
# Cell output: every second observed value among the rows dated up to today.
df.loc[df['ds'] <= pd.Timestamp.now().strftime("%Y-%m-%d"), 'y'].iloc[::2].tolist()

[29,
 35,
 32,
 28,
 59,
 57,
 64,
 47,
 74,
 18,
 20,
 10,
 8,
 8,
 9,
 8,
 10,
 15,
 40,
 37,
 35,
 35,
 40,
 36,
 39,
 40,
 30,
 38,
 11,
 9,
 10,
 10,
 11,
 10,
 9,
 9,
 9,
 14,
 38,
 60,
 62,
 49,
 41,
 25,
 7,
 8,
 12,
 12,
 38,
 35,
 41,
 37,
 33,
 36,
 39,
 35,
 34,
 34,
 43,
 75,
 62,
 71,
 39,
 38,
 16,
 9,
 8,
 7,
 7,
 15,
 39,
 38,
 34,
 34,
 35,
 37,
 36,
 35,
 36,
 37,
 35,
 37,
 40,
 61,
 66,
 58,
 53,
 23,
 16,
 9,
 8,
 7,
 7,
 8,
 9,
 15,
 37,
 33,
 34,
 35,
 32,
 37,
 38,
 43,
 40,
 41,
 40,
 47,
 54,
 91,
 100,
 71,
 52,
 22,
 14,
 9,
 7,
 7,
 7,
 7,
 7,
 16,
 33,
 36,
 40,
 37,
 39,
 39,
 38,
 35]

In [85]:
# Cell output: the last five rows of the history frame.
df.tail(5)

Unnamed: 0,ds,y,isPartial
255,2024-01-07,37,False
256,2024-01-14,38,False
257,2024-01-21,35,False
258,2024-01-28,39,False
259,2024-02-04,40,False


In [84]:
# Cell output: all forecast dates on or after the last observed date.
forecast.loc[forecast['ds'] >= relevant_date.strftime("%Y-%m-%d"), 'ds'].tolist()

[Timestamp('2024-01-28 00:00:00'),
 Timestamp('2024-02-04 00:00:00'),
 Timestamp('2024-02-05 00:00:00'),
 Timestamp('2024-02-06 00:00:00'),
 Timestamp('2024-02-07 00:00:00'),
 Timestamp('2024-02-08 00:00:00'),
 Timestamp('2024-02-09 00:00:00'),
 Timestamp('2024-02-10 00:00:00'),
 Timestamp('2024-02-11 00:00:00'),
 Timestamp('2024-02-12 00:00:00'),
 Timestamp('2024-02-13 00:00:00'),
 Timestamp('2024-02-14 00:00:00'),
 Timestamp('2024-02-15 00:00:00'),
 Timestamp('2024-02-16 00:00:00'),
 Timestamp('2024-02-17 00:00:00'),
 Timestamp('2024-02-18 00:00:00'),
 Timestamp('2024-02-19 00:00:00'),
 Timestamp('2024-02-20 00:00:00'),
 Timestamp('2024-02-21 00:00:00'),
 Timestamp('2024-02-22 00:00:00'),
 Timestamp('2024-02-23 00:00:00'),
 Timestamp('2024-02-24 00:00:00'),
 Timestamp('2024-02-25 00:00:00'),
 Timestamp('2024-02-26 00:00:00'),
 Timestamp('2024-02-27 00:00:00'),
 Timestamp('2024-02-28 00:00:00'),
 Timestamp('2024-02-29 00:00:00'),
 Timestamp('2024-03-01 00:00:00'),
 Timestamp('2024-03-

In [86]:
# Cell output: display the current `forecast` DataFrame.
forecast

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2019-02-17,28.536200,14.657756,50.746310,28.536200,28.536200,4.148697,4.148697,4.148697,4.148697,4.148697,4.148697,0.0,0.0,0.0,32.684897
1,2019-02-24,28.565850,12.950544,50.439228,28.565850,28.565850,3.626373,3.626373,3.626373,3.626373,3.626373,3.626373,0.0,0.0,0.0,32.192224
2,2019-03-03,28.595501,13.237505,51.626674,28.595501,28.595501,4.281498,4.281498,4.281498,4.281498,4.281498,4.281498,0.0,0.0,0.0,32.876999
3,2019-03-10,28.625151,13.275749,50.602509,28.625151,28.625151,3.927183,3.927183,3.927183,3.927183,3.927183,3.927183,0.0,0.0,0.0,32.552334
4,2019-03-17,28.654802,10.019684,50.051031,28.654802,28.654802,1.346462,1.346462,1.346462,1.346462,1.346462,1.346462,0.0,0.0,0.0,30.001264
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
620,2025-01-30,37.133318,26.073594,64.073031,37.125022,37.142487,7.257841,7.257841,7.257841,7.257841,7.257841,7.257841,0.0,0.0,0.0,44.391159
621,2025-01-31,37.137262,24.975655,63.222556,37.128933,37.146483,7.357419,7.357419,7.357419,7.357419,7.357419,7.357419,0.0,0.0,0.0,44.494682
622,2025-02-01,37.141207,25.910107,64.162204,37.132838,37.150479,7.408201,7.408201,7.408201,7.408201,7.408201,7.408201,0.0,0.0,0.0,44.549408
623,2025-02-02,37.145151,25.218748,65.174539,37.136736,37.154466,7.409162,7.409162,7.409162,7.409162,7.409162,7.409162,0.0,0.0,0.0,44.554313
