In [119]:
import streamlit as st
from pytrends.request import TrendReq
from prophet import Prophet
import pandas as pd
import plotly.graph_objects as go

# Regions offered in the country selector, as (display name, geo code) pairs.
# The geo code is what gets passed as `geo` to the Trends payload; "Worldwide"
# is paired with an empty code.
country_list = list({
    "Worldwide": "",
    "United States": "US",
    "United Kingdom": "GB",
    "India": "IN",
    "Canada": "CA",
}.items())

# Current date as a "YYYY-MM-DD" string, captured once when this code runs.
# Used as the cutoff between observed and forecast points.
today = pd.Timestamp.now().date().isoformat()

def fit_and_forecast(df, periods=365):
    """Fit a fresh Prophet model on ``df`` and predict over history plus a future horizon.

    Args:
        df: Frame containing at least a ``ds`` column and a ``y`` column. Only
            those two columns are handed to the model; any others are ignored.
        periods: Number of future periods appended by
            ``make_future_dataframe`` before predicting. Defaults to 365,
            which is the horizon this function always used.

    Returns:
        Whatever ``Prophet.predict`` returns for the extended frame. Callers
        in this file read its ``ds``, ``yhat``, ``yhat_upper`` and
        ``yhat_lower`` columns.
    """
    # A new model is built on every call, so repeated calls (one per keyword)
    # never share fitted state.
    model = Prophet()
    model.fit(df[['ds', 'y']])
    future = model.make_future_dataframe(periods=periods)
    return model.predict(future)

def plot_trend(fig, df, forecast, keyword, color):
    """Add one keyword's history, forecast and uncertainty band to ``fig``.

    Four traces are appended, in this order: observed values, forecast line,
    upper bound, lower bound. The split between "observed" and "forecast" is
    the module-level ``today`` string: rows of ``df`` with ``ds <= today`` are
    plotted as actuals, rows of ``forecast`` with ``ds > today`` as forecast.

    Args:
        fig: Figure-like object exposing ``add_trace``; it is mutated in place.
        df: Observed data with ``ds`` and ``y`` columns.
        forecast: Forecast data with ``ds``, ``yhat``, ``yhat_upper`` and
            ``yhat_lower`` columns.
        keyword: Search term, used only in the legend labels.
        color: Mapping with ``'Actual'``, ``'Forecast'`` and
            ``'ConfidenceInterval'`` colour strings.

    Returns:
        The same ``fig`` that was passed in.
    """
    # Compute each mask once instead of once per axis per trace.
    observed = df['ds'] <= today
    upcoming = forecast['ds'] > today
    future_dates = forecast['ds'][upcoming]

    # Actual
    fig.add_trace(go.Scatter(
        x=df['ds'][observed],
        y=df['y'][observed],
        mode='lines',
        name=f'Actual for {keyword}',
        line=dict(color=color['Actual']),
        hovertemplate='Date: %{x}<br>Value: %{y}'
    ))

    # Forecast
    fig.add_trace(go.Scatter(
        x=future_dates,
        y=forecast['yhat'][upcoming],
        mode='lines',
        name=f'Forecast for {keyword}',
        line=dict(color=color['Forecast']),
        hovertemplate='Date: %{x}<br>Value: %{y}'
    ))

    # Confidence interval, upper edge. Deliberately NOT filled: 'tonexty'
    # fills towards the previous trace, so filling here would shade the
    # forecast-to-upper area once and the lower trace would shade it again,
    # leaving the top half of the band visibly darker.
    fig.add_trace(go.Scatter(
        x=future_dates,
        y=forecast['yhat_upper'][upcoming],
        mode='lines',
        line=dict(width=0),
        showlegend=False,
        hoverinfo='skip'
    ))

    # Confidence interval, lower edge. Fills up to the upper-edge trace added
    # just above, producing a single uniformly shaded band.
    fig.add_trace(go.Scatter(
        x=future_dates,
        y=forecast['yhat_lower'][upcoming],
        mode='lines',
        line=dict(width=0),
        showlegend=False,
        fillcolor=color['ConfidenceInterval'],
        fill='tonexty',
        hoverinfo='skip'
    ))

    return fig

# Plot palette, one entry per keyword slot ("Keyword1".."Keyword3"). Each entry
# gives the line colour for observed data, the line colour for the forecast,
# and the translucent fill used for the confidence band.
color_dict = dict(
    Keyword1=dict(
        Actual='#2E86C1',
        Forecast='#5DADE2',
        ConfidenceInterval='rgba(93, 173, 226, 0.3)',
    ),
    Keyword2=dict(
        Actual='#BFC9CA',
        Forecast='#D5DBDB',
        ConfidenceInterval='rgba(213, 219, 219, 0.3)',
    ),
    Keyword3=dict(
        Actual='#58D68D',
        Forecast='#7DCEA0',
        ConfidenceInterval='rgba(125, 206, 160, 0.3)',
    ),
)



def _keyword_count_label(count):
    """Render a keyword-count option: "1 Keyword", "2 Keywords", "3 Keywords"."""
    plural = 's' if count > 1 else ''
    return f"{count} Keyword{plural}"


def _country_label(entry):
    """Show only the display name of a (name, code) entry from ``country_list``."""
    return entry[0]


# Two side-by-side selectors: how many keywords to compare, and which region.
col1, col2 = st.columns(2)

with col1:
    num_keywords = st.selectbox("Number of Keywords", [1, 2, 3], format_func=_keyword_count_label)

with col2:
    # The selected option is a (name, code) pair, unpacked right away.
    selected_country, selected_country_code = st.selectbox("Country", country_list, format_func=_country_label)

# Keyword inputs. The second and third boxes are only created when the chosen
# count calls for them; otherwise the corresponding variable stays "".
keyword1 = st.text_input("Enter first keyword", "")
keyword2 = st.text_input("Enter second keyword", "") if num_keywords >= 2 else ""
keyword3 = st.text_input("Enter third keyword", "") if num_keywords == 3 else ""

# Initialize a text string to hold the data
text_data = ""


def _rounded_csv(values):
    """Join a pandas Series as comma-separated whole numbers."""
    return ','.join(str(round(value)) for value in values.tolist())


# Button
if st.button("Check Popularity"):
    # Drop blank or whitespace-only inputs so an empty term is never sent to
    # Google Trends (previously the first keyword was sent even when empty).
    keywords = [term.strip() for term in (keyword1, keyword2, keyword3) if term.strip()]

    if not keywords:
        st.warning("Enter at least one keyword.")
    else:
        pytrends = TrendReq(hl='en-US', tz=360)
        pytrends.build_payload(keywords, cat=0, timeframe='today 5-y', geo=selected_country_code, gprop='')
        data = pytrends.interest_over_time()

        if data.empty:
            # Without this guard the rename below produces no 'y' column and
            # the model fit fails with a KeyError.
            st.warning("Google Trends returned no data for this selection.")
        else:
            fig = go.Figure()

            for i, keyword in enumerate(keywords):
                color = color_dict[f'Keyword{i+1}']
                df = data.reset_index().rename(columns={'date': 'ds', keyword: 'y'})
                forecast = fit_and_forecast(df)

                # Split on the same `today` cutoff plot_trend uses, so the text
                # summary and the chart always agree (the cutoff used to be
                # recomputed from the clock for every slice).
                observed = df['ds'] <= today
                upcoming = forecast['ds'] > today

                # Add to text_data with rounding to the nearest whole number and sampling every 2nd point
                # NOTE(review): the header ends with "|" and the next field starts
                # with "|", giving an empty "||" field. Kept as-is in case a
                # consumer depends on the exact format - confirm.
                text_data += f"Keyword number {i+1}|{keyword}|"
                text_data += "|Actual data from Google Trends|" + _rounded_csv(df['y'][observed][::2])
                text_data += "|Forecasted data using the Prophet model|" + _rounded_csv(forecast['yhat'][upcoming][::2])
                text_data += "|Upper Confidence Interval|" + _rounded_csv(forecast['yhat_upper'][upcoming][::2])
                text_data += "|Lower Confidence Interval|" + _rounded_csv(forecast['yhat_lower'][upcoming][::2])
                text_data += "|"

                fig = plot_trend(fig, df, forecast, keyword, color)

            # The figure was previously built and then discarded; render it.
            st.plotly_chart(fig)

# Scratch check: build the text summary for two fixed keywords without going
# through the Streamlit button.
keywords = ["nfl"]
keyword = "nfl"
keywords2 = ["nba"]
# NOTE(review): this rebinds `keyword2`, which also holds the second Streamlit
# text input earlier in this cell. Kept because the original did the same.
keyword2 = "nba"


def _join_rounded(values):
    """Join a pandas Series as comma-separated whole numbers."""
    return ','.join(str(round(value)) for value in values.tolist())


for position, term in enumerate((keyword, keyword2), start=1):
    pytrends = TrendReq(hl='en-US', tz=360)
    pytrends.build_payload([term], cat=0, timeframe='today 5-y', geo=selected_country_code, gprop='')
    data = pytrends.interest_over_time()

    df = data.reset_index().rename(columns={'date': 'ds', term: 'y'})
    forecast = fit_and_forecast(df)

    # Last observed date for THIS keyword. It is computed before use: the
    # first keyword used to read `relevant_date` before any assignment, which
    # raises NameError on a fresh kernel and otherwise silently reuses a value
    # left over from a different run.
    relevant_date = df['ds'].iloc[-1]
    horizon = forecast[forecast['ds'] >= relevant_date.strftime("%Y-%m-%d")]

    # Actual values are emitted in full. Forecast columns are sampled with
    # [7::7]: every 7th row, starting 7 rows after the last observed date.
    # NOTE(review): the "Forecasted data starting date" field is the last
    # observed date (row 0 of the horizon), while the first listed forecast
    # value is row 7 - confirm that labelling is intended.
    text_data += f"Keyword number {position}:{term} "
    text_data += "|Actual data starting date|" + df['ds'].iloc[0].strftime("%Y-%m-%d")
    text_data += "|Actual data from Google Trends|" + _join_rounded(df['y'])
    text_data += "|Forecasted data starting date|" + horizon['ds'].iloc[0].strftime("%Y-%m-%d")
    text_data += "|Forecasted data generated using the Prophet model|" + _join_rounded(horizon['yhat'].iloc[7::7])
    text_data += "|Upper confidence interval|" + _join_rounded(horizon['yhat_upper'].iloc[7::7])
    text_data += "|Lower confidence interval|" + _join_rounded(horizon['yhat_lower'].iloc[7::7])
    text_data += "|"

text_data


01:03:30 - cmdstanpy - INFO - Chain [1] start processing
01:03:30 - cmdstanpy - INFO - Chain [1] done processing
01:03:31 - cmdstanpy - INFO - Chain [1] start processing
01:03:31 - cmdstanpy - INFO - Chain [1] done processing


'Keyword number 1:nfl |Actual data starting date|2019-02-17|Actual data from Google Trends|6,8,9,14,8,7,6,6,10,36,10,4,4,4,4,4,4,4,4,3,4,5,6,11,16,15,20,20,31,58,49,46,43,46,46,39,43,45,48,43,48,44,44,53,50,73,71,68,37,11,23,6,7,8,7,6,14,8,7,8,9,45,10,8,6,5,5,4,4,4,4,5,5,6,6,7,5,5,6,7,11,30,47,41,42,41,35,37,34,38,31,35,42,35,40,39,56,43,52,65,52,34,11,22,7,6,6,6,10,7,7,8,7,8,34,8,6,5,4,4,4,4,4,3,4,4,5,5,9,12,15,18,18,27,43,42,41,43,44,40,39,42,39,41,48,37,40,46,70,51,47,64,79,64,45,14,21,6,7,8,12,11,8,7,7,8,28,9,7,4,4,4,4,4,4,4,4,4,6,7,10,13,16,17,16,26,48,46,44,44,47,39,38,41,36,41,44,35,36,43,55,46,57,66,88,58,43,13,23,6,8,9,10,7,7,6,7,8,32,8,7,4,4,4,4,4,4,4,4,5,6,6,9,15,16,17,17,28,49,44,47,45,42,42,42,42,41,43,56,39,38,50,56,69,57,74,100,68,45,14|Forecasted data starting date|2024-02-04|Forecasted data generated using the Prophet model|13,9,10,13,14,12,9,9,13,19,22,19,13,8,6,7,8,8,7,7,7,8,9,10,12,15,16,17,22,30,41,49,51,49,45,43,44,45,45,44,45,46,47,48,51,57,66,72,72,62,46,29|Uppe

In [116]:
# First forecast timestamp on or after the last observed date.
forecast.loc[forecast['ds'] >= relevant_date.strftime("%Y-%m-%d"), 'ds'].iloc[0]

Timestamp('2024-02-04 00:00:00')

In [117]:
# Earliest timestamp in the observed data.
df['ds'].iat[0]

Timestamp('2019-02-17 00:00:00')