**pytrends**

Pytrends is a Python library that provides a simple interface for interacting with Google Trends data. Google Trends allows users to explore the popularity of search terms over time and across different regions.

With pytrends, you can fetch data such as the search interest for specific keywords, trends over time, geographic distribution of searches, related queries, and more. This data can be useful for various purposes, including market research, content planning, and trend analysis.

Pytrends allows you to access Google Trends data programmatically, making it easier to integrate this information into your Python applications, scripts, or data analysis workflows.

In [None]:
!pip install pytrends

In [None]:
%matplotlib inline
import csv
import time
from datetime import timedelta, date

import matplotlib.pyplot as plt
import pandas as pd
from dateutil.relativedelta import relativedelta
from pytrends.request import TrendReq

In [None]:
# --- Request parameters ---------------------------------------------------
hl = 'en-US'
tz = 360  # timezone offset in minutes; 360 means US CST
kw_list = ['Bitcoin']  # keywords to query, FIVE at maximum
geo = 'US'  # two-letter country code ('US' = United States); default is 'world'

# Long-range window, formatted as "<start> <end>" in ISO dates
start_date = date(2014, 9, 14)
end_date = date(2024, 5, 31)
timeframe = f'{start_date} {end_date}'

# --- Query Google Trends for the whole window -----------------------------
pytrends = TrendReq(hl=hl, tz=tz)
pytrends.build_payload(kw_list=kw_list, timeframe=timeframe, geo=geo)
df_monthly = pytrends.interest_over_time()

# Keep a copy on disk; a later cell reloads 'df_monthly.csv'
df_monthly.to_csv('df_monthly.csv', index=True)

# Build 'BTC-GOOGLE.csv': one request per calendar month of daily data, each
# month linearly rescaled (value -> a * value + b) so that its first and last
# day line up with the long-range series held in df_monthly.

keyword = kw_list[0]  # only the first keyword is rescaled and written out

# Create the CSV file for daily data and write the header row.
# newline='' lets the csv module control line endings (otherwise an extra
# blank row can appear after the header on Windows).
filename = 'BTC-GOOGLE.csv'
with open(filename, 'w', newline='') as csvfile:
    filewriter = csv.writer(csvfile, delimiter=',')
    filewriter.writerow(['Date', keyword])  # title

END_DATE = date(year=2024, month=5, day=10)
one_month = relativedelta(months=+1)

start_date = date(year=2014, month=10, day=1)

# Fill gaps of each month and write into the CSV file
while start_date <= END_DATE:
    end_date = start_date + one_month - timedelta(days=1)
    timeframe = str(start_date) + ' ' + str(end_date)

    # Fetch daily data
    pytrends.build_payload(kw_list=kw_list, timeframe=timeframe, geo=geo)
    df_daily = pytrends.interest_over_time()

    # Look rows up with Timestamps: a plain datetime.date is not reliably
    # matched against a DatetimeIndex across pandas versions.
    first_day = pd.Timestamp(start_date)
    last_day = pd.Timestamp(end_date)

    try:
        # Ensure the anchor dates exist in both DataFrames
        if df_daily.empty or first_day not in df_daily.index or last_day not in df_daily.index:
            print(f"Data not available for the timeframe: {timeframe}")
        elif first_day not in df_monthly.index or last_day not in df_monthly.index:
            # NOTE(review): if df_monthly only carries one row per month
            # (month-start dates), last_day is never found and every month is
            # skipped here -- confirm the granularity of df_monthly.
            print(f"Monthly data not available for start date: {start_date} or end date: {end_date}")
        else:
            daily_first = df_daily.loc[first_day, keyword]
            daily_last = df_daily.loc[last_day, keyword]
            monthly_first = df_monthly.loc[first_day, keyword]
            monthly_last = df_monthly.loc[last_day, keyword]

            # Calculate the equation parameters of the line through
            # (daily_first, monthly_first) and (daily_last, monthly_last).
            if daily_first == daily_last:
                # Slope undefined; kept from the original: the whole month
                # collapses to 0.
                a = 0
                b = 0
            else:
                a = (monthly_last - monthly_first) / (daily_last - daily_first)
                # The intercept must use the same anchor point for both terms,
                # otherwise the line misses both anchors.
                b = monthly_first - a * daily_first

            # Convert the data to fill the monthly gap; non-positive results
            # are floored at 0.
            scaled = a * df_daily[keyword] + b
            df_daily[keyword] = scaled.where(scaled > 0, 0)

            # Append the result to the CSV file.
            # NOTE(review): [:-1] drops the last day of every month although
            # consecutive request windows do not overlap -- confirm intended.
            df_daily[:-1].reset_index().to_csv(
                filename,
                mode='a',
                header=False,
                columns=['date', keyword],
                index=False,
            )
            print(timeframe + ' finished.')
    except KeyError as e:
        print(f"KeyError encountered: {e}")
    except Exception as e:
        print(f"Unexpected error: {e}")

    # Set 1 second waiting time after every request (not only the successful
    # ones), in case Google will deny my access.
    time.sleep(1)
    start_date = start_date + one_month

print('Cheers! Get the job done.')  # the csv file is 'BTC-GOOGLE.csv'

In [None]:
# Quick inspection of the long-range dataset held in df_monthly
bitcoin_interest = df_monthly['Bitcoin']
print(f'min: {min(bitcoin_interest)}')
print(f'max: {max(bitcoin_interest)}')

# First and last three rows
for preview in (df_monthly.head(3), df_monthly.tail(3)):
    print(preview)

# Plot the series exactly as returned by Google Trends; this is the coarse
# baseline the more detailed daily data can be compared against.
df_monthly.plot(figsize=(18, 9))

In [None]:
import pandas as pd
from datetime import date, timedelta
from dateutil.relativedelta import relativedelta

# Assuming df_monthly is already fetched and contains your data.
# Min-Max normalization to scale values between 0 and 1 (df_monthly is
# modified in place).
bitcoin_min = df_monthly['Bitcoin'].min()
bitcoin_range = df_monthly['Bitcoin'].max() - bitcoin_min
if bitcoin_range == 0:
    # A constant series would otherwise become 0/0 = NaN everywhere.
    df_monthly['Bitcoin'] = 0.0
else:
    df_monthly['Bitcoin'] = (df_monthly['Bitcoin'] - bitcoin_min) / bitcoin_range

# Save normalized df_monthly to a CSV file
df_monthly.to_csv('df_monthly_normalized.csv', index=True)

# Expand monthly data to daily data: every row's value is repeated for each
# day from the row's date up to one month later minus one day.
# Frames are collected in a list and concatenated once, instead of growing a
# DataFrame inside the loop (which re-copies all earlier rows every time).
daily_frames = []
for idx, row in df_monthly.iterrows():
    month_start = idx
    month_end = (idx + relativedelta(months=+1) - timedelta(days=1)).to_pydatetime()
    date_range = pd.date_range(start=month_start, end=month_end, freq='D')
    daily_values = pd.DataFrame(date_range, columns=['Date'])
    daily_values['Bitcoin'] = row['Bitcoin']
    daily_frames.append(daily_values)

if daily_frames:
    df_daily_expanded = pd.concat(daily_frames)
else:
    # Nothing to expand: keep the expected columns so set_index still works.
    df_daily_expanded = pd.DataFrame(columns=['Date', 'Bitcoin'])

# Ensure the index is set correctly
df_daily_expanded.set_index('Date', inplace=True)

# Save the expanded daily data to a CSV file
df_daily_expanded.to_csv('df_daily_expanded.csv')

print('Normalized monthly data saved as df_monthly_normalized.csv')
print('Expanded daily data saved as df_daily_expanded.csv')

In [None]:
# Step-shaped daily series: each monthly value repeated across its days
df_daily_expanded.plot(figsize=(18, 9))

In [None]:
# Reload the raw series saved earlier as 'df_monthly.csv'
df_monthly = pd.read_csv('df_monthly.csv')

# Add the row-to-row percentage change, discard every row that contains a NaN
# (at least the first row, whose change is undefined), and renumber the rows
# from 0.
df_monthly = (
    df_monthly
    .assign(Change=df_monthly['Bitcoin'].pct_change())
    .dropna()
    .reset_index(drop=True)
)

df_monthly

In [None]:
# Persist the current df_monthly, row index included as the first column
df_monthly.to_csv(path_or_buf='df_monthly_change.csv', index=True)