In [1]:
from pytrends.request import TrendReq
import pandas as pd
import time
from datetime import datetime, timedelta

# Shared Google Trends client used for every request in this notebook.
pytrends = TrendReq(hl='en-US', tz=360)

# Category name -> search terms. Each list holds five terms, so the driver
# loop below sends each category as a single five-keyword batch.
categories = {
    "Jeans": [
        "skinny jeans",
        "loose jeans",
        "straight jeans",
        "slim jeans",
        "distressed jeans",
    ],
    "Outerwear": [
        "leather jacket",
        "denim jacket",
        "bomber jacket",
        "pea coat",
        "parka",
    ],
    "Sneakers": [
        "running shoes",
        "basketball sneakers",
        "retro sneakers",
        "chunky sneakers",
        "canvas sneakers",
    ],
    "Boots": [
        "Doc Martens",
        "chelsea boots",
        "hiking boots",
        "chukkas",
        "Timberlands",
    ],
    "Clothing_Brand": [
        "Zara",
        "H&M",
        "Uniqlo",
        "Abercrombie & Fitch",
        "Shein",
    ],
    "Shoe_Brand": [
        "Nike",
        "New Balance",
        "Adidas",
        "Reebok",
        "Vans",
    ],
}

# Timeframe string handed to pytrends: the trailing five years.
timeframe = "today 5-y"

# Long-format accumulator; fetch_data() appends rows to it via `global`.
combined_data = pd.DataFrame()

def fetch_data(pytrends, keywords, category, *, max_retries=3, retry_delay=60, cooldown=30):
    """Fetch one batch of keywords from Google Trends and append it to ``combined_data``.

    The batch is requested for the module-level ``timeframe`` with ``geo='US'``.
    Each keyword's series is rescaled so that its own maximum becomes 100, then
    stored in long format with the columns ``date``, ``Search Interest``,
    ``Keyword`` and ``Category``.

    Args:
        pytrends: Client object exposing ``build_payload`` and
            ``interest_over_time`` (a ``TrendReq`` instance in this notebook).
        keywords: List of search terms to request together.
        category: Label written to the ``Category`` column for every row.
        max_retries: How many times to retry after a failed request before
            giving up on the batch. Negative values are treated as 0.
        retry_delay: Seconds to wait after a failed request before retrying.
        cooldown: Seconds to wait after a successful batch, to stay under the
            rate limit.

    Returns:
        None. Results are appended to the module-level ``combined_data``.
        A batch that keeps failing, or that returns no rows, is skipped.
    """
    global combined_data

    attempts = max(1, max_retries + 1)
    data = None
    for attempt in range(attempts):
        print(f"Fetching {keywords} for {timeframe}")
        # Only the network calls are retried; a failure while reshaping the
        # result must not trigger a re-fetch that would duplicate rows.
        try:
            pytrends.build_payload(keywords, timeframe=timeframe, geo='US')
            data = pytrends.interest_over_time()
            break
        except Exception as e:
            print(f"Error fetching {keywords}: {e}")
            if attempt == attempts - 1:
                # Bounded retries: skip the batch instead of recursing forever.
                print(f"Giving up on {keywords} after {attempts} attempts.")
                return
            print(f"Waiting {retry_delay} seconds before retrying...")
            time.sleep(retry_delay)

    # Ensure data is not empty
    if data.empty:
        print(f"No data available for {keywords}. Skipping...")
        return

    # Drop 'isPartial' column if it exists
    if 'isPartial' in data.columns:
        data = data.drop(columns=['isPartial'])

    # Move the date index into a regular "date" column.
    data = data.reset_index()

    # Normalize each keyword individually and reshape data
    frames = []
    for keyword in keywords:
        if keyword not in data.columns:
            continue

        temp_df = data[["date", keyword]].rename(columns={keyword: "Search Interest"})
        interest = temp_df["Search Interest"].astype(float)
        peak = interest.max()
        # A series that is all zeros (or all NaN) has no peak to scale by;
        # leave it untouched rather than producing 0/0 = NaN.
        if peak > 0:
            interest = interest / peak * 100
        temp_df["Search Interest"] = interest
        temp_df["Keyword"] = keyword
        temp_df["Category"] = category
        frames.append(temp_df)

    if frames:
        # Concatenate once per batch, and leave out the accumulator while it
        # is still empty so no empty frame takes part in the concat.
        parts = frames if combined_data.empty else [combined_data, *frames]
        combined_data = pd.concat(parts, ignore_index=True)

    time.sleep(cooldown)  # Pause between batches to avoid rate limits

# Walk every category and hand its keywords to fetch_data() in slices of at
# most five terms per request.
for category, keywords in categories.items():
    batch_starts = range(0, len(keywords), 5)
    for start in batch_starts:
        batch = keywords[start:start + 5]
        fetch_data(pytrends, batch, category)

# Persist the accumulated long-format table without the row index.
combined_data.to_csv("fashion_trends_5years.csv", index=False)
print("Data collection complete! Saved to fashion_trends_5years.csv.")

Fetching ['skinny jeans', 'loose jeans', 'straight jeans', 'slim jeans', 'distressed jeans'] for today 5-y


  df = df.fillna(False)


Fetching ['leather jacket', 'denim jacket', 'bomber jacket', 'pea coat', 'parka'] for today 5-y


  df = df.fillna(False)


Fetching ['running shoes', 'basketball sneakers', 'retro sneakers', 'chunky sneakers', 'canvas sneakers'] for today 5-y


  df = df.fillna(False)


Fetching ['combat boots', 'chelsea boots', 'hiking boots', 'chukkas', 'Timberlands'] for today 5-y


  df = df.fillna(False)


Fetching ['Zara', 'H&M', 'Uniqlo', 'Abercrombie & Fitch', 'Shein'] for today 5-y


  df = df.fillna(False)


Fetching ['Nike', 'New Balance', 'Adidas', 'Reebok', 'Vans'] for today 5-y


  df = df.fillna(False)


Data collection complete! Saved to fashion_trends_5years.csv.


  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,date,Search Interest,Keyword,Category,Subcategory
0,2022-01-30,88.0,skinny jeans,Jeans,Jeans
1,2022-02-06,92.0,skinny jeans,Jeans,Jeans
2,2022-02-13,84.0,skinny jeans,Jeans,Jeans
3,2022-02-20,100.0,skinny jeans,Jeans,Jeans
4,2022-02-27,93.0,skinny jeans,Jeans,Jeans
...,...,...,...,...,...
95,2023-11-19,99.0,skinny jeans,Jeans,Jeans
96,2023-11-26,92.0,skinny jeans,Jeans,Jeans
97,2023-12-03,95.0,skinny jeans,Jeans,Jeans
98,2023-12-10,92.0,skinny jeans,Jeans,Jeans
