In [2]:
import pandas as pd
import os
from googleapiclient.discovery import build

# 1. SETUP: API Key and YouTube client
API_KEY = os.getenv("you_tube_api")  # Ensure environment variable is set
if not API_KEY:
    raise ValueError("YouTube API key not found. Please set the 'you_tube_api' environment variable.")

youtube = build("youtube", "v3", developerKey=API_KEY)

# 2. SEARCH QUERIES
search_queries = [
    "Abercrombie & Fitch haul",
    "Abercrombie try-on",
    "Abercrombie jeans review",
    "curve love",
    "abercrombie",
    "Abercrombie",
    "Abercrombie fall fashion",
    "Abercrombie winter outfits",
    "Abercrombie shopping haul"
]

# 3. KEYWORDS TO EXCLUDE
exclude_keywords = ["ceo", "scandal", "charged", "arrested", "indicted", "trafficking", "lawsuit"]

# ------------------------------------------------
# STEP A: Perform YouTube Searches with Pagination
# ------------------------------------------------

all_video_ids = set()  # Use a set to avoid duplicates across queries

for query in search_queries:
    print(f"Searching for: {query}")
    page_token = None

    while True:
        search_response = youtube.search().list(
            q=query,
            type="video",
            part="id,snippet",
            # The maximum number of results in a single response
            # must be <= 50 per YouTube API rules.
            maxResults=50,
            publishedAfter='2020-01-01T00:00:00Z',
            publishedBefore='2025-01-01T00:00:00Z',
            pageToken=page_token
        ).execute()

        # Collect video IDs from this page of results
        items = search_response.get("items", [])
        for item in items:
            video_id = item["id"]["videoId"]
            all_video_ids.add(video_id)

        # Check if there's another page
        page_token = search_response.get("nextPageToken")
        if not page_token:
            break

print(f"\nFound a total of {len(all_video_ids)} unique video IDs from all queries.\n")

# ------------------------------------------------
# STEP B: Fetch Video Statistics in Batches of 50
# ------------------------------------------------

all_video_details = []

# Convert set to list for easier slicing
all_video_ids_list = list(all_video_ids)

for i in range(0, len(all_video_ids_list), 50):
    # Get a slice of up to 50 IDs
    subset_ids = all_video_ids_list[i : i + 50]

    # Request stats for this batch
    video_stats_response = youtube.videos().list(
        part="snippet,statistics",
        id=",".join(subset_ids)
    ).execute()

    # Add the items from this response to our master list
    all_video_details.extend(video_stats_response.get("items", []))

print(f"Retrieved statistics for {len(all_video_details)} videos.\n")

# ------------------------------------------------
# STEP C: Filter Out Videos with Excluded Keywords
# ------------------------------------------------

filtered_video_data = []

for video in all_video_details:
    snippet = video.get("snippet", {})
    stats = video.get("statistics", {})

    title = snippet.get("title", "")
    title_lower = title.lower()

    # Check if any excluded keyword is in the title
    if not any(excluded in title_lower for excluded in exclude_keywords):
        filtered_video_data.append({
            "Title": title,
            "Views": int(stats.get("viewCount", 0)),
            "Likes": int(stats.get("likeCount", 0)),
            "Comments": int(stats.get("commentCount", 0)),
            "Published Date": snippet.get("publishedAt"),
            "Video ID": video.get("id")
        })

print(f"Number of filtered videos (without excluded keywords): {len(filtered_video_data)}\n")

# OPTIONAL: Print some sample data
for vid in filtered_video_data[:5]:
    print(vid)


Searching for: Abercrombie & Fitch haul


HttpError: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?q=Abercrombie+%26+Fitch+haul&type=video&part=id%2Csnippet&maxResults=50&publishedAfter=2020-01-01T00%3A00%3A00Z&publishedBefore=2025-01-01T00%3A00%3A00Z&key=AIzaSyDe_5sxeLJE4Mj0JXQkenQjoi5Kay6367o&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">

In [3]:
df = pd.DataFrame(filtered_video_data)

# Now you have a pandas DataFrame; you can print, analyze, etc.
print(df.head())

# Save to CSV w
df.to_csv("my_youtube_data.csv", index=False, encoding="utf-8")

print("Saved DataFrame to 'my_youtube_data.csv'.")

                                               Title  Views  Likes  Comments  \
0  SUMMER TRY-ON HAUL | ABERCROMBIE, MASSIMO DUTT...  27156    647        29   
1  Casual Winter Outfits ❄ | Amazon Corset | Get ...   7579    240         4   
2  Abercrombie Haul. Shop on description😉 #abercr...   6139     58         0   
3  Best effort to save a goal.. #efootball #pes20...   1394     30         0   
4       Abercrombie fall sweater vest 😍 #abercrombie    609      6         2   

         Published Date     Video ID  
0  2024-06-07T16:49:41Z  mJtVWvBLltg  
1  2023-01-31T16:52:14Z  4DfMiBEZm6Q  
2  2022-08-31T14:55:56Z  f9ZbRX1ZeGk  
3  2023-04-10T07:08:44Z  vdF7WLM7q4w  
4  2024-09-13T17:18:36Z  -kQoE1P4fnE  
Saved DataFrame to 'my_youtube_data.csv'.


In [5]:
df

Unnamed: 0,Title,Views,Likes,Comments,Published Date,Video ID
0,"SUMMER TRY-ON HAUL | ABERCROMBIE, MASSIMO DUTT...",27156,647,29,2024-06-07T16:49:41Z,mJtVWvBLltg
1,Casual Winter Outfits ❄ | Amazon Corset | Get ...,7579,240,4,2023-01-31T16:52:14Z,4DfMiBEZm6Q
2,Abercrombie Haul. Shop on description😉 #abercr...,6139,58,0,2022-08-31T14:55:56Z,f9ZbRX1ZeGk
3,Best effort to save a goal.. #efootball #pes20...,1394,30,0,2023-04-10T07:08:44Z,vdF7WLM7q4w
4,Abercrombie fall sweater vest 😍 #abercrombie,609,6,2,2024-09-13T17:18:36Z,-kQoE1P4fnE
...,...,...,...,...,...,...
2673,EFFORTLESSLY CHIC SEZANE SPRING LOOKBOOK | THE...,3796,108,18,2023-03-09T16:14:05Z,7mJtpkLB58A
2674,Abercrombie & Fitch Rebranded… and it’s amazin...,1225,28,3,2023-08-22T05:05:38Z,bneNL__G2Lo
2675,"ABERCROMBIE: WINTER, SPRING & SUMMER HAUL | SALE!",1702,53,8,2024-12-28T09:00:08Z,7h0Vu9gz64k
2676,Perfectly Loose & Flattering Abercrombie Jeans...,4955,52,3,2023-11-02T15:00:46Z,wFMCQFSPffQ


In [9]:
# Convert 'Published Date' to datetime and keep only the date
df["Published Date"] = pd.to_datetime(df["Published Date"]).dt.date

# Aggregate YouTube data by date (sum Views, Likes, and Comments for the same date)
youtube_summary = df.groupby("Published Date").agg({
    "Views": "sum",
    "Likes": "sum",
    "Comments": "sum"
}).reset_index()

In [7]:
df

Unnamed: 0,Title,Views,Likes,Comments,Published Date,Video ID
0,"SUMMER TRY-ON HAUL | ABERCROMBIE, MASSIMO DUTT...",27156,647,29,2024-06-07,mJtVWvBLltg
1,Casual Winter Outfits ❄ | Amazon Corset | Get ...,7579,240,4,2023-01-31,4DfMiBEZm6Q
2,Abercrombie Haul. Shop on description😉 #abercr...,6139,58,0,2022-08-31,f9ZbRX1ZeGk
3,Best effort to save a goal.. #efootball #pes20...,1394,30,0,2023-04-10,vdF7WLM7q4w
4,Abercrombie fall sweater vest 😍 #abercrombie,609,6,2,2024-09-13,-kQoE1P4fnE
...,...,...,...,...,...,...
2673,EFFORTLESSLY CHIC SEZANE SPRING LOOKBOOK | THE...,3796,108,18,2023-03-09,7mJtpkLB58A
2674,Abercrombie & Fitch Rebranded… and it’s amazin...,1225,28,3,2023-08-22,bneNL__G2Lo
2675,"ABERCROMBIE: WINTER, SPRING & SUMMER HAUL | SALE!",1702,53,8,2024-12-28,7h0Vu9gz64k
2676,Perfectly Loose & Flattering Abercrombie Jeans...,4955,52,3,2023-11-02,wFMCQFSPffQ


In [8]:
df.dtypes

Title             object
Views              int64
Likes              int64
Comments           int64
Published Date    object
Video ID          object
dtype: object

In [21]:
df

Unnamed: 0_level_0,Title,Views,Likes,Comments,Video ID
Published Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-08,"winter clothing tryon haul (Urban Outfitters, ...",502,13,0,LU1eoN5XwdI
2020-01-09,HUGE Abercrombie Haul - Winter to Spring outfi...,13148,391,33,oVYsjmCiEog
2020-02-02,My ABERCROMBIE & FITCH Haul & Try-On,29590,1194,291,_ShQb6aRhXA
2020-02-03,"Shopping Haul!!! (PINK, Abercrombie Kids, ULTA...",105,5,1,lG1LGKVnTX4
2020-02-10,"SHOPPING HAUL!!!! PINK, ABERCROMBIE, ROSS (GRE...",157,5,3,fNZI5qaJNNQ
...,...,...,...,...,...
2024-12-30,How to pronounce Abercrombie & Fitch,13358,383,4,JEv-30287r4
2024-12-31,Abercrombie Loose Jean Review #abercrombie,1744,45,1,lcrg7q8dY7k
2024-12-31,@NotEnoughNelsons @nenfam #paisleenelson #dupe...,4587,85,1,r7zHJD5QQy0
2024-12-31,"Vlog: New Hairstyle, Come Shopping With Me (Za...",15035,531,63,7svPcfRUHdU


In [10]:
df["Published Date"] = pd.to_datetime(df["Published Date"], errors="coerce")
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2678 entries, 0 to 2677
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Title           2678 non-null   object        
 1   Views           2678 non-null   int64         
 2   Likes           2678 non-null   int64         
 3   Comments        2678 non-null   int64         
 4   Published Date  2678 non-null   datetime64[ns]
 5   Video ID        2678 non-null   object        
dtypes: datetime64[ns](1), int64(3), object(2)
memory usage: 125.7+ KB


In [11]:
df["Published Date"] = pd.to_datetime(df["Published Date"]).dt.date
df

Unnamed: 0,Title,Views,Likes,Comments,Published Date,Video ID
0,"SUMMER TRY-ON HAUL | ABERCROMBIE, MASSIMO DUTT...",27156,647,29,2024-06-07,mJtVWvBLltg
1,Casual Winter Outfits ❄ | Amazon Corset | Get ...,7579,240,4,2023-01-31,4DfMiBEZm6Q
2,Abercrombie Haul. Shop on description😉 #abercr...,6139,58,0,2022-08-31,f9ZbRX1ZeGk
3,Best effort to save a goal.. #efootball #pes20...,1394,30,0,2023-04-10,vdF7WLM7q4w
4,Abercrombie fall sweater vest 😍 #abercrombie,609,6,2,2024-09-13,-kQoE1P4fnE
...,...,...,...,...,...,...
2673,EFFORTLESSLY CHIC SEZANE SPRING LOOKBOOK | THE...,3796,108,18,2023-03-09,7mJtpkLB58A
2674,Abercrombie & Fitch Rebranded… and it’s amazin...,1225,28,3,2023-08-22,bneNL__G2Lo
2675,"ABERCROMBIE: WINTER, SPRING & SUMMER HAUL | SALE!",1702,53,8,2024-12-28,7h0Vu9gz64k
2676,Perfectly Loose & Flattering Abercrombie Jeans...,4955,52,3,2023-11-02,wFMCQFSPffQ


In [19]:
df.columns

Index(['Title', 'Views', 'Likes', 'Comments', 'Video ID'], dtype='object')

In [16]:
# 2. Set "Published Date" as the DataFrame index
df.set_index("Published Date", inplace=True)

# 3. Sort by the new datetime index (optional but recommended)
df.sort_index(inplace=True)

# Check the first few rows
print(df.head())

KeyError: "None of ['Published Date'] are in the columns"

In [15]:
print(df.index)
print(df.info())


Index([2020-01-08, 2020-01-09, 2020-02-02, 2020-02-03, 2020-02-10, 2020-02-25,
       2020-03-10, 2020-03-13, 2020-03-17, 2020-03-23,
       ...
       2024-12-28, 2024-12-28, 2024-12-29, 2024-12-29, 2024-12-30, 2024-12-30,
       2024-12-31, 2024-12-31, 2024-12-31, 2024-12-31],
      dtype='object', name='Published Date', length=2678)
<class 'pandas.core.frame.DataFrame'>
Index: 2678 entries, 2020-01-08 to 2024-12-31
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Title     2678 non-null   object
 1   Views     2678 non-null   int64 
 2   Likes     2678 non-null   int64 
 3   Comments  2678 non-null   int64 
 4   Video ID  2678 non-null   object
dtypes: int64(3), object(2)
memory usage: 125.5+ KB
None


In [22]:
# Convert the current index to datetime
df.index = pd.to_datetime(df.index, errors="coerce")

# Now it should be a DatetimeIndex
print(df.index)
print(df.info())  # verify the new dtype


DatetimeIndex(['2020-01-08', '2020-01-09', '2020-02-02', '2020-02-03',
               '2020-02-10', '2020-02-25', '2020-03-10', '2020-03-13',
               '2020-03-17', '2020-03-23',
               ...
               '2024-12-28', '2024-12-28', '2024-12-29', '2024-12-29',
               '2024-12-30', '2024-12-30', '2024-12-31', '2024-12-31',
               '2024-12-31', '2024-12-31'],
              dtype='datetime64[ns]', name='Published Date', length=2678, freq=None)
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2678 entries, 2020-01-08 to 2024-12-31
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Title     2678 non-null   object
 1   Views     2678 non-null   int64 
 2   Likes     2678 non-null   int64 
 3   Comments  2678 non-null   int64 
 4   Video ID  2678 non-null   object
dtypes: int64(3), object(2)
memory usage: 125.5+ KB
None


In [23]:
# Example: Quarterly aggregation
df_quarterly = df.resample("Q").sum()
df_quarterly["Video_Count"] = df.resample("Q")["Title"].count()

df_quarterly.reset_index(inplace=True)
df_quarterly.rename(columns={"Published Date": "Quarter"}, inplace=True)

print(df_quarterly.head())


     Quarter                                              Title     Views  \
0 2020-03-31  winter clothing tryon haul (Urban Outfitters, ...    178520   
1 2020-06-30  📖 Genialne Fantasy! -  Joe Abercrombie Trylogi...  15366278   
2 2020-09-30  BEST PETITE DENIM JEANS TRY-ON & REVIEW / Levi...    338531   
3 2020-12-31  A&F 90's Straight Ultra High Rise Jean Review ...  15216055   
4 2021-03-31  Abercrombie and Fitch + Hollister TRY-ON HAUL ...   8583548   

    Likes  Comments                                           Video ID  \
0    4416       724  LU1eoN5XwdIoVYsjmCiEog_ShQb6aRhXAlG1LGKVnTX4fN...   
1  149656     40572  THBt_02JOnwZ12X9qnrL_cCiiHSOpnWPwEHzaTl1YCMYcr...   
2   10457      1621  dXmU5ektbvY2biRVTfd_zMWMT999Y7cuIQvsrMyPOkz0PA...   
3   82214      1728  s0MJjGNV1g04ixaHHMuMF8KhlE66AMnEQKrvT29XS-Zk92...   
4   92132      7037  kqNd9SinWBQwDhhzpnVYmkU9NFa1zcABcl2pZntqkubI3H...   

   Video_Count  
0           11  
1           25  
2           39  
3           49  
4      

  df_quarterly = df.resample("Q").sum()
  df_quarterly["Video_Count"] = df.resample("Q")["Title"].count()


In [24]:
df_quarterly.to_csv("my_youtube_data.csv", index=False, encoding="utf-8")

In [25]:
df_quarterly

Unnamed: 0,Quarter,Title,Views,Likes,Comments,Video ID,Video_Count
0,2020-03-31,"winter clothing tryon haul (Urban Outfitters, ...",178520,4416,724,LU1eoN5XwdIoVYsjmCiEog_ShQb6aRhXAlG1LGKVnTX4fN...,11
1,2020-06-30,📖 Genialne Fantasy! - Joe Abercrombie Trylogi...,15366278,149656,40572,THBt_02JOnwZ12X9qnrL_cCiiHSOpnWPwEHzaTl1YCMYcr...,25
2,2020-09-30,BEST PETITE DENIM JEANS TRY-ON & REVIEW / Levi...,338531,10457,1621,dXmU5ektbvY2biRVTfd_zMWMT999Y7cuIQvsrMyPOkz0PA...,39
3,2020-12-31,A&F 90's Straight Ultra High Rise Jean Review ...,15216055,82214,1728,s0MJjGNV1g04ixaHHMuMF8KhlE66AMnEQKrvT29XS-Zk92...,49
4,2021-03-31,Abercrombie and Fitch + Hollister TRY-ON HAUL ...,8583548,92132,7037,kqNd9SinWBQwDhhzpnVYmkU9NFa1zcABcl2pZntqkubI3H...,57
5,2021-06-30,Cómo leer a Joe Abercrombie | Guía de lectura1...,13742932,256043,9564,95JJrEaDbOkMw7_utD6MS0QJhidVh-xAgL3beyBJqRCI2M...,49
6,2021-09-30,TESTING OUT THE POPULAR ABERCROMBIE JEANS (US ...,8738494,164152,4101,770HfbGIFdUECqQucbp14EBl16o6vB4V8JCDSDjVjzY4GN...,68
7,2021-12-31,MANGO try On Fall HAUL 2021 ABERCROMBIE Shoppi...,1758087,40825,3151,_GF7yeVzuvQAyA68Y8mHPsltyBoY2zwMoNWhua1nWF5wAt...,99
8,2022-03-31,Abercrombie Winter HaulBest High Rise Jeans fo...,3487678,79034,5690,Kh_ShDO9MXUXk3XBgEGodI60Q5o9T0c2cAEZ9vIN7UIUyz...,126
9,2022-06-30,2022 Abercrombie & Fitch Spring Try On Haul | ...,3218844,120073,4582,qMCPAUZGoC8CMxKW_NKjNsJzXlJSadJ34fYItvLpyrk0TX...,117
