In [1]:
import os
from dotenv import load_dotenv
from pathlib import Path

# Load settings from the .env file located one directory above the current working directory.
parent_env_path = Path.cwd().parent / ".env"
load_dotenv(dotenv_path=parent_env_path)

# os.environ only accepts string values, so writing back a missing variable (None)
# would raise a TypeError. Re-export the credentials path only when it is actually set.
credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
if credentials_path:
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
project_id = os.getenv("PROJECT_ID")

In [2]:
from google.cloud import storage
from google.cloud import bigquery
import pandas as pd
from io import StringIO
import plotly.express as px
from langdetect import detect, DetectorFactory
# Pin langdetect's seed — presumably so repeated runs return the same detection results (confirm against the langdetect docs).
DetectorFactory.seed = 0

In [3]:
# Remove the cap on how many columns pandas shows when displaying a DataFrame.
pd.set_option('display.max_columns', None)

In [4]:
# Open the bucket and collect the name of every blob stored under the crawl prefix.
client = storage.Client(project=project_id)
bucket = client.bucket("ytbdata")
path = r'1_crawl/channel_raw_info'

blobs = bucket.list_blobs(prefix=path)
list_file = [blob.name for blob in blobs]
list_file



['1_crawl/channel_raw_info/channel_raw_info_20250614.json']

In [5]:
# Download every listed file and parse it into a DataFrame.
# The per-file frames are collected and concatenated; assigning to `df` inside the
# loop would silently keep only the last file when the prefix holds more than one.
frames = []
for file in list_file:
    blob = bucket.blob(file)
    content = blob.download_as_text()
    file_df = pd.read_json(StringIO(content))
    print(f"Processing {file} with {len(file_df)} rows")
    frames.append(file_df)

# Fail here with a clear message instead of a NameError on `df` in a later cell.
if not frames:
    raise FileNotFoundError(f"No files found under prefix {path!r}")
df = pd.concat(frames, ignore_index=True)


Processing 1_crawl/channel_raw_info/channel_raw_info_20250614.json with 4230 rows


In [6]:
# Preview the first rows of the freshly parsed channel records.
df.head()

Unnamed: 0,kind,etag,id,snippet,contentDetails,statistics,topicDetails
0,youtube#channel,PHvKkZujxh6sbIk4W2FdkSOhb-0,UCCK6b9W7w4dJ6ZtclFZdpsA,"{'title': 'MoeMoe', 'description': 'Just a sim...","{'relatedPlaylists': {'likes': '', 'uploads': ...","{'viewCount': '685008', 'subscriberCount': '22...","{'topicIds': ['/m/0bzvm2', '/m/02jjt'], 'topic..."
1,youtube#channel,JkuJ7ZgGrF1SGP7GhdGdQbujclE,UCW7AGm8JSBEEew61dJIgl_A,"{'title': 'Tom Cardy', 'description': 'Music a...","{'relatedPlaylists': {'likes': '', 'uploads': ...","{'viewCount': '530671762', 'subscriberCount': ...","{'topicIds': ['/m/05rwpb', '/m/04rlf', '/m/064..."
2,youtube#channel,69waZ5gTacBkY7xVN-YCkDsjs9w,UCRVtlcqayOmyuLIDrT3ng1w,"{'title': 'Jay & Sharon', 'description': 'Jay ...","{'relatedPlaylists': {'likes': '', 'uploads': ...","{'viewCount': '10245270378', 'subscriberCount'...","{'topicIds': ['/m/02jjt', '/m/019_rr'], 'topic..."
3,youtube#channel,xDOJ09DwDM6hpwZRuYCf3JMKLMs,UCJ-lUkI7HFBXUG16RYQYWxw,"{'title': 'Cartrack', 'description': 'As a glo...","{'relatedPlaylists': {'likes': '', 'uploads': ...","{'viewCount': '4799700', 'subscriberCount': '1...","{'topicIds': ['/m/07yv9', '/m/019_rr'], 'topic..."
4,youtube#channel,bSzdwbE-9zsMNEVjU6lRKrPAo34,UCFGBxznLew-4-j0LESic87A,"{'title': 'Places To Visit', 'description': 'I...","{'relatedPlaylists': {'likes': '', 'uploads': ...","{'viewCount': '32186260', 'subscriberCount': '...","{'topicIds': ['/m/019_rr', '/g/120yrv6h'], 'to..."


In [7]:
def split_json_column(df, column):
    """Expand a column of nested dictionaries into flat top-level columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column``. It is not modified.
    column : str
        Name of the column whose cells are dictionaries, or strings holding
        a Python literal (parsed before flattening).

    Returns
    -------
    pandas.DataFrame
        A new frame with ``column`` removed and the flattened keys appended
        as columns (``pd.json_normalize`` joins nested keys with ``.``).
        The index of ``df`` is preserved.

    Raises
    ------
    ValueError, SyntaxError
        If a string cell is not a valid Python literal.
    """
    import ast

    # ast.literal_eval accepts literals only, so a string cell cannot execute
    # arbitrary code the way eval() would.
    parsed = df[column].apply(
        lambda cell: ast.literal_eval(cell) if isinstance(cell, str) else cell
    )

    # Flatten the dictionaries into separate columns.
    json_df = pd.json_normalize(parsed.tolist())
    # Re-attach the caller's index so the concat below pairs rows correctly even
    # when df has a filtered or otherwise non-default index.
    json_df.index = df.index

    # Replace the nested column with its flattened counterpart.
    return pd.concat([df.drop(columns=[column]), json_df], axis=1)

In [8]:
# Flatten each nested API section in turn; every pass replaces one dict column
# with its expanded fields.
for nested_column in ['snippet', 'statistics', 'contentDetails', 'topicDetails']:
    df = split_json_column(df, nested_column)

In [9]:
# Inspect column dtypes and non-null counts after flattening.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4230 entries, 0 to 4229
Data columns (total 28 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   kind                       4230 non-null   object
 1   etag                       4230 non-null   object
 2   id                         4230 non-null   object
 3   title                      4230 non-null   object
 4   description                4230 non-null   object
 5   customUrl                  4227 non-null   object
 6   publishedAt                4230 non-null   object
 7   country                    3355 non-null   object
 8   thumbnails.default.url     4230 non-null   object
 9   thumbnails.default.width   4230 non-null   int64 
 10  thumbnails.default.height  4230 non-null   int64 
 11  thumbnails.medium.url      4230 non-null   object
 12  thumbnails.medium.width    4230 non-null   int64 
 13  thumbnails.medium.height   4230 non-null   int64 
 14  thumbnai

In [10]:
# Preview the flattened frame.
df.head()

Unnamed: 0,kind,etag,id,title,description,customUrl,publishedAt,country,thumbnails.default.url,thumbnails.default.width,thumbnails.default.height,thumbnails.medium.url,thumbnails.medium.width,thumbnails.medium.height,thumbnails.high.url,thumbnails.high.width,thumbnails.high.height,localized.title,localized.description,defaultLanguage,viewCount,subscriberCount,hiddenSubscriberCount,videoCount,relatedPlaylists.likes,relatedPlaylists.uploads,topicIds,topicCategories
0,youtube#channel,PHvKkZujxh6sbIk4W2FdkSOhb-0,UCCK6b9W7w4dJ6ZtclFZdpsA,MoeMoe,Just a simple guy having fun on the internet 🤷‍♂️,@moemoe-chan69,2024-05-08T19:27:43.191954Z,GE,https://yt3.ggpht.com/aIGr-Kts_3atMm9vJZVE4Opw...,88,88,https://yt3.ggpht.com/aIGr-Kts_3atMm9vJZVE4Opw...,240,240,https://yt3.ggpht.com/aIGr-Kts_3atMm9vJZVE4Opw...,800,800,MoeMoe,Just a simple guy having fun on the internet 🤷‍♂️,,685008,2280,False,331,,UUCK6b9W7w4dJ6ZtclFZdpsA,"[/m/0bzvm2, /m/02jjt]",[https://en.wikipedia.org/wiki/Video_game_cult...
1,youtube#channel,JkuJ7ZgGrF1SGP7GhdGdQbujclE,UCW7AGm8JSBEEew61dJIgl_A,Tom Cardy,Music and comedy babyyyyy\n\nInsta- tomycardy \n,@tomcardy1,2013-07-09T13:12:19Z,,https://yt3.ggpht.com/ytc/AIdro_mWI81331y_49Ee...,88,88,https://yt3.ggpht.com/ytc/AIdro_mWI81331y_49Ee...,240,240,https://yt3.ggpht.com/ytc/AIdro_mWI81331y_49Ee...,800,800,Tom Cardy,Music and comedy babyyyyy\n\nInsta- tomycardy \n,,530671762,1910000,False,122,,UUW7AGm8JSBEEew61dJIgl_A,"[/m/05rwpb, /m/04rlf, /m/064t9]",[https://en.wikipedia.org/wiki/Independent_mus...
2,youtube#channel,69waZ5gTacBkY7xVN-YCkDsjs9w,UCRVtlcqayOmyuLIDrT3ng1w,Jay & Sharon,Jay & Sharon\none boring flavor...original \n\...,@jaysharon,2020-06-27T04:27:20.760939Z,US,https://yt3.ggpht.com/wrcB-GNUFqz2b59PoWdwFwkB...,88,88,https://yt3.ggpht.com/wrcB-GNUFqz2b59PoWdwFwkB...,240,240,https://yt3.ggpht.com/wrcB-GNUFqz2b59PoWdwFwkB...,800,800,Jay & Sharon,Jay & Sharon\none boring flavor...original \n\...,zh-TW,10245270378,9700000,False,515,,UURVtlcqayOmyuLIDrT3ng1w,"[/m/02jjt, /m/019_rr]","[https://en.wikipedia.org/wiki/Entertainment, ..."
3,youtube#channel,xDOJ09DwDM6hpwZRuYCf3JMKLMs,UCJ-lUkI7HFBXUG16RYQYWxw,Cartrack,"As a global company, we offer data analytic so...",@cartrack,2014-02-17T08:48:16Z,ZA,https://yt3.ggpht.com/6AkvKRvgtYPh-GNuVeiH13CV...,88,88,https://yt3.ggpht.com/6AkvKRvgtYPh-GNuVeiH13CV...,240,240,https://yt3.ggpht.com/6AkvKRvgtYPh-GNuVeiH13CV...,800,800,Cartrack,"As a global company, we offer data analytic so...",,4799700,13400,False,110,,UUJ-lUkI7HFBXUG16RYQYWxw,"[/m/07yv9, /m/019_rr]","[https://en.wikipedia.org/wiki/Vehicle, https:..."
4,youtube#channel,bSzdwbE-9zsMNEVjU6lRKrPAo34,UCFGBxznLew-4-j0LESic87A,Places To Visit,I record videos of events such as cultural cel...,@placestovisit,2008-04-19T04:11:13Z,US,https://yt3.ggpht.com/VYxuEL-ZLYXmYQdyQgtM-1za...,88,88,https://yt3.ggpht.com/VYxuEL-ZLYXmYQdyQgtM-1za...,240,240,https://yt3.ggpht.com/VYxuEL-ZLYXmYQdyQgtM-1za...,800,800,Places To Visit,I record videos of events such as cultural cel...,,32186260,90600,False,2241,,UUFGBxznLew-4-j0LESic87A,"[/m/019_rr, /g/120yrv6h]",[https://en.wikipedia.org/wiki/Lifestyle_(soci...


In [11]:
# Count rows whose channel id repeats an earlier row; 0 means every id is unique.
df['id'].duplicated().sum()

0

Meaning of each column:

| Column Name                 | Description                                                                                               |
| --------------------------- | --------------------------------------------------------------------------------------------------------- |
| `kind`                      | The resource type. For channels, it's typically `"youtube#channel"`.                                      |
| `etag`                      | A version control tag used to detect changes in the resource.                                             |
| `id`                        | The unique YouTube Channel ID.                                                                            |
| `title`                     | The channel's display name/title.                                                                         |
| `description`               | The description text that the channel owner has written.                                                  |
| `customUrl`                 | The custom URL set by the channel owner (e.g., `youtube.com/c/YourName`). Might be null if not set.       |
| `publishedAt`               | The ISO 8601 timestamp when the channel was created.                                                      |
| `country`                   | The country associated with the channel, if set (e.g., `"US"` or `"VN"`).                                 |
| `thumbnails.default.url`    | URL to the default (small) thumbnail image of the channel.                                                |
| `thumbnails.default.width`  | Width in pixels of the default thumbnail.                                                                 |
| `thumbnails.default.height` | Height in pixels of the default thumbnail.                                                                |
| `thumbnails.medium.url`     | URL to the medium-size thumbnail image.                                                                   |
| `thumbnails.medium.width`   | Width of the medium thumbnail.                                                                            |
| `thumbnails.medium.height`  | Height of the medium thumbnail.                                                                           |
| `thumbnails.high.url`       | URL to the high-resolution thumbnail image.                                                               |
| `thumbnails.high.width`     | Width of the high-res thumbnail.                                                                          |
| `thumbnails.high.height`    | Height of the high-res thumbnail.                                                                         |
| `localized.title`           | Channel title in the localized language (if available).                                                   |
| `localized.description`     | Localized description.                                                                                    |
| `defaultLanguage`           | The default language of the channel content (e.g., `"en"`). Often null.                                   |
| `viewCount`                 | Total number of views the channel has received. |
| `subscriberCount`           | Number of subscribers. May be `"0"` or null if `hiddenSubscriberCount` is `True`.                         |
| `hiddenSubscriberCount`     | Boolean indicating whether the channel hides its subscriber count.                                        |
| `videoCount`                | Total number of uploaded videos on the channel.                                                           |
| `relatedPlaylists.likes`    | ID of the playlist containing all videos the channel has liked; often an empty string.                    |
| `relatedPlaylists.uploads`  | ID of the playlist containing all videos uploaded by the channel.                                         |
| `topicIds`                  | List of Freebase topic IDs associated with the channel (e.g., sports, education). May be null.            |
| `topicCategories`           | URLs of the Wikipedia topic categories that describe the channel content.                                 |



In [12]:
# Drop columns that are not referenced anywhere later in this notebook.
metadata_columns = ['etag', 'kind']
thumbnail_columns = [
    'thumbnails.default.url', 'thumbnails.default.width', 'thumbnails.default.height',
    'thumbnails.medium.url', 'thumbnails.medium.width', 'thumbnails.medium.height',
    'thumbnails.high.url', 'thumbnails.high.width', 'thumbnails.high.height',
]
other_columns = ['relatedPlaylists.likes', 'topicIds', 'customUrl']
df.drop(columns=metadata_columns + thumbnail_columns + other_columns, inplace=True)

In [13]:
# Preview the frame after the column drop.
df.head(5)

Unnamed: 0,id,title,description,publishedAt,country,localized.title,localized.description,defaultLanguage,viewCount,subscriberCount,hiddenSubscriberCount,videoCount,relatedPlaylists.uploads,topicCategories
0,UCCK6b9W7w4dJ6ZtclFZdpsA,MoeMoe,Just a simple guy having fun on the internet 🤷‍♂️,2024-05-08T19:27:43.191954Z,GE,MoeMoe,Just a simple guy having fun on the internet 🤷‍♂️,,685008,2280,False,331,UUCK6b9W7w4dJ6ZtclFZdpsA,[https://en.wikipedia.org/wiki/Video_game_cult...
1,UCW7AGm8JSBEEew61dJIgl_A,Tom Cardy,Music and comedy babyyyyy\n\nInsta- tomycardy \n,2013-07-09T13:12:19Z,,Tom Cardy,Music and comedy babyyyyy\n\nInsta- tomycardy \n,,530671762,1910000,False,122,UUW7AGm8JSBEEew61dJIgl_A,[https://en.wikipedia.org/wiki/Independent_mus...
2,UCRVtlcqayOmyuLIDrT3ng1w,Jay & Sharon,Jay & Sharon\none boring flavor...original \n\...,2020-06-27T04:27:20.760939Z,US,Jay & Sharon,Jay & Sharon\none boring flavor...original \n\...,zh-TW,10245270378,9700000,False,515,UURVtlcqayOmyuLIDrT3ng1w,"[https://en.wikipedia.org/wiki/Entertainment, ..."
3,UCJ-lUkI7HFBXUG16RYQYWxw,Cartrack,"As a global company, we offer data analytic so...",2014-02-17T08:48:16Z,ZA,Cartrack,"As a global company, we offer data analytic so...",,4799700,13400,False,110,UUJ-lUkI7HFBXUG16RYQYWxw,"[https://en.wikipedia.org/wiki/Vehicle, https:..."
4,UCFGBxznLew-4-j0LESic87A,Places To Visit,I record videos of events such as cultural cel...,2008-04-19T04:11:13Z,US,Places To Visit,I record videos of events such as cultural cel...,,32186260,90600,False,2241,UUFGBxznLew-4-j0LESic87A,[https://en.wikipedia.org/wiki/Lifestyle_(soci...


In [14]:
# Check the dtypes and non-null counts of the remaining columns before casting them.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4230 entries, 0 to 4229
Data columns (total 14 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   id                        4230 non-null   object
 1   title                     4230 non-null   object
 2   description               4230 non-null   object
 3   publishedAt               4230 non-null   object
 4   country                   3355 non-null   object
 5   localized.title           4230 non-null   object
 6   localized.description     4230 non-null   object
 7   defaultLanguage           257 non-null    object
 8   viewCount                 4230 non-null   object
 9   subscriberCount           4230 non-null   object
 10  hiddenSubscriberCount     4230 non-null   bool  
 11  videoCount                4230 non-null   object
 12  relatedPlaylists.uploads  4230 non-null   object
 13  topicCategories           4169 non-null   object
dtypes: bool(1), object(13)
m

In [15]:
# Parse the creation timestamp and cast the count columns, which arrive as strings.
df['publishedAt'] = pd.to_datetime(df['publishedAt'], format='ISO8601')
for count_column in ('viewCount', 'subscriberCount', 'videoCount'):
    # errors='coerce' turns any unparseable value into NaN instead of raising.
    df[count_column] = pd.to_numeric(df[count_column], errors='coerce')
df['hiddenSubscriberCount'] = df['hiddenSubscriberCount'].astype(bool)

In [16]:
# Show the channels whose description is an empty string.
df[df['description'].str.len() == 0]

Unnamed: 0,id,title,description,publishedAt,country,localized.title,localized.description,defaultLanguage,viewCount,subscriberCount,hiddenSubscriberCount,videoCount,relatedPlaylists.uploads,topicCategories
7,UCWajepQsMCmNBUHVIPirk8g,Helen Christie,,2013-07-28 21:33:29+00:00,,Helen Christie,,,1033614310,1890000,False,348,UUWajepQsMCmNBUHVIPirk8g,"[https://en.wikipedia.org/wiki/Film, https://e..."
13,UCi8C7TNs2ohrc6hnRQ5Sn2w,Programmers are also human,,2015-06-03 17:51:08+00:00,US,Programmers are also human,,,47542816,259000,False,86,UUi8C7TNs2ohrc6hnRQ5Sn2w,"[https://en.wikipedia.org/wiki/Humour, https:/..."
18,UC7jf7M34XLreNJUnxyyYtEQ,Strategy,,2023-11-03 13:57:01.863877+00:00,US,Strategy,,,3802669,19600,False,136,UU7jf7M34XLreNJUnxyyYtEQ,"[https://en.wikipedia.org/wiki/Film, https://e..."
24,UCLuYADJ6hESLHX87JnsGbjA,Josh Johnson,,2012-09-10 19:12:48+00:00,,Josh Johnson,,,306584739,1720000,False,767,UULuYADJ6hESLHX87JnsGbjA,"[https://en.wikipedia.org/wiki/Humour, https:/..."
35,UCY25D8Rak0XwGRRir57K6qg,anonymousentertainment11,,2024-02-25 12:17:41.527881+00:00,,anonymousentertainment11,,,113,6,False,10,UUY25D8Rak0XwGRRir57K6qg,"[https://en.wikipedia.org/wiki/Sports_game, ht..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4074,UCZN3a06e2DoYYXfVQmBkASQ,The Turkish Army´s defense industry,,2024-10-26 10:57:08.631658+00:00,US,The Turkish Army´s defense industry,,,29772,38,False,203,UUZN3a06e2DoYYXfVQmBkASQ,[https://en.wikipedia.org/wiki/Lifestyle_(soci...
4077,UCat1rOpGPg5Y4uWph3GYtWQ,Thinkcode AI,,2025-03-09 16:31:03.894673+00:00,US,Thinkcode AI,,,14215,120,False,11,UUat1rOpGPg5Y4uWph3GYtWQ,"[https://en.wikipedia.org/wiki/Knowledge, http..."
4170,UCXRgGY-3F837OGkr0ipZsRA,ZONA TECH,,2020-07-31 22:47:28.488650+00:00,ID,ZONA TECH,,,185996,1820,False,57,UUXRgGY-3F837OGkr0ipZsRA,"[https://en.wikipedia.org/wiki/Technology, htt..."
4196,UC1V_cslcZ7BMk57t_CeByjQ,Krista Lamen | Notion for Life,,2022-06-22 15:24:30.896101+00:00,PL,Krista Lamen | Notion for Life,,,37988,208,False,35,UU1V_cslcZ7BMk57t_CeByjQ,"[https://en.wikipedia.org/wiki/Technology, htt..."


In [17]:
# Show rows where the localized title differs from the title.
df[df['title'] != df['localized.title']]

Unnamed: 0,id,title,description,publishedAt,country,localized.title,localized.description,defaultLanguage,viewCount,subscriberCount,hiddenSubscriberCount,videoCount,relatedPlaylists.uploads,topicCategories


In [18]:
# Show rows where the localized description differs from the description.
df[df['description'] != df['localized.description']]

Unnamed: 0,id,title,description,publishedAt,country,localized.title,localized.description,defaultLanguage,viewCount,subscriberCount,hiddenSubscriberCount,videoCount,relatedPlaylists.uploads,topicCategories


In [19]:
# Drop the localized copies of title and description (the two comparison cells
# just before this one check whether they ever differ from the originals).
drop_columns = ['localized.title', 'localized.description']
df.drop(columns=drop_columns, inplace=True)

In [20]:
# Null proportion of the object columns
object_columns = df.select_dtypes(include=['object']).columns
null_proportions = df.loc[:, object_columns].isnull().mean()

plotly_fig = px.bar(
    null_proportions,
    x=null_proportions.index,
    y=null_proportions.values,
    title='Null Proportion of Object Columns',
    labels={'x': 'Column', 'y': 'Null Proportion'},
)
plotly_fig.show()

In [21]:
# Collapse each list of topic-category URLs into a single '; '-separated string.
# Values that are not lists (e.g. missing entries) are left unchanged.
df['topicCategories'] = df['topicCategories'].apply(lambda x: '; '.join(x) if isinstance(x, list) else x)

In [22]:
# Replace missing country / topicCategories values with each column's most frequent value.
mode_fill_values = {
    name: df[name].mode()[0]
    for name in ['country', 'topicCategories']
}
df.fillna(mode_fill_values, inplace=True)

In [23]:
#fill null of default Language with Detect Language of description
def detect_language(text):
    """Return the language code reported by langdetect's ``detect`` for ``text``.

    Parameters
    ----------
    text : object
        The text to classify. Anything that is not a non-blank string is
        treated as undetectable.

    Returns
    -------
    str
        The value returned by ``detect(text)``, or ``'unknown'`` when the input
        is not a non-blank string or detection fails.
    """
    # Missing values and blank strings carry nothing to detect; skip the detector call.
    if not isinstance(text, str) or not text.strip():
        return 'unknown'
    try:
        return detect(text)
    except Exception:
        # Best-effort: any detection failure maps to 'unknown'. Unlike a bare
        # `except:`, this lets KeyboardInterrupt/SystemExit propagate, so the
        # cell can still be interrupted.
        return 'unknown'

# Keep existing defaultLanguage values; fill only the missing ones with the
# language detected from the channel description.
df['defaultLanguage'] = df['defaultLanguage'].fillna(df['description'].apply(detect_language))

In [24]:
# Strip the Wikipedia URL prefix so only the category names remain.
# regex=False: the prefix is a literal string; treated as a regular expression,
# its '.' characters would match any character.
df['topicCategories'] = df['topicCategories'].str.replace('https://en.wikipedia.org/wiki/', '', regex=False)

In [25]:
#view histogram of object columns
def plot_histogram(df, column):
    """Display a Plotly histogram of ``column`` taken from ``df``."""
    histogram_title = f'Histogram of {column}'
    px.histogram(df, x=column, title=histogram_title).show()

# Draw one histogram per column of interest.
for column in ['country', 'defaultLanguage', 'hiddenSubscriberCount']:
    plot_histogram(df, column)


In [26]:
#which word appears most in the topicCategories, separated by ';'
def count_words_in_topic_categories(df):
    """Count how often each topic category occurs in ``df['topicCategories']``.

    Each non-null cell is expected to be a ';'-separated string. Entries are
    stripped of surrounding whitespace before counting: this notebook joins
    them with '; ', so without stripping 'Music' and ' Music' would be tallied
    as two different categories. Empty entries are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``topicCategories`` column.

    Returns
    -------
    collections.Counter
        Mapping of category name to number of occurrences.
    """
    from collections import Counter

    return Counter(
        word.strip()
        for categories in df['topicCategories'].dropna()
        for word in categories.split(';')
        if word.strip()
    )

# Tally the topic categories across all rows of df.
word_counts = count_words_in_topic_categories(df)
def plot_word_counts(word_counts):
    """Show a bar chart of the 20 most common entries in ``word_counts``.

    Parameters
    ----------
    word_counts : collections.Counter
        Counts keyed by word.

    Returns
    -------
    None
        The figure is displayed as a side effect. When ``word_counts`` is
        empty, a notice is printed and nothing is plotted.
    """
    top_words = word_counts.most_common(20)
    if not top_words:
        # zip(*[]) yields nothing, so the two-name unpacking below would raise ValueError.
        print('No topic categories to plot.')
        return
    words, counts = zip(*top_words)
    # Uses the module-level `px` imported at the top of the notebook.
    fig = px.bar(x=words, y=counts, labels={'x': 'Word', 'y': 'Count'},
                 title='Top 20 Words in Topic Categories')
    fig.show()
# Render the chart of the most frequent topic categories.
plot_word_counts(word_counts)

In [27]:
# Null proportion of the numeric and UTC-datetime columns
numeric_columns = df.select_dtypes(include=['number', 'datetime64[ns, UTC]']).columns
null_proportions_numeric = df.loc[:, numeric_columns].isnull().mean()

plotly_fig_numeric = px.bar(
    null_proportions_numeric,
    x=null_proportions_numeric.index,
    y=null_proportions_numeric.values,
    title='Null Proportion of Numeric Columns',
    labels={'x': 'Column', 'y': 'Null Proportion'},
)
plotly_fig_numeric.show()

In [28]:
# Summary statistics of the numeric columns.
df.describe()

Unnamed: 0,viewCount,subscriberCount,videoCount
count,4230.0,4230.0,4230.0
mean,331649700.0,734431.6,2606.026714
std,2592321000.0,3537802.0,17913.459734
min,0.0,0.0,0.0
25%,216136.0,1735.0,79.25
50%,2708230.0,22600.0,264.0
75%,32986890.0,209000.0,753.75
max,104850600000.0,91100000.0,469807.0


In [29]:
# Final check of dtypes and non-null counts after cleaning.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4230 entries, 0 to 4229
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype              
---  ------                    --------------  -----              
 0   id                        4230 non-null   object             
 1   title                     4230 non-null   object             
 2   description               4230 non-null   object             
 3   publishedAt               4230 non-null   datetime64[ns, UTC]
 4   country                   4230 non-null   object             
 5   defaultLanguage           4230 non-null   object             
 6   viewCount                 4230 non-null   int64              
 7   subscriberCount           4230 non-null   int64              
 8   hiddenSubscriberCount     4230 non-null   bool               
 9   videoCount                4230 non-null   int64              
 10  relatedPlaylists.uploads  4230 non-null   object             
 11  topicCategories  

In [30]:
# Rename the flattened uploads-playlist column to a name without a dot.
df.rename(columns={'relatedPlaylists.uploads': 'uploadsPlaylistId'}, inplace=True)

In [33]:
# Stamp every row with today's date (time set to midnight).
# NOTE(review): this is the date the notebook runs, not necessarily the date the
# data was crawled — the source file name carries 20250614 while the saved output
# shows 2025-06-15. Confirm which date is intended.
df['crawl_date'] = pd.to_datetime('today').normalize()

0      2025-06-15
1      2025-06-15
2      2025-06-15
3      2025-06-15
4      2025-06-15
          ...    
4225   2025-06-15
4226   2025-06-15
4227   2025-06-15
4228   2025-06-15
4229   2025-06-15
Name: crawl_date, Length: 4230, dtype: datetime64[ns]

In [34]:
# BigQuery destination, addressed as <project>.<dataset>.<table>
dataset_id = "2_cleaned_data"
table_id = 'channel_basic_info'
full_dataset_id = f"{project_id}.{dataset_id}"
table_ref = f"{full_dataset_id}.{table_id}"

client = bigquery.Client(project=project_id)
dataset = bigquery.Dataset(full_dataset_id)


Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a "quota exceeded" or "API not enabled" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. 



In [35]:
# Configure and run a load job that writes the cleaned frame to the target table.
# NOTE(review): WRITE_APPEND adds rows on every run, so re-running this cell
# presumably duplicates data already loaded — confirm this is intended.
job_config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_APPEND,  # Use WRITE_TRUNCATE to overwrite
    autodetect=True  # Let BigQuery detect schema from DataFrame
)

job = client.load_table_from_dataframe(df, table_ref, job_config=job_config)
job.result()  # Wait for the job to complete

print(f"✅ Data uploaded to BigQuery table: {table_id}")


Loading pandas DataFrame into BigQuery will require pandas-gbq package version 0.26.1 or greater in the future. Tried to import pandas-gbq and got: No module named 'pandas_gbq'



✅ Data uploaded to BigQuery table: channel_basic_info
