In [1]:
import os, requests, pandas as pd
from dotenv import load_dotenv
load_dotenv()
from typing import Literal

from sqlalchemy import create_engine
from urllib.parse import quote_plus  # safely URL-encode the driver name

In [2]:
# Social Blade endpoint that get_socialblade_daily() sends its GET request to.
BASE = "https://matrix.sbapis.com/b/youtube/statistics"

In [3]:
def get_socialblade_daily(
    channel_id: str,
    client_id: str | None = None,
    token: str | None = None,
    history: Literal["default", "extended", "archive"] = "archive",
) -> pd.DataFrame:
    """
    Return a DataFrame with columns: date (datetime64[ns]), subs (int), views (int)
    Requires a Social Blade Business-API subscription.
    """
    client_id = client_id or os.getenv("SBL_CLIENT_ID")
    token     = token     or os.getenv("SBL_TOKEN")
    if not (client_id and token):
        raise ValueError("Missing Social Blade credentials")

    headers = {"clientid": client_id, "token": token}
    params  = {"query": channel_id, "history": history, "allow-stale": "false"}

    resp = requests.get(BASE, headers=headers, params=params, timeout=15)
    resp.raise_for_status()          # â†’ HTTPError on 4xx/5xx

    data = resp.json()["data"]["daily"]   # list[dict]

    df = pd.DataFrame(data)
    df["date"] = pd.to_datetime(df["date"], utc=True).dt.tz_convert(None)
    df = df.rename(columns={"subs": "subscribers"}).sort_values("date").reset_index(drop=True)
    df["source"] = "socialblade"

    return df


In [4]:
# Download the full daily history for the channel; both credentials are read
# from the environment.
sb_df = get_socialblade_daily(
    channel_id="UC1E-JS8L0j1Ei70D9VEFrPQ",
    client_id=os.getenv("SBL_CLIENT_ID"),
    token=os.getenv("SBL_TOKEN"),
)

In [5]:
# Save a local CSV copy of the fetched frame (the index is not written).
sb_df.to_csv('Vaush_SocialBlade_subs.csv', index=False)

In [6]:
# Show the column dtypes and non-null counts of the fetched frame.
sb_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1094 entries, 0 to 1093
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   date         1094 non-null   datetime64[ns]
 1   subscribers  1094 non-null   int64         
 2   views        1094 non-null   int64         
 3   source       1094 non-null   object        
dtypes: datetime64[ns](1), int64(2), object(1)
memory usage: 34.3+ KB


In [7]:
# Confirm the credential was loaded WITHOUT echoing it in full: notebook outputs
# are saved with the file, so only a short prefix is shown and the rest is masked.
client_id = os.getenv("SBL_CLIENT_ID")
if client_id:
    print(client_id[:4] + "*" * max(len(client_id) - 4, 0))
else:
    print("SBL_CLIENT_ID is not set")

cli_************************


In [8]:
def select_all_azure_sql():
    """
    Read the whole SOCIAL_BLADE_API.Vaush_HIST_SUBS_VIEWS table into a DataFrame.

    Connection settings come from the AZSQL_SERVER, AZSQL_DATABASE,
    AZSQL_USERNAME and AZSQL_PASSWORD environment variables.

    Returns
    -------
    pd.DataFrame
        Result of ``SELECT *`` on the table.

    Raises
    ------
    ValueError
        If any of the four environment variables is unset or empty.
    """
    settings = {
        name: os.getenv(name)
        for name in ("AZSQL_SERVER", "AZSQL_DATABASE", "AZSQL_USERNAME", "AZSQL_PASSWORD")
    }
    missing = [name for name, value in settings.items() if not value]
    if missing:
        # Fail early: an unset variable would otherwise be formatted into the
        # connection string as the literal text "None".
        raise ValueError(f"Missing Azure SQL settings: {', '.join(missing)}")

    drv = "ODBC Driver 18 for SQL Server"
    odbc_str = (
        f"DRIVER={{{drv}}};"
        f"SERVER=tcp:{settings['AZSQL_SERVER']},1433;"
        f"DATABASE={settings['AZSQL_DATABASE']};"
        f"UID={settings['AZSQL_USERNAME']};"
        f"PWD={settings['AZSQL_PASSWORD']};"
        "Encrypt=yes;"
        "TrustServerCertificate=no;"
        "Connection Timeout=30;"
    )

    # The raw ODBC string is URL-encoded and handed to pyodbc via odbc_connect.
    params = quote_plus(odbc_str)
    engine = create_engine(f"mssql+pyodbc:///?odbc_connect={params}")

    try:
        # read table into pandas
        return pd.read_sql("SELECT * FROM SOCIAL_BLADE_API.Vaush_HIST_SUBS_VIEWS", engine)
    finally:
        # The engine is created per call, so release its pooled connections.
        engine.dispose()

In [9]:
def filter_new_videos(df_in_database, df_from_api):
    """
    Return the rows of ``df_from_api`` whose ``date`` is absent from ``df_in_database``.

    Both frames must have a ``date`` column. Rows are matched on that column
    only; the returned rows keep their original index labels and order.
    """
    known_dates = set(df_in_database["date"])
    already_stored = df_from_api["date"].isin(known_dates)
    return df_from_api.loc[~already_stored]

In [10]:
def df_to_azure_sql(df):
    """
    Append the rows of ``df`` to SOCIAL_BLADE_API.Vaush_HIST_SUBS_VIEWS (Azure SQL DB).

    Connection settings come from the AZSQL_SERVER, AZSQL_DATABASE,
    AZSQL_USERNAME and AZSQL_PASSWORD environment variables.

    Parameters
    ----------
    df : pd.DataFrame
        Rows to insert. An empty frame is a no-op: nothing is connected to or
        written.

    Raises
    ------
    ValueError
        If any of the four environment variables is unset or empty.

    Notes
    -----
    This is a plain append, not an upsert: ``to_sql`` does not skip or update
    rows that already exist, so remove already-stored rows before calling.
    """
    if df.empty:
        # Nothing to insert, so avoid opening a connection at all.
        return

    settings = {
        name: os.getenv(name)
        for name in ("AZSQL_SERVER", "AZSQL_DATABASE", "AZSQL_USERNAME", "AZSQL_PASSWORD")
    }
    missing = [name for name, value in settings.items() if not value]
    if missing:
        # Fail early: an unset variable would otherwise be formatted into the
        # connection string as the literal text "None".
        raise ValueError(f"Missing Azure SQL settings: {', '.join(missing)}")

    drv = "ODBC Driver 18 for SQL Server"
    odbc_str = (
        # Braces keep the spaces of the driver name intact (same form as
        # select_all_azure_sql uses).
        f"Driver={{{drv}}};Server=tcp:{settings['AZSQL_SERVER']},1433;"
        f"Database={settings['AZSQL_DATABASE']};"
        f"Uid={settings['AZSQL_USERNAME']};"
        f"Pwd={settings['AZSQL_PASSWORD']};"
        "Encrypt=yes;TrustServerCertificate=no;Connection Timeout=30;"
    )

    # SQLAlchemy-style URL.  Space -> + ;  parentheses -> %28 %29, etc.
    params = quote_plus(odbc_str)
    engine = create_engine(
        f"mssql+pyodbc:///?odbc_connect={params}",
        fast_executemany=True        # batches rows under the hood
    )

    try:
        # All chunks are written inside one transaction opened by engine.begin().
        with engine.begin() as cn:
            df.to_sql(
                name="Vaush_HIST_SUBS_VIEWS",
                con=cn,
                schema="SOCIAL_BLADE_API",
                if_exists="append",       # create once, then append
                index=False,
                chunksize=1000,           # rows per insert batch
            )
    finally:
        # The engine is created per call, so release its pooled connections.
        engine.dispose()

In [19]:
# Normalise the fetched frame to one row per calendar day:
#   1. reduce `date` to a plain date without a time part (values that cannot
#      be parsed become missing),
#   2. drop rows whose date is missing,
#   3. where a day occurs more than once, keep only its last row.
sb_df["date"] = pd.to_datetime(sb_df["date"], errors="coerce").dt.date
sb_df = sb_df.dropna(subset=["date"]).drop_duplicates(subset=["date"], keep="last")

In [14]:
# Load the rows already stored in the database and normalise them the same way
# as the fetched data: plain dates (unparseable values become missing), no
# missing dates, and only the last row kept for any day that occurs twice.
df_in_db = (
    select_all_azure_sql()
    .assign(date=lambda frame: pd.to_datetime(frame["date"], errors="coerce").dt.date)
    .dropna(subset=["date"])
    .drop_duplicates(subset=["date"], keep="last")
)

In [26]:
# Keep only the fetched rows whose date is not already present in the database frame.
df_filtered_insert = filter_new_videos(df_in_database=df_in_db, df_from_api=sb_df)


In [27]:
# Send only the three columns written to the table. When there is nothing new,
# skip the database round trip and do not report a push that never happened.
if df_filtered_insert.empty:
    print("No new rows to push to Azure SQL")
else:
    df_to_azure_sql(df_filtered_insert[['date','subscribers','views']])
    print("Data pushed to Azure SQL 🎉")

Data pushed to Azure SQL 🎉
