In [None]:
# 1) Imports & load your artifacts
import pytz
from datetime import datetime
import pandas as pd
import numpy as np

from predictor import (
    get_predictor_artifacts,
    _infer_grid_for_game,
    _get_last_row_for_stream,
)

# load pipelines list + data & metadata
# now returns: (List[Pipeline], df_for_inf, features, cat_opts, start_opts, dur_opts, metrics_list)
pipes, df, features, cat_opts, start_opts, dur_opts, metrics_list = get_predictor_artifacts()
ready = all(p is not None for p in pipes) and df is not None

# extract the full tag vocabulary from the first pipeline
pre = pipes[0].named_steps["pre"]
vectorizer = pre.named_transformers_["tags"].named_steps["vectorize"]
all_tags = vectorizer.get_feature_names_out().tolist()


# 2) User‐adjustable parameters
stream_name         = "thelegendyagami"
selected_game       = "SomeGameCategory"  # e.g. "Fortnite"
selected_start_time = 19                  # hour in 0–23
selected_tags       = ["shooter", "fun"]  # list of tags you want to test


# 3) Helper: build a feature‐row for a given stream/game/start/tags
def make_feature_row(baseline, game, hour, tags, features):
    r = baseline.copy()
    # 1) set categorical & time features
    r["game_category"]   = game
    r["start_time_hour"] = hour

    now_est  = datetime.now(pytz.timezone("US/Eastern"))
    dow      = now_est.strftime("%A")
    r["day_of_week"]     = dow
    r["start_hour_sin"]  = np.sin(2 * np.pi * hour / 24)
    r["start_hour_cos"]  = np.cos(2 * np.pi * hour / 24)
    # <-- FIXED here:
    r["is_weekend"]      = dow in ("Saturday", "Sunday")

    # 2) set tags one-hot
    for t in all_tags:
        r[f"tag_{t}"] = int(t in tags)

    # 3) build a single-row DataFrame and select only the model’s features
    return pd.DataFrame([r])[features]


# grab the “last row” for your stream as baseline
baseline = _get_last_row_for_stream(df, stream_name)


# 4) Make predictions for each model in `pipes`
results = []
for idx, pipe in enumerate(pipes, start=1):
    X = make_feature_row(baseline, selected_game, selected_start_time, selected_tags, features)
    y = pipe.predict(X)[0]
    results.append({
        "model":   f"pipe{idx}",
        "y_pred":  round(y, 2),
        "metrics": metrics_list[idx-1]      # in case you want to inspect its training metrics
    })

pd.DataFrame(results)


Unnamed: 0,model,y_pred,metrics
0,pipe1,0.45,{'best_params': {'reg__regressor__max_depth': ...
1,pipe2,0.46,{'best_params': {'reg__regressor__max_depth': ...
2,pipe3,10.87,{'best_params': {'reg__regressor__max_depth': ...


In [7]:
from itertools import chain

def best_tag_combinations(
    pipe,
    baseline,
    game,
    hour,
    features,
    candidate_tags,
    max_tags=None
):
    """
    Greedily build up a tag set one tag at a time,
    always picking the tag that gives the largest bump
    in pipe.predict. Returns a list of (tag_tuple, score).
    """
    # start from no tags
    selected    = []
    # baseline prediction with zero tags
    X0          = make_feature_row(baseline, game, hour, [], features)
    best_score  = pipe.predict(X0)[0]
    history     = [(tuple(selected), best_score)]
    remaining   = set(candidate_tags)

    while remaining and (max_tags is None or len(selected) < max_tags):
        # try adding each remaining tag
        scores = {}
        for t in remaining:
            tags_try = selected + [t]
            X_try    = make_feature_row(baseline, game, hour, tags_try, features)
            scores[t] = pipe.predict(X_try)[0]

        # pick the tag with the highest resulting score
        best_tag, score = max(scores.items(), key=lambda kv: kv[1])

        # stop if nothing improves
        if score <= best_score:
            break

        # otherwise record and continue
        selected.append(best_tag)
        remaining.remove(best_tag)
        best_score = score
        history.append((tuple(selected), best_score))

    return history




In [11]:
import itertools
import pandas as pd

stream_name         = "thelegendyagami"
# grab the “last row” for your stream as baseline
baseline = _get_last_row_for_stream(df, stream_name)

# 1) Legend’s games (only from their history, no preds needed)
legend_games = df.loc[
    df["stream_name"] == stream_name,
    "game_category"
].unique().tolist()

# 2) Legend’s tags
legend_tags = sorted({
    tag
    for tags in df.loc[df["stream_name"] == stream_name, "raw_tags"].dropna()
    for tag in tags
})


# 2) Build a **restricted** combo‐grid
grid = pd.DataFrame(
    list(itertools.product(legend_games, start_opts, dur_opts)),
    columns=["game_category", "start_time_hour", "stream_duration"]
)


# 3) Predict all three metrics for each combo
def predict_all_metrics(row):
    X = make_feature_row(
        baseline,
        row.game_category,
        row.start_time_hour,
        selected_tags,
        features
    )
    return pd.Series({
        "subs":      round(pipes[0].predict(X)[0], 2),
        "followers": round(pipes[1].predict(X)[0], 2),
        "viewers":   round(pipes[2].predict(X)[0], 2),
    })

df_metrics = pd.concat(
    [grid, grid.apply(predict_all_metrics, axis=1)],
    axis=1
)


# 4) Top-3 combos by each metric
top3_subs      = df_metrics.nlargest(3, "subs")
top3_followers = df_metrics.nlargest(3, "followers")
top3_viewers   = df_metrics.nlargest(3, "viewers")


print("🔸 Top 3 game/time/duration for MAX subs 🔸")
display(top3_subs)

print("🔸 Top 3 game/time/duration for MAX follower growth 🔸")
display(top3_followers)

print("🔸 Top 3 game/time/duration for MAX viewers 🔸")
display(top3_viewers)


# 5) Top-3 single-tag suggestions, **restricted** to legend_tags

print("legend_tags:", legend_tags)

combo_histories = []
for idx, pipe in enumerate(pipes, start=1):
    hist = best_tag_combinations(
        pipe,
        baseline,
        selected_game,
        selected_start_time,
        features,
        legend_tags,
        max_tags=len(legend_tags)  # or cap at some smaller number if you like
    )
    combo_histories.append(hist)

    # grab the top 3 combos by predicted value
    top3 = sorted(hist, key=lambda x: x[1], reverse=True)[:3]
    print(f"\n🔸 Model {idx} top‐3 tag combos (score) 🔸")
    for combo, score in top3:
        print(f"  {combo} → {score:.2f}")



🔸 Top 3 game/time/duration for MAX subs 🔸


Unnamed: 0,game_category,start_time_hour,stream_duration,subs,followers,viewers
627,ufo 50,9,2,0.86,0.47,10.59
628,ufo 50,9,3,0.86,0.47,10.59
629,ufo 50,9,4,0.86,0.47,10.59


🔸 Top 3 game/time/duration for MAX follower growth 🔸


Unnamed: 0,game_category,start_time_hour,stream_duration,subs,followers,viewers
385,hades ii,11,2,0.68,0.54,10.94
386,hades ii,11,3,0.68,0.54,10.94
387,hades ii,11,4,0.68,0.54,10.94


🔸 Top 3 game/time/duration for MAX viewers 🔸


Unnamed: 0,game_category,start_time_hour,stream_duration,subs,followers,viewers
913,donkey kong bananza,11,2,0.49,0.54,11.04
914,donkey kong bananza,11,3,0.49,0.54,11.04
915,donkey kong bananza,11,4,0.49,0.54,11.04


legend_tags: ['AIArt', 'Chıll', 'English', 'HardestDifficulty', 'Variety', 'Veteran', 'WWE', 'WrestlingFan', 'blackguy', 'fightnight']

🔸 Model 1 top‐3 tag combos (score) 🔸
  () → 0.45

🔸 Model 2 top‐3 tag combos (score) 🔸
  () → 0.46

🔸 Model 3 top‐3 tag combos (score) 🔸
  () → 10.87
