In [20]:
# 1) Imports & load your artifacts
import pytz
from datetime import datetime
import pandas as pd
import numpy as np
import os
import logging
import joblib


from predictor import (
    get_predictor_artifacts,
    _infer_grid_for_game,
    _get_last_row_for_stream,
)
_ARTIFACT_PATH = os.path.join(os.getcwd(), "predictor_artifacts.joblib")
DEFAULT_START_TIMES = list(range(24))  # 0..23 hours
# A single 4-hour option. The original cell assigned this name three times
# (a 2..7 range, then that range times 60, then [4]); only the final value
# ever took effect, so it is stated once here.
DEFAULT_DURATIONS_HRS = [4]

# Define every artifact-backed name up front so later cells never hit a
# NameError (or silently reuse stale kernel state) when the file is missing
# or loading fails part-way through.
df_inf = None
df = None
pipes = []
features = None
cat_opts = None
start_opts = DEFAULT_START_TIMES
dur_opts = DEFAULT_DURATIONS_HRS
metrics_list = []

if os.path.exists(_ARTIFACT_PATH):
    try:
        # NOTE: joblib.load unpickles arbitrary objects; only load artifact
        # files that come from a trusted source.
        data = joblib.load(_ARTIFACT_PATH)
        df_inf = data.get("df_for_inf")
        # Only touch the frame once we know it really is a DataFrame; the
        # original lower-cased the column before this check and would raise
        # on a missing "df_for_inf" entry.
        if isinstance(df_inf, pd.DataFrame):
            df_inf['game_category'] = df_inf['game_category'].str.lower()
            df_inf.columns = df_inf.columns.map(str)
        pipes = data.get("pipelines", [])
        # `df` is the same object as `df_inf`, as in the original cell.
        df = df_inf
        features = data.get("features")
        cat_opts = data.get("stream_category_options_inf")
        start_opts = data.get("optional_start_times", DEFAULT_START_TIMES)
        dur_opts = data.get("stream_duration_opts", DEFAULT_DURATIONS_HRS)
        metrics_list = data.get("metrics_list", [])
        logging.info("Loaded predictor artifacts from %s", _ARTIFACT_PATH)
    except Exception as e:
        logging.exception("Failed to load artifacts; will train on‐dyno when invoked: %s", e)
else:
    logging.info("No predictor_artifacts.joblib found; on‐dyno training available when called.")

# The artifact file loaded above provides the pipelines list plus data & metadata:
# pipelines, df_for_inf, features, category options, start-time options, duration options, metrics_list


# Fail early with a clear message when the artifacts did not load.
# bool(pipes) is required because all(...) is vacuously True for an empty list.
ready = bool(pipes) and all(p is not None for p in pipes) and df is not None
if not ready:
    raise RuntimeError(
        "Predictor artifacts are not loaded: need at least one pipeline and a "
        "non-null inference DataFrame before running the rest of this notebook."
    )

print(features)
for f in features:
    print(f)

# extract the full tag vocabulary from the first pipeline
pre = pipes[0].named_steps["pre"]
vectorizer = pre.named_transformers_["tags"].named_steps["vectorize"]
all_tags = vectorizer.get_feature_names_out().tolist()


# 2) User-adjustable parameters
stream_name         = "thelegendyagami"
# NOTE(review): game_category values are lower-cased when the artifacts are
# loaded, while this value is upper-case — confirm the casing the pipelines expect.
selected_game       = "ELDEN RING"  # e.g. "Fortnite"
selected_start_time = 19                  # hour in 0–23
# Flat list of tag strings. make_feature_row checks `tag in selected_tags`, which
# can never match a string when the tags are wrapped in an extra list.
selected_tags       = ["Veteran", "AIArt", "English", "HardestDifficulty"]

# 3) Helper: build a feature‐row for a given stream/game/start/tags
def make_feature_row(baseline, game, hour, tags, features):
    """Build a one-row feature frame for a hypothetical stream.

    Copies ``baseline`` and overrides the game, start hour, the derived time
    features and one ``tag_<name>`` indicator per entry of the notebook-level
    ``all_tags`` vocabulary.

    Parameters
    ----------
    baseline : object supporting ``.copy()`` and item assignment
        Starting feature values (the notebook passes the result of
        ``_get_last_row_for_stream``).
    game : value written to ``game_category``.
    hour : number
        Written to ``start_time_hour`` and used for the 24-hour sin/cos encoding.
    tags : iterable of str, str, or None
        Tags to switch on. A single string is treated as one tag, ``None`` as
        no tags, and one level of nested list/tuple/set is flattened, so both
        ``["a", "b"]`` and ``[["a", "b"]]`` activate ``a`` and ``b``.
    features : list of column labels to keep, in order.

    Returns
    -------
    pandas.DataFrame
        A single row restricted to ``features``.

    Notes
    -----
    ``day_of_week`` and ``is_weekend`` are taken from the current date in
    US/Eastern at call time, not from ``baseline``.
    """
    r = baseline.copy()
    # 1) set categorical & time features
    r["game_category"]   = game
    r["start_time_hour"] = hour

    now_est  = datetime.now(pytz.timezone("US/Eastern"))
    dow      = now_est.strftime("%A")
    r["day_of_week"]     = dow
    r["start_hour_sin"]  = np.sin(2 * np.pi * hour / 24)
    r["start_hour_cos"]  = np.cos(2 * np.pi * hour / 24)
    r["is_weekend"]      = dow in ("Saturday", "Sunday")

    # 2) normalise `tags` to a flat set so membership means "is this tag on?".
    #    Without this, a nested list never matches and a bare string would do
    #    a substring test instead.
    if tags is None:
        tags = []
    elif isinstance(tags, str):
        tags = [tags]
    active_tags = set()
    for item in tags:
        if isinstance(item, (list, tuple, set, frozenset)):
            active_tags.update(item)
        else:
            active_tags.add(item)

    # 3) set tags one-hot
    for t in all_tags:
        r[f"tag_{t}"] = int(t in active_tags)

    # 4) build a single-row DataFrame and select only the model's features
    return pd.DataFrame([r])[features]


# Baseline features: the "last row" returned for this stream.
baseline = _get_last_row_for_stream(df, stream_name)

# Every start hour of the day, 0 through 23.
times = list(range(24))

# Candidate games are all non-null categories in the whole dataset.
# (To restrict to this streamer's own history, filter on
#  df["stream_name"] == stream_name before taking the column.)
game_cats = df["game_category"].dropna().unique().tolist()




['day_of_week', 'start_hour_sin', 'start_hour_cos', 'is_weekend', 'days_since_previous_stream', 'game_category', 'stream_duration', 'avg_total_subscriptions_last_1', 'median_total_subscriptions_last_1', 'std_total_subscriptions_last_1', 'min_total_subscriptions_last_1', 'max_total_subscriptions_last_1', 'avg_total_subscriptions_last_3', 'median_total_subscriptions_last_3', 'std_total_subscriptions_last_3', 'min_total_subscriptions_last_3', 'max_total_subscriptions_last_3', 'avg_total_subscriptions_last_7', 'median_total_subscriptions_last_7', 'std_total_subscriptions_last_7', 'min_total_subscriptions_last_7', 'max_total_subscriptions_last_7', 'avg_total_subscriptions_last_14', 'median_total_subscriptions_last_14', 'std_total_subscriptions_last_14', 'min_total_subscriptions_last_14', 'max_total_subscriptions_last_14', 'avg_net_follower_change_last_1', 'median_net_follower_change_last_1', 'std_net_follower_change_last_1', 'min_net_follower_change_last_1', 'max_net_follower_change_last_1'

In [21]:
# 4) Make predictions for each model in `pipes`
# 0-based position in `pipes` of the model whose predictions drive the
# max/min search below; the other models are only recorded.
p = 0

# Seed with -inf/+inf instead of 0/100 so that any real prediction
# (negative, or larger than 100) is picked up by the comparisons.
max_pred = float("-inf")
min_pred = float("inf")
max_game_cat = None
min_game_cat = None
total_results = []
for game in game_cats:
    results = []
    # The feature row does not depend on the pipeline, so build it once per game.
    X = make_feature_row(baseline, game, selected_start_time, selected_tags, features)
    for idx, pipe in enumerate(pipes, start=1):
        y = pipe.predict(X)[0]
        if idx - 1 == p:
            if y > max_pred:
                max_pred = y
                max_game_cat = game
            if y < min_pred:
                min_pred = y
                min_game_cat = game
        record = {
            "model":   f"pipe{idx}",
            "y_pred":  round(y, 2),
            "metrics": metrics_list[idx-1],     # in case you want to inspect its training metrics
        }
        results.append(record)
        total_results.append(dict(record))

print('Max Prediction:', max_pred)
print("Game Category:", max_game_cat)
print('Min Prediction:', min_pred)
print("Game Category:", min_game_cat)

# Second sweep: keep the best game from above fixed and vary the start hour.
game = max_game_cat
max_pred = float("-inf")
min_pred = float("inf")
max_game_cat = None
min_game_cat = None
# Initialised so the prints below cannot raise NameError when nothing is found.
max_time = None
min_time = None
for t in times:
    results = []
    # Again independent of the pipeline: one feature row per hour.
    X = make_feature_row(baseline, game, t, selected_tags, features)
    for idx, pipe in enumerate(pipes, start=1):
        y = pipe.predict(X)[0]
        if idx - 1 == p:
            if y > max_pred:
                max_pred = y
                max_time = t
            if y < min_pred:
                min_pred = y
                min_time = t
        results.append({
            "model":   f"pipe{idx}",
            "y_pred":  round(y, 2),
            "metrics": metrics_list[idx-1],     # in case you want to inspect its training metrics
        })

print('Max Prediction:', max_pred)
print("Time for Max Pred:", max_time)
print('Min Prediction:', min_pred)
print("Time for Min Pred:", min_time)



Max Prediction: 0.0990973646968827
Game Category: just chatting
Min Prediction: 0.09480685106900709
Game Category: final fantasy viii
Max Prediction: 0.09950933883074929
Time for Max Pred: 20
Min Prediction: 0.08257654711602087
Time for Min Pred: 14


In [None]:
from itertools import chain

def best_tag_combinations(
    pipe,
    baseline,
    game,
    hour,
    features,
    candidate_tags,
    max_tags=None
):
    """
    Greedily build up a tag set one tag at a time, always adding the tag
    that gives the highest ``pipe.predict`` value, and stopping as soon as
    no remaining tag strictly improves on the current best score.

    Parameters
    ----------
    pipe : object with ``predict(X)`` returning an indexable of predictions.
    baseline, game, hour, features : forwarded unchanged to ``make_feature_row``.
    candidate_tags : iterable of hashable tags to choose from. Duplicates are
        ignored; the first occurrence fixes a tag's position.
    max_tags : int or None
        Upper bound on the number of tags selected; ``None`` means no bound.

    Returns
    -------
    list of (tag_tuple, score)
        One entry per step, starting with ``((), score_without_tags)`` and
        followed by the growing tag tuple after each accepted tag.

    Notes
    -----
    Candidates are kept in input order rather than in a ``set``, so when two
    tags produce the same score the one listed first in ``candidate_tags``
    wins. This makes the result reproducible across kernel restarts.
    """
    # start from no tags
    selected    = []
    # baseline prediction with zero tags
    X0          = make_feature_row(baseline, game, hour, [], features)
    best_score  = pipe.predict(X0)[0]
    history     = [(tuple(selected), best_score)]
    # dict.fromkeys de-duplicates while preserving the caller's order
    remaining   = list(dict.fromkeys(candidate_tags))

    while remaining and (max_tags is None or len(selected) < max_tags):
        # score the current selection extended by each remaining tag
        scores = {}
        for t in remaining:
            X_try     = make_feature_row(baseline, game, hour, selected + [t], features)
            scores[t] = pipe.predict(X_try)[0]

        # pick the tag with the highest resulting score (first one wins ties)
        best_tag, score = max(scores.items(), key=lambda kv: kv[1])

        # stop if nothing improves
        if score <= best_score:
            break

        # otherwise record and continue
        selected.append(best_tag)
        remaining.remove(best_tag)
        best_score = score
        history.append((tuple(selected), best_score))

    return history




In [None]:
import itertools
import pandas as pd

stream_name         = "thelegendyagami"
# grab the "last row" for your stream as baseline
baseline = _get_last_row_for_stream(df, stream_name)

# Rows belonging to this streamer; selected once and reused below.
legend_rows = df.loc[df["stream_name"] == stream_name]

# 1) Legend's games (only from their history, no preds needed).
#    Missing categories are dropped so a NaN is never fed to the models as a
#    game, consistent with how `game_cats` is built from the full dataset.
legend_games = legend_rows["game_category"].dropna().unique().tolist()

# 2) Legend's tags: the sorted union of every entry found in `raw_tags`.
#    NOTE(review): this iterates each cell, so it assumes every non-null
#    raw_tags value is a collection of tag strings — confirm; a plain string
#    cell would be split into single characters.
legend_tags = sorted({
    tag
    for tags in legend_rows["raw_tags"].dropna()
    for tag in tags
})


# Build a **restricted** combo grid: every (game, start hour, duration) triple
grid = pd.DataFrame(
    list(itertools.product(legend_games, start_opts, dur_opts)),
    columns=["game_category", "start_time_hour", "stream_duration"]
)


# 3) Predict all three metrics for each combo
def predict_all_metrics(row):
    """Predict the three metrics for one row of the combo grid.

    Parameters
    ----------
    row : grid row exposing ``game_category``, ``start_time_hour`` and
        ``stream_duration`` as attributes (as produced by
        ``grid.apply(..., axis=1)``).

    Returns
    -------
    pandas.Series
        ``subs``, ``followers`` and ``viewers``: the predictions of the first
        three entries of ``pipes``, each rounded to two decimals.
    """
    X = make_feature_row(
        baseline,
        row.game_category,
        row.start_time_hour,
        selected_tags,
        features
    )
    # make_feature_row leaves stream_duration at the baseline's value, which
    # would make every duration in the grid predict identically. Apply the
    # grid's duration when the models actually use that feature.
    # NOTE(review): assumes the grid durations are in the same unit as the
    # stream_duration feature the pipelines were trained on — confirm.
    if "stream_duration" in X.columns:
        X = X.assign(stream_duration=row.stream_duration)
    return pd.Series({
        "subs":      round(pipes[0].predict(X)[0], 2),
        "followers": round(pipes[1].predict(X)[0], 2),
        "viewers":   round(pipes[2].predict(X)[0], 2),
    })

# Score every grid row, then place the predictions next to their inputs.
predicted_metrics = grid.apply(predict_all_metrics, axis=1)
df_metrics = pd.concat([grid, predicted_metrics], axis=1)


# 4) Top-3 combos by each metric
top3_subs, top3_followers, top3_viewers = (
    df_metrics.nlargest(3, metric_col)
    for metric_col in ("subs", "followers", "viewers")
)

# Show each ranking under its own headline.
for headline, table in (
    ("🔸 Top 3 game/time/duration for MAX subs 🔸", top3_subs),
    ("🔸 Top 3 game/time/duration for MAX follower growth 🔸", top3_followers),
    ("🔸 Top 3 game/time/duration for MAX viewers 🔸", top3_viewers),
):
    print(headline)
    display(table)


# 5) Greedy tag-combination search per model, **restricted** to legend_tags

print("legend_tags:", legend_tags)

combo_histories = []
for model_no, pipe in enumerate(pipes, 1):
    # Let the greedy search use as many of the streamer's tags as it wants
    # (pass a smaller max_tags to cap the combination size).
    hist = best_tag_combinations(
        pipe, baseline, selected_game, selected_start_time,
        features, legend_tags, max_tags=len(legend_tags),
    )
    combo_histories.append(hist)

    # Rank a copy of the history by predicted value, best first, and keep three.
    ranked = list(hist)
    ranked.sort(key=lambda entry: entry[1], reverse=True)
    top3 = ranked[:3]

    print(f"\n🔸 Model {model_no} top‐3 tag combos (score) 🔸")
    for combo, score in top3:
        print(f"  {combo} → {score:.2f}")



In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score

# NOTE(review): `y_test`, `X_test` and `model` are not defined in the visible
# part of this notebook — confirm where they come from before running this cell.
y_true = y_test
y_pred = model.best_estimator_.predict(X_test)

# End points of the diagonal reference line (predicted == actual).
lowest, highest = y_true.min(), y_true.max()
r2 = r2_score(y_true, y_pred)

plt.scatter(y_true, y_pred, alpha=0.3)
plt.plot([lowest, highest], [lowest, highest], 'k--', lw=2)
plt.xlabel("Actual subscriptions")
plt.ylabel("Predicted subscriptions")
plt.title(f"R² = {r2:.2f}")
plt.show()