# Backchannel Type Prediction via Large Language Models (LLMs)

This notebook shows you how to classify backchannel types—**“emotive”**, **“cognitive”**, or **“not backchannel”**—using OpenAI’s GPT models.

> **Before you begin:**  
> 1. Get an OpenAI API key and base URL.  
> 2. Purchase the required quotas on [OpenAI’s API](https://openai.com/index/openai-api/).  
> 3. Enter your credentials in the cell below (`api_key`, `api_base`).

---


In [2]:
##################################################
############### API Key of ChatGPT ###############
##################################################

# api_key = "sk_..."
# api_base = ""

##################################################
##################################################
##################################################


import warnings
warnings.filterwarnings("ignore")

import numpy as np
import glob
import os
from openai import OpenAI
import pandas as pd

import sys
sys.path.append("../sho_util/pyfiles/")
from basic import get_bool_base_on_conditions

sys.path.append('../pyfiles/')
from dialog import get_start_end_referencedf, most_frequent
from llmprediction import GetResult_Backchannel, get_past_future_conversation, get_prompt_backchannel

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 504)
pd.set_option('display.max_colwidth', None)

# client = OpenAI(api_key=api_key, base_url=api_base)
client = OpenAI(
    base_url = 'http://localhost:11434/v1',
    api_key='ollama', # required, but unused
)

---

In this example, we predict backchannel types with a GPT model. To improve context, we include both past and future turns in the prompt. Please adjust the following variables as needed:

- `audiopath`: A string containing the path to your two‑channel audio file.  
- `feature_dir`: A string specifying the directory where all preprocessed outputs will be saved.  
- `past_bc`: An integer specifying how many past turns to include in the prompt.  
- `future_bc`: An integer specifying how many future turns to include in the prompt.  
- `model`: A string naming the GPT model to use (e.g., `"gpt-4o"`). To add more models, edit the `gpt_api_no_stream` function in `./sho_util/pyfiles/gpt.py`.

---


In [4]:
###########################################
########## Adjustable Parameters ##########
###########################################

audiopath = "../audio/sample.wav"
feature_dir = "../audio/features/sample/"
past_bc = 2
future_bc = 2
# model = "gpt-4o"
model = "gemma3:12b"

###########################################
###########################################
###########################################

repeatnum_bc = 1
delete_toomuch_overlap = True
get_response = True

print("####################")
print("### Loading Data ###")
print("####################")

resultpath = feature_dir + "whisper/" + os.path.basename(audiopath[:-4]) + f".npy"
laughpath = feature_dir + "laughs/" + os.path.basename(audiopath[:-4]) + f".npy"
tablepath = laughpath.replace("laughs", "results")

a = np.load(tablepath, allow_pickle=True).item()
rawdata, data1, data2, data3, dfci = a["rawdata"], a["data1"], a["data2"], a["data3"], a["dfci"]

### Backchanneling
dfbc = data2[data2["BC-Candidates"]]
texts = [""]*len(dfbc)
dfbc["BC-Labels"] = texts

print("###################################")
print("### Backchannel Type Prediction ###")
print("###################################")

skips = []
addname = "" if model=="" else "_"+model.split("-")[-1].replace(":", "_")
dirname = feature_dir + f'LLM_responses{addname}/' + os.path.basename(audiopath)[:-4] + "/"
os.makedirs(dirname, exist_ok=True)
dfturntaking = data1.copy()
for i in range(len(dfbc)):
    array = dfbc.iloc[i]
    ol = array["Overlap"].split("-")[0]
    arrayol = data2.iloc[int(ol)]

    # Skip if we already have responses
    startllm, endllm = get_start_end_referencedf(rawdata, array)
    endllm = np.argmin(np.abs(rawdata["end"]-array["end"]))
    a = glob.glob(dirname + f"backchannel_{startllm}_{endllm}_*.npy")
    a.sort()
    b = {int(os.path.basename(path).split("_")[-1][:-4]): path for path in a}
    iter_list = list(set(list(range(repeatnum_bc))) - set(list(b.keys())))
    if len(iter_list)==0:
        continue

    # Get past and future conversation
    start, end = get_start_end_referencedf(dfturntaking, arrayol)
    start = np.argmin(np.abs(dfturntaking["start"]-arrayol["start"]))
    end = np.argmin(np.abs(dfturntaking["end"]-arrayol["end"]))
    dfpast = dfturntaking.iloc[:start].copy()
    dffuture = dfturntaking.iloc[end+1:].copy()
    dfpast, dffuture = get_past_future_conversation(dfpast, dffuture, past_bc, future_bc, delete_toomuch_overlap)

    start, end = get_start_end_referencedf(rawdata, arrayol)
    rangedf = rawdata.iloc[start:end+1].reset_index(drop=True)

    start = np.argmin(np.abs(rangedf["start"]-array["start"]))
    end = np.argmin(np.abs(rangedf["end"]-array["end"]))

    a = rangedf.iloc[start:end+1]
    a = a[get_bool_base_on_conditions(a, {"speaker":[array["speaker"]]})]
    b = "{{{" + (" ".join(a["transcription"].values)) + "}}}"
    a = a.iloc[:1]
    a["transcription"] = b

    df = rangedf[get_bool_base_on_conditions(rangedf, {"speaker":[arrayol["speaker"]]})]
    df["start"] = df["end"]
    df = pd.concat([df, a], axis=0).sort_values("start")
    backchannel = array["transcription"]
    transcript = arrayol["transcription"]
    both = " ".join(list(df.transcription))
    current = [arrayol["speaker"], array["speaker"], backchannel, transcript, both]

    prompt = get_prompt_backchannel(dfpast, dffuture, current)
    if get_response:
        for r in iter_list:
            savepath = dirname + f'backchannel_{startllm}_{endllm}_{r}.npy'
            if os.path.exists(savepath):
                continue
            response = GetResult_Backchannel(client, prompt, model)
            np.save(savepath, response)

print("#####################################")
print("##### Display Prediction Result #####")
print("#####################################")

a = np.load(tablepath, allow_pickle=True).item()
rawdata, data1, data2, data3, dfci = a["rawdata"], a["data1"], a["data2"], a["data3"], a["dfci"]
dfbc = data2[data2["BC-Candidates"]]
model = "gpt-4o"
addname = "" if model=="" else "_"+model.split("-")[-1]
dirname = feature_dir + f'LLM_responses{addname}/' + os.path.basename(audiopath)[:-4] + "/"
tt_classes = ["interjection type"]
keys = ["_".join(os.path.basename(a).split("_")[1:3]) for a in glob.glob(dirname+"backchannel_*_0.npy")]
keys.sort()

udfbc = dfbc.copy()
udfbc["BC-Labels"] = ""
for cl in tt_classes[1:]:
    udfbc["BC-"+cl] = ""
for key in keys:
    # Obtain the prediction results
    paths = glob.glob(dirname+f"backchannel_{key}_*.npy")
    results = {cl.lower(): [] for cl in tt_classes}
    for path in paths:
        a = np.load(path, allow_pickle=True).item()

        for cl in tt_classes:
            exist = cl in a
            if not(exist):
                cl = cl.lower()
                exist = cl in a
            if exist:
                try:
                    results[cl] += [a[cl].lower()]
                except AttributeError:
                    results[cl] += [key.lower() for key in a[cl]]

    if len(results[tt_classes[0]])==0:
        continue

    summary = {}
    for freq_key in [a.lower() for a in tt_classes[:2]]:
        summary[freq_key] = most_frequent(results[freq_key])
    for score_key in [a.lower() for a in tt_classes[2:]]:
        summary[score_key] = np.mean([score_dir[score_key[0].upper()+score_key[1:]][a] for a in results[score_key]])

    # Insert the information to udfbc
    startllm, endllm = [int(a) for a in key.split("_")]
    try:
        start = np.arange(len(udfbc))[np.abs(rawdata.iloc[startllm]["start"]-udfbc["start"])<1e-5][0]
        idx = udfbc.iloc[start].name
        udfbc.loc[idx, "BC-Labels"] = summary["interjection type"]
        for cl in tt_classes[1:]:
            udfbc.loc[idx, "BC-"+cl] = summary[cl.lower()]
    except IndexError:
        continue
udfbc

####################
### Loading Data ###
####################
###################################
### Backchannel Type Prediction ###
###################################
#####################################
##### Display Prediction Result #####
#####################################


Unnamed: 0,start,end,speaker,transcription,duration,duration-before-talking,Overlap,Fully-Overlap,BC-Candidates,BC-Labels
1,1.972,2.060062,B,hello,0.088062,0.0,0,True,True,not backchannel
9,13.38,13.78,B,uh okay,0.4,2.78,8,True,True,cognitive
19,33.744,33.912,A,oh,0.168,2.204,18,True,True,emotive
25,45.232,45.44,B,yeah,0.208,1.892,24,True,True,emotive
28,50.796,51.1,B,that's right,0.304,1.304,27,True,True,cognitive
41,90.48,90.82,B,uh huh,0.34,7.56,40,True,True,
42,95.316,95.7,B,oh really,0.384,4.496,40,True,True,cognitive
43,104.684,105.57,B,yeah [Laugh],0.886,8.984,40,True,True,emotive
44,107.46,107.78,B,we did,0.32,1.89,40,True,True,not backchannel
45,112.732,114.64,B,no he was pretty good he never really chewed anything,1.908,4.952,40,True,True,not backchannel
