In [None]:
pip install langchain_community

In [None]:
from langchain_community.chat_models import ChatOllama
from langchain.prompts import ChatPromptTemplate
import pandas as pd
import json
import time

In [None]:
merged = pd.read_csv("analysis.csv")
llm = ChatOllama(
    model="mistral:instruct",
    temperature=0,
    format="json"
)

In [None]:
prompt_template = ChatPromptTemplate.from_template("""
You are a concise editor that rates snippets for short-form video hooks.

Input: a short transcript segment.

Task: Return valid JSON with keys:
- hook_score (integer 1-10, 10 = strongest hook),
- confidence (float 0.0-1.0).

Only output JSON. Do not add any commentary.

Segment:
\"\"\"{text}\"\"\"
""")

In [None]:
for idx, row in merged.iterrows():
    text = row["text"]
    chain = prompt_template | llm
    response = chain.invoke({"text": text})
    
    try:
        result = json.loads(response.content)
    except json.JSONDecodeError:
        result = {"hook_score": 0, "hook_reason": "Parse error", "confidence": 0.0}
    
    merged.at[idx, "hook_score"] = int(result.get("hook_score", 0))
    merged.at[idx, "hook_confidence"] = float(result.get("confidence", 0.0))
    print("hook_score:", merged.at[idx, "hook_score"],
          "confidence:", merged.at[idx, "hook_confidence"])
    
    time.sleep(0.05)

In [16]:
import pandas as pd
from langchain_community.chat_models import ChatOllama
from langchain.prompts import PromptTemplate
import json

# ======== Setup ========
df = pd.read_csv("analysis.csv")  # Your DataFrame: start, end, text, speaker
BATCH_SIZE = 10  # Number of rows per request

# Create Ollama client for Mistral
llm = ChatOllama(
    model="mistral",
    temperature=0
)

# Prompt Template
prompt_template = """
You are a video hook scoring assistant. 
Given a list of transcript segments, score each from 1 to 10 based on how likely it is to hook viewers' attention.
Also give a confidence score between 0 and 1 representing your certainty in the score.
Return only a JSON array where each element contains:
- "hook_score": integer (1–10)
- "hook_confidence": float (0.0–1.0)

Example output:
[
  {{"hook_score": 8, "hook_confidence": 0.92}},
  {{"hook_score": 3, "hook_confidence": 0.65}}
]

Segments:
{segments}
"""

prompt = PromptTemplate(
    input_variables=["segments"],
    template=prompt_template
)

# ======== Batch Processing ========
all_results = []

for i in range(0, len(df), BATCH_SIZE):
    batch = df.iloc[i:i+BATCH_SIZE]
    batch_text = "\n".join(
        [f"{idx+1}. {row.text}" for idx, row in enumerate(batch.itertuples(), start=0)]
    )

    final_prompt = prompt.format(segments=batch_text)
    response = llm.predict(final_prompt).strip()

    try:
        batch_results = json.loads(response)
        if isinstance(batch_results, list) and len(batch_results) == len(batch):
            for res in batch_results:
                all_results.append(res)
                print(f"✅ Hook score: {res['hook_score']} | Confidence: {res['hook_confidence']}")
        else:
            for _ in range(len(batch)):
                all_results.append({"hook_score": None, "hook_confidence": None})
    except json.JSONDecodeError:
        for _ in range(len(batch)):
            all_results.append({"hook_score": None, "hook_confidence": None})

# ======== Merge Back into DataFrame ========
results_df = pd.DataFrame(all_results)
df["hook_score"] = results_df["hook_score"]
df["hook_confidence"] = results_df["hook_confidence"]

# Save result
df.to_csv("merge_with_hook_scores.csv", index=False)
print("🎯 Hook scoring complete. Saved to merge_with_hook_scores.csv")


✅ Hook score: 6 | Confidence: 0.85
✅ Hook score: 7 | Confidence: 0.9
✅ Hook score: 9 | Confidence: 0.95
✅ Hook score: 10 | Confidence: 1.0
✅ Hook score: 8 | Confidence: 0.9
✅ Hook score: 7 | Confidence: 0.85
✅ Hook score: 3 | Confidence: 0.65
✅ Hook score: 4 | Confidence: 0.75
✅ Hook score: 9 | Confidence: 0.95
✅ Hook score: 10 | Confidence: 1.0
✅ Hook score: 8 | Confidence: 0.95
✅ Hook score: 7 | Confidence: 0.9
✅ Hook score: 6 | Confidence: 0.85
✅ Hook score: 4 | Confidence: 0.7
✅ Hook score: 2 | Confidence: 0.6
✅ Hook score: 3 | Confidence: 0.75
✅ Hook score: 5 | Confidence: 0.8
✅ Hook score: 7 | Confidence: 0.9
✅ Hook score: 8 | Confidence: 0.9
✅ Hook score: 9 | Confidence: 1
✅ Hook score: 10 | Confidence: 1.0
✅ Hook score: 7 | Confidence: 0.95
✅ Hook score: 6 | Confidence: 0.85
✅ Hook score: 4 | Confidence: 0.75
✅ Hook score: 8 | Confidence: 0.95
✅ Hook score: 6 | Confidence: 0.9
✅ Hook score: 7 | Confidence: 0.9
✅ Hook score: 4 | Confidence: 0.7
✅ Hook score: 5 | Confidence: 0.8


KeyError: 'hook_confidence'