In [None]:
# Fresh segmentation state for the new dialogue: segment numbering starts at 0
# and there is no previous turn to compare against yet
segment_id = 0
segments = []
previous_speaker = previous_role = None

# Walk the new dialogue row by row; whenever the detector reports a topic
# transition the running segment number is bumped before the row is labelled
for index, row in data_new.iterrows():
    if detect_topic_transition(row, previous_speaker, previous_role):
        segment_id += 1
    segments.append(segment_id)

    # The current turn becomes the "previous" one for the next row
    previous_speaker = row['speaker']
    previous_role = row['role']

# Store the per-row segment labels on the new DataFrame
data_new['segment'] = segments

# Score each segment of the new dialogue (segments are visited in order of
# first appearance) and collect (segment id, score) pairs
segment_scores_new = []
for segment_id in data_new['segment'].unique():
    segment_texts = data_new.loc[data_new['segment'] == segment_id, 'text'].tolist()
    score = evaluate_segment(segment_texts)
    segment_scores_new.append((segment_id, score))

# Tabulate the scores for the new dialogue file
evaluation_df_new = pd.DataFrame(
    data=segment_scores_new,
    columns=['Segment ID', 'Score'],
)

# Hand the table to the display helper
tools.display_dataframe_to_user(
    name="New Segment Evaluation Results",
    dataframe=evaluation_df_new,
)

# Cell output: first rows of the evaluation table, as a quick confirmation
evaluation_df_new.head(5)

In [None]:
# Define a function to evaluate each segment based on the specified criteria
def evaluate_segment(segment_texts):
    """Heuristically rate a segment's communication quality from 1 to 5.

    Args:
        segment_texts: list of utterance strings belonging to one segment.

    Returns:
        int: 5 if no utterance contains "?" and every utterance is longer
        than 5 characters; 4 if no utterance contains "?" but some are
        shorter; 3 if the segment shows hesitation ("..." or an utterance
        under 4 characters) together with a question under 5 characters;
        2 if a single utterance contains both "?" and "..."; otherwise 1.
        Note that a segment with ordinary questions and no hesitation
        markers therefore ends up with 1, and an empty list yields 5
        (the "no questions" and "all long" checks hold vacuously).
    """
    contains_question = any("?" in utterance for utterance in segment_texts)

    # Purely declarative segments are rated on utterance length alone
    if not contains_question:
        every_utterance_long = all(len(utterance) > 5 for utterance in segment_texts)
        return 5 if every_utterance_long else 4

    # Hesitation or brevity combined with a very short question: minor awkwardness
    if any("..." in utterance or len(utterance) < 4 for utterance in segment_texts) and any(
        "?" in utterance and len(utterance) < 5 for utterance in segment_texts
    ):
        return 3

    # A question that itself trails off is treated as more awkward
    for utterance in segment_texts:
        if "?" in utterance and "..." in utterance:
            return 2

    # Everything else counts as mostly failed communication
    return 1

# Score every segment of the first dialogue (visited in order of first
# appearance) and collect (segment id, score) pairs
segment_scores = []
for segment_id in data['segment'].unique():
    segment_texts = data.loc[data['segment'] == segment_id, 'text'].tolist()
    score = evaluate_segment(segment_texts)
    segment_scores.append((segment_id, score))

# Tabulate the per-segment scores
evaluation_df = pd.DataFrame(
    data=segment_scores,
    columns=['Segment ID', 'Score'],
)

# Hand the table to the display helper
tools.display_dataframe_to_user(
    name="Segment Evaluation Results",
    dataframe=evaluation_df,
)

# Cell output: first rows of the evaluation table, as a quick confirmation
evaluation_df.head(5)


In [None]:
# Revised evaluation function to include new criteria for scoring
def evaluate_segment_with_interactivity(segment_texts):
    """Rate a segment from 1 to 5 using simple interactivity heuristics.

    The score starts at 1 and is raised by positive signals, then capped if
    signs of hesitation are present:

    * an utterance starting with an opening prefix ("hi", "hello", "good",
      "so") or a closing prefix ("thanks", "thank you", "bye",
      "okay, that's it") raises the score to at least 3;
    * an utterance containing a topic-management cue ("moving on",
      "next topic", "let's talk about") raises it to at least 4;
    * every utterance being longer than 5 characters raises it to 5;
    * any utterance containing "..." or shorter than 4 characters caps the
      score at 3.

    All matching is case-insensitive. An empty list scores 5, because the
    "every utterance is long" check holds vacuously.

    Args:
        segment_texts: list of utterance strings belonging to one segment.

    Returns:
        int: the heuristic score, between 1 and 5.
    """
    opening_prefixes = ("hi", "hello", "good", "so")
    closing_prefixes = ("thanks", "thank you", "bye", "okay, that's it")
    transition_cues = ("moving on", "next topic", "let's talk about")

    # Lower-case once for all case-insensitive checks; the length checks below
    # deliberately use the original strings
    lowered_texts = [text.lower() for text in segment_texts]

    score = 1  # Start with the lowest score and upgrade based on checks

    # Conversational opening raises the score
    if any(text.startswith(opening_prefixes) for text in lowered_texts):
        score = max(score, 3)

    # Conversational closing raises the score
    if any(text.startswith(closing_prefixes) for text in lowered_texts):
        score = max(score, 3)

    # Topic-management cues: look for the cue *inside* the utterance. The
    # previous whole-utterance equality test only fired when the utterance was
    # exactly the cue, so e.g. "Moving on to the budget." was never recognised.
    if any(cue in text for text in lowered_texts for cue in transition_cues):
        score = max(score, 4)

    # Smooth, continuous exchanges: every utterance is reasonably long
    if all(len(text) > 5 for text in segment_texts):
        score = max(score, 5)

    # Hesitation or extreme brevity caps the score regardless of the above
    if any("..." in text or len(text) < 4 for text in segment_texts):
        score = min(score, 3)

    return score

# Score the first dialogue's segments again, this time with the
# interactivity-aware scorer
segment_scores_first_dialogue = []
for segment_id in data['segment'].unique():
    segment_texts = data.loc[data['segment'] == segment_id, 'text'].tolist()
    score = evaluate_segment_with_interactivity(segment_texts)
    segment_scores_first_dialogue.append((segment_id, score))

# One (Segment ID, Score) row per segment of the first dialogue
evaluation_df_first_dialogue = pd.DataFrame(
    data=segment_scores_first_dialogue,
    columns=['Segment ID', 'Score'],
)

# Same procedure for the second dialogue
segment_scores_second_dialogue = []
for segment_id in data_new['segment'].unique():
    segment_texts = data_new.loc[data_new['segment'] == segment_id, 'text'].tolist()
    score = evaluate_segment_with_interactivity(segment_texts)
    segment_scores_second_dialogue.append((segment_id, score))

# One (Segment ID, Score) row per segment of the second dialogue
evaluation_df_second_dialogue = pd.DataFrame(
    data=segment_scores_second_dialogue,
    columns=['Segment ID', 'Score'],
)

# Hand both re-evaluated tables to the display helper
# NOTE(review): this import would normally live in the notebook's import cell
import ace_tools as tools
tools.display_dataframe_to_user(
    name="Re-evaluated Segment Scores - First Dialogue",
    dataframe=evaluation_df_first_dialogue,
)
tools.display_dataframe_to_user(
    name="Re-evaluated Segment Scores - Second Dialogue",
    dataframe=evaluation_df_second_dialogue,
)

# Cell output: the first rows of both tables, as a tuple
(evaluation_df_first_dialogue.head(5), evaluation_df_second_dialogue.head(5))



In [None]:
# Refine segmentation based on explicit topic transitions and concluding phrases
def detect_topic_transition_v2(row, previous_text):
    """Decide whether the current utterance starts a new topic segment.

    A new segment starts when the current text contains an explicit
    transition cue, or when the previous text contained a concluding cue.
    Matching is case-insensitive and respects word boundaries, so the cue
    "now" matches "Now, about the budget" but not "I don't know" or
    "snowing" (plain substring matching produced those false positives).

    Args:
        row: mapping-like object whose 'text' entry is the current utterance
            string (e.g. a DataFrame row).
        previous_text: the preceding utterance string; None or an empty
            string disables the conclusion check.

    Returns:
        bool: True if a topic transition is detected, otherwise False.
    """
    import re  # local import so this definition does not rely on another cell's imports

    # Explicit cues for topic transitions and conclusions
    transition_cues = ["moving on", "next topic", "now", "let's discuss", "let's move on", "to another point"]
    conclusion_cues = ["so that's it", "okay then", "let's wrap up", "thanks", "that concludes"]

    def contains_cue(text, cues):
        # Whole-word / whole-phrase match of any cue in the lower-cased text
        lowered = text.lower()
        return any(re.search(r"\b" + re.escape(cue) + r"\b", lowered) for cue in cues)

    # Transition if the current text contains a transition cue
    if contains_cue(row['text'], transition_cues):
        return True
    # Transition if the previous text wrapped up the preceding topic
    if previous_text and contains_cue(previous_text, conclusion_cues):
        return True
    return False

# Rebuild the first dialogue's segmentation with the cue-based detector:
# numbering restarts at 0 and there is no previous utterance yet
segment_id = 0
previous_text = None
segments_first = []

for index, row in data.iterrows():
    if detect_topic_transition_v2(row, previous_text):
        segment_id += 1
    segments_first.append(segment_id)
    # The current utterance becomes the "previous" one for the next row
    previous_text = row['text']

# Overwrite the first dialogue's segment labels with the refined ones
data['segment'] = segments_first

# Rebuild the second dialogue's segmentation the same way, from a clean state
segment_id = 0
previous_text = None
segments_second = []

for index, row in data_new.iterrows():
    if detect_topic_transition_v2(row, previous_text):
        segment_id += 1
    segments_second.append(segment_id)
    previous_text = row['text']

# Overwrite the second dialogue's segment labels with the refined ones
data_new['segment'] = segments_second

# Score the refined segments of the first dialogue
segment_scores_first_dialogue_v2 = []
for segment_id in data['segment'].unique():
    segment_texts = data.loc[data['segment'] == segment_id, 'text'].tolist()
    score = evaluate_segment_with_interactivity(segment_texts)
    segment_scores_first_dialogue_v2.append((segment_id, score))

# One (Segment ID, Score) row per refined segment of the first dialogue
evaluation_df_first_dialogue_v2 = pd.DataFrame(
    data=segment_scores_first_dialogue_v2,
    columns=['Segment ID', 'Score'],
)

# Score the refined segments of the second dialogue
segment_scores_second_dialogue_v2 = []
for segment_id in data_new['segment'].unique():
    segment_texts = data_new.loc[data_new['segment'] == segment_id, 'text'].tolist()
    score = evaluate_segment_with_interactivity(segment_texts)
    segment_scores_second_dialogue_v2.append((segment_id, score))

# One (Segment ID, Score) row per refined segment of the second dialogue
evaluation_df_second_dialogue_v2 = pd.DataFrame(
    data=segment_scores_second_dialogue_v2,
    columns=['Segment ID', 'Score'],
)

# Hand both tables, based on the new segmentation, to the display helper
tools.display_dataframe_to_user(
    name="Re-evaluated Segment Scores - First Dialogue (Refined Segmentation)",
    dataframe=evaluation_df_first_dialogue_v2,
)
tools.display_dataframe_to_user(
    name="Re-evaluated Segment Scores - Second Dialogue (Refined Segmentation)",
    dataframe=evaluation_df_second_dialogue_v2,
)

# Cell output: the first rows of both tables, as a tuple
(evaluation_df_first_dialogue_v2.head(5), evaluation_df_second_dialogue_v2.head(5))