# Setups

In [2]:
import pandas as pd
import numpy as np
import re
from collections import Counter

# Import Data and Codes

In [3]:
df = pd.read_csv('/Users/ningyuhan/Desktop/combined_dataframe.csv')

In [4]:
verbal_contribution_codes = {
    '1': 'Initiation activity',
    '1a': 'Motion making',
    '1b': 'Information giving',
    '1c': 'Information seeking',
    '1d': 'Making specific suggestions for action',
    '2': 'Support behavior',
    '2a': 'Motion seconding',
    '2b': 'Making statements in support of another person\'s argument',
    '3': 'System maintenance',
    '3a': 'Tension management',
    '3b': 'Direction of traffic',
    '3c': 'Collective spirit and solidarity moves',
    '4': 'Board discussion, debate, argumentation',
    '4a': 'Personal defensiveness',
    '4b': 'Personal gains',
    '4c': 'Agreeing reluctantly',
    '4d': 'Sensible,nonpersonal arguments',
    '4e1': 'Management',
    '4e2': 'Union',
    '4e3': 'People (workers) as distinct from union or the union leadership',
    '4f': 'Attempts to propose new board topics',
    '4g': 'Corporate interests',
    '4h': 'Disagreements, conflicts, attacks',
    '4i': 'Stonewalling',
    '05': 'Unclassified verbal behaviors',
}

topic_codes = {
    '01': 'Routine Board Functions',
    '02': 'Scope of Board Issues',
    '03': 'Financial Status of Firm',
    '04': 'Management Personnel Issues',
    '05': 'Marketing and Sales',
    '06': 'Employee Stock Ownership Plan (ESOP) Financial',
    '07': 'ESOP Participation',
    '08': 'Employee Benefits',
    '09': 'Plant Production and Manufacturing Process Issues',
    '10': 'The Union and Contract Issues',
    '11': 'Supervision',
    '12': 'hog procurement',
    '13': 'Political Relations with the Community'
}

In [5]:
data = {
    'Name': ['Chuck Swisher', 'Emmet “Mac” MacGuire', 'Herb Epstein', 'John Lambert', 'Wally Rath, regional sales managers, & Robert Cray & John DeGroat', 'Bob Soleday, Mowry, Bloomfield,(beginning Jan. 1985)', 'Ivan Pihl', 'Art Frye', 'Jack Thomas', 'Harold Rath', 'Ralph Helstein', 'Tove Hammer', 'Len Dodson', 'Sid Oberman', 'Bob Fulton', 'Walter Cunningham', 'Dick Clarke', 'Phyllis Walters', 'Earl Murray', 'Glen Bass', 'Clark Towne', 'LaVerne Patrie', 'Peter Bruskern', 'Bob Kavangh', 'Lyle Taylor', 'Chuck Mueller', 'Gene Redmond & other union officials', 'Jim Miller', 'Ron Peterson, William Scogland, Charles McCarthy, Tom Mandler, Wes Hall, Ravel', 'Berthold', 'Rudnick', 'Gerjerts, Bill Wait', 'Jack Curtis', 'Bruce Wilson', 'John Stevens, Greg Kohn, Lewis Rudel', 'Potential Business deal people', 'ESOP consultants (Chris Meek & Warner Woodworth, W.F. Whyte)', 'ESOP trustees, Larry Wrede, Jim Anderson, Cox', 'Wayne Wright', 'Insurance people & other consultants', 'Leroy Grittman, Tobias, Mary Frost', 'Swisher & Cohrt, Law firm (Steve Weidner)'],
    'Id Number': ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44']
}

person_df = pd.DataFrame(data)

# Data Preprocessing

In [6]:
def standardize_text(text):
    if pd.isna(text):
        return text  
    # Remove non-ASCII characters directly
    text = ''.join([char for char in text if ord(char) < 128])
    text = text.strip().lower()  
    # Remove all non-alphanumeric characters
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    return text

df['Standardized_Sentence'] = df['Sentence'].apply(standardize_text)

def standardize_tag(tag):
    if pd.isna(tag):
        return tag
    # Remove all spaces and extra characters
    tag = tag.replace(' ', '')
    # Ensure consistency in formatting
    tag = ','.join(tag.split(','))
    return tag

df['Tag'] = df['Tag'].apply(standardize_tag)

df['Tag'] = df['Tag'].apply(
    lambda x: x.replace('4h,30]', '4h,03]') if '4h,30]' in x else x
)

def correct_typo(tag):
    parts = tag.strip('[]').split(',')
    if len(parts) > 3 and len(parts[3]) == 1:
        parts[3] = '0' + parts[3]  # Add a leading zero if it's a single digit
    return '[' + ','.join(parts) + ']'

df['Tag'] = df['Tag'].apply(correct_typo)

# Data Analysis

## Set Focus Category

In [7]:
df['Tag_Component'] = df['Tag'].apply(lambda x: x.split(',')[2] if len(x.split(',')) > 2 else None)

# Set the focus category here
focus_category = '4h'
df['Is_Focus_Category'] = df['Tag_Component'] == focus_category

## Base rate

In [8]:
# Randomly select non-overlapping clusters of 3 consecutive sentences
num_sentences = len(df)
cluster_size = 3
max_clusters = num_sentences // cluster_size
start_indices = sorted(np.random.choice(range(num_sentences - cluster_size + 1), max_clusters, replace=False))

# Initialize dictionaries to count occurrences
topic_counts = {}
verbal_contribution_counts = {}

# Process each cluster
for start in start_indices:
    cluster = df.iloc[start:start + cluster_size]
    for _, row in cluster.iterrows():
        # Extract topic and verbal components
        topic_component = row['Tag'].split(',')[3].strip(']').strip() if len(row['Tag'].split(',')) > 3 else None
        verbal_component = row['Tag'].split(',')[2].strip(']').strip() if len(row['Tag'].split(',')) > 2 else None

        # Count topics
        topic_counts[topic_component] = topic_counts.get(topic_component, 0) + 1
        # Count verbal contributions
        verbal_contribution_counts[verbal_component] = verbal_contribution_counts.get(verbal_component, 0) + 1

# Calculate total counts for percentages
total_topic_count = sum(topic_counts.values())
total_verbal_count = sum(verbal_contribution_counts.values())

# Translate and calculate percentages
translated_topic_counts = {topic_codes.get(key, "Unknown"): (value / total_topic_count) * 100 for key, value in topic_counts.items()}
translated_verbal_counts = {verbal_contribution_codes.get(key, "Unknown"): (value / total_verbal_count) * 100 for key, value in verbal_contribution_counts.items()}

# Sort and print the results
sorted_topic_counts = sorted(translated_topic_counts.items(), key=lambda x: x[1], reverse=True)
sorted_verbal_counts = sorted(translated_verbal_counts.items(), key=lambda x: x[1], reverse=True)

print("Topic Percentages:")
for topic, percentage in sorted_topic_counts:
    print(f"{topic}: {percentage:.2f}%")

print("\nVerbal Contribution Percentages:")
for verbal_contribution, percentage in sorted_verbal_counts:
    print(f"{verbal_contribution}: {percentage:.2f}%")


Topic Percentages:
Financial Status of Firm: 26.09%
The Union and Contract Issues: 12.85%
Management Personnel Issues: 12.11%
Routine Board Functions: 11.02%
Plant Production and Manufacturing Process Issues: 8.71%
Marketing and Sales: 7.95%
Employee Benefits: 6.49%
ESOP Participation: 5.35%
Employee Stock Ownership Plan (ESOP) Financial: 4.55%
hog procurement: 2.47%
Supervision: 1.34%
Political Relations with the Community: 0.95%
Scope of Board Issues: 0.12%
Unknown: 0.02%

Verbal Contribution Percentages:
Information giving: 26.13%
Sensible,nonpersonal arguments: 16.37%
Information seeking: 11.47%
Disagreements, conflicts, attacks: 6.86%
Unclassified verbal behaviors: 4.18%
Motion making: 4.14%
Union: 3.89%
Motion seconding: 3.81%
Tension management: 3.40%
Direction of traffic: 2.51%
Stonewalling: 2.47%
Personal defensiveness: 2.47%
Agreeing reluctantly: 2.43%
Management: 2.29%
Making specific suggestions for action: 2.04%
Making statements in support of another person's argument: 1.

## Get 3 setences AFTER "4h" instances, keep the Last "4h" in consecutive "4hs"

In [9]:
def get_relevant_rows_after(df, focus_column='Is_Focus_Category'):
    relevant_rows = []
    in_consecutive_focus = False  # Flag to track if we're in a series of consecutive focus category tagged rows

    for i in range(len(df) - 1):  # Adjusted to avoid index out of range
        current_is_focus = df.iloc[i][focus_column]
        next_is_focus = df.iloc[i + 1][focus_column]

        # If the current row is focus category and the next row is not, or it's the last focus category in a series
        if current_is_focus and not next_is_focus and in_consecutive_focus:
            # Reset the flag since this is the last focus category in a series
            in_consecutive_focus = False
            # Add the last focus category row in the series
            relevant_rows.append(df.iloc[i])
            # Add the next three non-focus category rows
            for j in range(1, 4):
                if i+j >= len(df):  # Check if the index goes beyond the DataFrame
                    break
                if not df.iloc[i+j][focus_column]:
                    relevant_rows.append(df.iloc[i+j])
                else:
                    break  # Stop if another focus category tagged row is encountered

        # If the current row is focus category and it's the first in a series or standalone
        elif current_is_focus and not in_consecutive_focus:
            if not next_is_focus:  # If the next row is not focus category, treat it as a standalone focus category row
                relevant_rows.append(df.iloc[i])
                # Add the next three non-focus category rows
                for j in range(1, 4):
                    if i+j >= len(df):
                        break
                    if not df.iloc[i+j][focus_column]:
                        relevant_rows.append(df.iloc[i+j])
                    else:
                        break
            else:  # If the next row is also focus category, set the flag and skip this row
                in_consecutive_focus = True

    return pd.DataFrame(relevant_rows).sort_index()

In [10]:
df_after = get_relevant_rows_after(df)
#df_after

### Percentage of topics after 4h

In [11]:
# Extract the fourth component (Topic) from the 'Tag' field
df_after['Topic_Component'] = df_after['Tag'].apply(
    lambda x: x.split(',')[3].strip(']').strip() if len(x.split(',')) > 3 else None
)

# Filter the DataFrame to exclude rows where 'Is_Focus_Category' is True for the percentage calculation
df_excluding_focus = df_after[~df_after['Is_Focus_Category']]

# Count the occurrences of each unique 'Topic_Component' in these rows
topic_component_counts = df_excluding_focus['Topic_Component'].value_counts()

# Calculate the total count for percentage calculation from the filtered dataframe
total_count = df_excluding_focus['Topic_Component'].count()

# Translate the topic components and calculate the percentage using the filtered counts
translated_topic_counts = {
  topic_codes.get(key, "Unknown"): {
    'Count': value, 
    'Percentage': (value / total_count) * 100 if total_count > 0 else 0  # Check to avoid division by zero
  }
  for key, value in topic_component_counts.items()
}

for topic, data in translated_topic_counts.items():
  percentage = round(data['Percentage'], 2)
  data['Percentage'] = f"{percentage}%"

# Display the translated counts and percentages
for topic, data in translated_topic_counts.items():
  print(f"{topic}: {data['Count']} times ({data['Percentage']})")


Financial Status of Firm: 146 times (20.56%)
Management Personnel Issues: 126 times (17.75%)
The Union and Contract Issues: 96 times (13.52%)
Routine Board Functions: 79 times (11.13%)
Plant Production and Manufacturing Process Issues: 64 times (9.01%)
ESOP Participation: 57 times (8.03%)
Marketing and Sales: 41 times (5.77%)
Employee Benefits: 34 times (4.79%)
Employee Stock Ownership Plan (ESOP) Financial: 33 times (4.65%)
Supervision: 16 times (2.25%)
hog procurement: 12 times (1.69%)
Political Relations with the Community: 4 times (0.56%)
Scope of Board Issues: 2 times (0.28%)


### Percentage of verbal contribution categories after 4h

In [12]:
# Extract the third component (Verbal Contribution) from the 'Tag' field in df_after
df_after['Verbal_Component'] = df_after['Tag'].apply(
    lambda x: x.split(',')[2].strip(']').strip() if len(x.split(',')) > 2 else None
)

# Count the occurrences of each unique 'Verbal_Component' in df_after
verbal_component_counts = df_after['Verbal_Component'].value_counts()

# Exclude focus category sentences from the percentage calculation
# Filter df_after for rows where 'Is_Focus_Category' is False
df_non_focus = df_after[~df_after['Is_Focus_Category']]

# Count the occurrences of each unique 'Verbal_Component' in these non-focus rows
non_focus_verbal_component_counts = df_non_focus['Verbal_Component'].value_counts()

# Calculate the total count for percentage calculation excluding focus category
total_non_focus_count = non_focus_verbal_component_counts.sum()

# Translate the verbal components using the verbal_contribution_codes dictionary
# Here we use the full count including focus category but calculate percentage using non-focus count
translated_verbal_counts = {
    verbal_contribution_codes.get(key, "Unknown"): {
        'Count': verbal_component_counts.get(key, 0),  # Full count including focus category
        'Percentage': (non_focus_verbal_component_counts.get(key, 0) / total_non_focus_count) * 100
        # Percentage calculated using non-focus count
    }
    for key in verbal_component_counts.keys()  # Iterate over all keys to include every component in the count
}

# Format the percentage to two decimal places and add a percentage sign
for verbal_contribution, data in translated_verbal_counts.items():
    percentage = round(data['Percentage'], 2)
    data['Percentage'] = f"{percentage}%"

# Print the counts and percentages for each verbal contribution
for verbal_contribution, data in translated_verbal_counts.items():
    # Skip printing the focus category if desired
    if verbal_contribution != verbal_contribution_codes.get(focus_category, focus_category):
        print(f"{verbal_contribution}: {data['Count']} times ({data['Percentage']})")


Information giving: 125 times (17.61%)
Sensible,nonpersonal arguments: 124 times (17.46%)
Information seeking: 94 times (13.24%)
Personal defensiveness: 49 times (6.9%)
Union: 46 times (6.48%)
Stonewalling: 39 times (5.49%)
Tension management: 36 times (5.07%)
Direction of traffic: 27 times (3.8%)
Agreeing reluctantly: 27 times (3.8%)
Management: 24 times (3.38%)
Unclassified verbal behaviors: 23 times (3.24%)
Making statements in support of another person's argument: 20 times (2.82%)
Motion making: 18 times (2.54%)
Motion seconding: 14 times (1.97%)
Corporate interests: 11 times (1.55%)
Making specific suggestions for action: 11 times (1.55%)
Personal gains: 10 times (1.41%)
Attempts to propose new board topics: 8 times (1.13%)
People (workers) as distinct from union or the union leadership: 4 times (0.56%)


##  Get 3 sentences BEFORE "4h" instances, keep the FIRST "4h" in consecutive "4hs"

In [13]:
def get_relevant_rows_before(df, focus_column='Is_Focus_Category'):
    relevant_rows = []
    in_consecutive_focus = False  # Flag to track if we're in a series of consecutive focus category tagged rows

    for i in range(1, len(df)):  # Start from 1 since we're looking at rows before the current
        current_is_focus = df.iloc[i][focus_column]
        prev_is_focus = df.iloc[i - 1][focus_column]

        # If the current row is focus category and the previous row is not, or it's the first focus category in a series
        if current_is_focus and not prev_is_focus:
            # Reset the flag since this is the first focus category in a series
            in_consecutive_focus = False
            # Keep the current focus category row if it's standalone or the first in a series
            relevant_rows.append(df.iloc[i])
            # Add the previous three non-focus category rows
            for j in range(1, 4):
                if i-j < 0:  # Check if the index is below the DataFrame range
                    break
                if not df.iloc[i-j][focus_column]:
                    relevant_rows.insert(0, df.iloc[i-j])  # Insert at the beginning to maintain order
                else:
                    break  # Stop if another focus category tagged row is encountered

        # If the current row and the previous row are both focus category, we are in a series of consecutive focus categories
        elif current_is_focus and prev_is_focus:
            in_consecutive_focus = True
            # Since we're keeping the first row in a series of consecutive focus categories, do nothing here

    return pd.DataFrame(relevant_rows).sort_index()

In [14]:
df_before = get_relevant_rows_before(df)
#df_before

### Percentage of topics before 4h

In [15]:
# Extract the fourth component (Topic) from the 'Tag' field for df_before
df_before['Topic_Component'] = df_before['Tag'].apply(
    lambda x: x.split(',')[3].strip(']').strip() if len(x.split(',')) > 3 else None
)

# Filter the DataFrame to exclude rows where 'Is_Focus_Category' is True for the percentage calculation
df_excluding_focus_before = df_before[~df_before['Is_Focus_Category']]

# Count the occurrences of each unique 'Topic_Component' in these rows for df_before
topic_component_counts_before = df_excluding_focus_before['Topic_Component'].value_counts()

# Calculate the total count for percentage calculation from the filtered dataframe for df_before
total_count_before = df_excluding_focus_before['Topic_Component'].count()

# Translate the topic components and calculate the percentage using the filtered counts for df_before
translated_topic_counts_before = {
  topic_codes.get(key, "Unknown"): {
    'Count': value, 
    'Percentage': (value / total_count_before) * 100 if total_count_before > 0 else 0  # Check to avoid division by zero
  }
  for key, value in topic_component_counts_before.items()
}

for topic, data in translated_topic_counts_before.items():
  percentage = round(data['Percentage'], 2)
  data['Percentage'] = f"{percentage}%"

# Display the translated counts and percentages for sentences before the focus category
for topic, data in translated_topic_counts_before.items():
  print(f"{topic}: {data['Count']} times ({data['Percentage']})")


Financial Status of Firm: 147 times (20.7%)
The Union and Contract Issues: 121 times (17.04%)
Management Personnel Issues: 114 times (16.06%)
Marketing and Sales: 62 times (8.73%)
Plant Production and Manufacturing Process Issues: 57 times (8.03%)
Routine Board Functions: 53 times (7.46%)
ESOP Participation: 49 times (6.9%)
Employee Benefits: 37 times (5.21%)
Employee Stock Ownership Plan (ESOP) Financial: 32 times (4.51%)
Supervision: 20 times (2.82%)
hog procurement: 12 times (1.69%)
Political Relations with the Community: 3 times (0.42%)
Scope of Board Issues: 2 times (0.28%)
Unknown: 1 times (0.14%)


In [16]:
unknown_topic_keys = [key for key in df_excluding_focus_before['Topic_Component'].unique() if key not in topic_codes]
df_unknown_topics = df_excluding_focus_before[df_excluding_focus_before['Topic_Component'].isin(unknown_topic_keys)]
df_unknown_topics

Unnamed: 0,Tag,Sentence,Standardized_Sentence,Tag_Component,Is_Focus_Category,Topic_Component
1626,"[c:194,39,4d,47]",Bill is talking about goal and feedback and es...,bill is talking about goal and feedback and es...,4d,False,47


### Percentage of verbal contribution categories before 4h

In [17]:
df_before['Verbal_Component'] = df_before['Tag'].apply(
    lambda x: x.split(',')[2].strip(']').strip() if len(x.split(',')) > 2 else None
)

# Filter df_before for rows where 'Is_Focus_Category' is False to exclude focus category sentences
df_non_focus_before = df_before[~df_before['Is_Focus_Category']]

# Count the occurrences of each unique 'Verbal_Component' in these non-focus rows of df_before
non_focus_verbal_component_counts_before = df_non_focus_before['Verbal_Component'].value_counts()

# Calculate the total count for percentage calculation excluding focus category sentences in df_before
total_non_focus_count_before = non_focus_verbal_component_counts_before.sum()

# Translate the verbal components using the verbal_contribution_codes dictionary
# Here we use the count from non-focus rows for both count and percentage calculation
translated_verbal_counts_before = {
    verbal_contribution_codes.get(key, "Unknown"): {
        'Count': non_focus_verbal_component_counts_before.get(key, 0),  # Count from non-focus rows
        'Percentage': (non_focus_verbal_component_counts_before.get(key, 0) / total_non_focus_count_before) * 100
        if total_non_focus_count_before > 0 else 0  # Check to avoid division by zero
    }
    for key in non_focus_verbal_component_counts_before.keys()  # Iterate over keys from non-focus rows
}

# Sort the translated verbal contributions, ensuring 'Unknown' is at the end
sorted_verbal_counts_before = sorted(translated_verbal_counts_before.items(), key=lambda x: (-x[1]['Percentage'], x[0] == 'Unknown'))

# Print the counts and percentages for each verbal contribution in sentences before the focus category
for verbal_contribution, data in sorted_verbal_counts_before:
    percentage = round(data['Percentage'], 2)  # Ensure percentage is rounded to two decimal places
    print(f"{verbal_contribution}: {data['Count']} times ({percentage}%)")


Information giving: 148 times (20.85%)
Sensible,nonpersonal arguments: 137 times (19.3%)
Information seeking: 92 times (12.96%)
Union: 50 times (7.04%)
Personal defensiveness: 40 times (5.63%)
Stonewalling: 31 times (4.37%)
Management: 28 times (3.94%)
Direction of traffic: 24 times (3.38%)
Tension management: 24 times (3.38%)
Unclassified verbal behaviors: 24 times (3.38%)
Motion seconding: 20 times (2.82%)
Making statements in support of another person's argument: 16 times (2.25%)
Motion making: 15 times (2.11%)
Making specific suggestions for action: 13 times (1.83%)
Agreeing reluctantly: 12 times (1.69%)
Personal gains: 10 times (1.41%)
Corporate interests: 8 times (1.13%)
Attempts to propose new board topics: 7 times (0.99%)
Collective spirit and solidarity moves: 5 times (0.7%)
People (workers) as distinct from union or the union leadership: 5 times (0.7%)
Unknown: 1 times (0.14%)


In [18]:
unknown_verbal_keys = [key for key in df_non_focus_before['Verbal_Component'].unique() if key not in verbal_contribution_codes]
df_unknown_verbal_before = df_non_focus_before[df_non_focus_before['Verbal_Component'].isin(unknown_verbal_keys)]
df_unknown_verbal_before

Unnamed: 0,Tag,Sentence,Standardized_Sentence,Tag_Component,Is_Focus_Category,Topic_Component,Verbal_Component
436,"[c:66,04,4e,01,07]","John Lambert, at that point, speaks and says t...",john lambert at that point speaks and says tha...,4e,False,1,4e


# Summary

## Topic

In [19]:
print("\033[1;31mBase Rate:\033[0m")  
print("\033[1;36mTopic Percentages:\033[0m") 
for topic, percentage in sorted_topic_counts:
    print(f"{topic}: {percentage:.2f}%")

[1;31mBase Rate:[0m
[1;36mTopic Percentages:[0m
Financial Status of Firm: 26.09%
The Union and Contract Issues: 12.85%
Management Personnel Issues: 12.11%
Routine Board Functions: 11.02%
Plant Production and Manufacturing Process Issues: 8.71%
Marketing and Sales: 7.95%
Employee Benefits: 6.49%
ESOP Participation: 5.35%
Employee Stock Ownership Plan (ESOP) Financial: 4.55%
hog procurement: 2.47%
Supervision: 1.34%
Political Relations with the Community: 0.95%
Scope of Board Issues: 0.12%
Unknown: 0.02%


## Topic before 4h

In [20]:
print("\033[1;36mTopic Percentages:\033[0m") 
for topic, data in translated_topic_counts_before.items():
  print(f"{topic}: {data['Count']} times ({data['Percentage']})")

[1;36mTopic Percentages:[0m
Financial Status of Firm: 147 times (20.7%)
The Union and Contract Issues: 121 times (17.04%)
Management Personnel Issues: 114 times (16.06%)
Marketing and Sales: 62 times (8.73%)
Plant Production and Manufacturing Process Issues: 57 times (8.03%)
Routine Board Functions: 53 times (7.46%)
ESOP Participation: 49 times (6.9%)
Employee Benefits: 37 times (5.21%)
Employee Stock Ownership Plan (ESOP) Financial: 32 times (4.51%)
Supervision: 20 times (2.82%)
hog procurement: 12 times (1.69%)
Political Relations with the Community: 3 times (0.42%)
Scope of Board Issues: 2 times (0.28%)
Unknown: 1 times (0.14%)


## Topic after 4h

In [21]:
print("\033[1;36mTopic Percentages:\033[0m") 
for topic, data in translated_topic_counts.items():
  print(f"{topic}: {data['Count']} times ({data['Percentage']})")

[1;36mTopic Percentages:[0m
Financial Status of Firm: 146 times (20.56%)
Management Personnel Issues: 126 times (17.75%)
The Union and Contract Issues: 96 times (13.52%)
Routine Board Functions: 79 times (11.13%)
Plant Production and Manufacturing Process Issues: 64 times (9.01%)
ESOP Participation: 57 times (8.03%)
Marketing and Sales: 41 times (5.77%)
Employee Benefits: 34 times (4.79%)
Employee Stock Ownership Plan (ESOP) Financial: 33 times (4.65%)
Supervision: 16 times (2.25%)
hog procurement: 12 times (1.69%)
Political Relations with the Community: 4 times (0.56%)
Scope of Board Issues: 2 times (0.28%)


## Verbal Contribution

In [22]:
print("\033[1;31mBase Rate:\033[0m")  
print("\033[1;36mVerbal Contribution Percentages:\033[0m") 
for verbal_contribution, percentage in sorted_verbal_counts:
    print(f"{verbal_contribution}: {percentage:.2f}%")

[1;31mBase Rate:[0m
[1;36mVerbal Contribution Percentages:[0m
Information giving: 26.13%
Sensible,nonpersonal arguments: 16.37%
Information seeking: 11.47%
Disagreements, conflicts, attacks: 6.86%
Unclassified verbal behaviors: 4.18%
Motion making: 4.14%
Union: 3.89%
Motion seconding: 3.81%
Tension management: 3.40%
Direction of traffic: 2.51%
Stonewalling: 2.47%
Personal defensiveness: 2.47%
Agreeing reluctantly: 2.43%
Management: 2.29%
Making specific suggestions for action: 2.04%
Making statements in support of another person's argument: 1.42%
Corporate interests: 1.38%
Attempts to propose new board topics: 0.93%
Personal gains: 0.78%
Collective spirit and solidarity moves: 0.47%
People (workers) as distinct from union or the union leadership: 0.43%
Unknown: 0.06%


## Verbal contribution before 4h

In [23]:
print("\033[1;36mVerbal Contribution Percentages:\033[0m") 
for verbal_contribution, data in sorted_verbal_counts_before:
    percentage = round(data['Percentage'], 2) 
    print(f"{verbal_contribution}: {data['Count']} times ({percentage}%)")

[1;36mVerbal Contribution Percentages:[0m
Information giving: 148 times (20.85%)
Sensible,nonpersonal arguments: 137 times (19.3%)
Information seeking: 92 times (12.96%)
Union: 50 times (7.04%)
Personal defensiveness: 40 times (5.63%)
Stonewalling: 31 times (4.37%)
Management: 28 times (3.94%)
Direction of traffic: 24 times (3.38%)
Tension management: 24 times (3.38%)
Unclassified verbal behaviors: 24 times (3.38%)
Motion seconding: 20 times (2.82%)
Making statements in support of another person's argument: 16 times (2.25%)
Motion making: 15 times (2.11%)
Making specific suggestions for action: 13 times (1.83%)
Agreeing reluctantly: 12 times (1.69%)
Personal gains: 10 times (1.41%)
Corporate interests: 8 times (1.13%)
Attempts to propose new board topics: 7 times (0.99%)
Collective spirit and solidarity moves: 5 times (0.7%)
People (workers) as distinct from union or the union leadership: 5 times (0.7%)
Unknown: 1 times (0.14%)


## Verbal contribution after 4h

In [24]:
print("\033[1;36mVerbal Contribution Percentages:\033[0m") 
for verbal_contribution, data in translated_verbal_counts.items():
    if verbal_contribution != verbal_contribution_codes.get(focus_category, focus_category):
        print(f"{verbal_contribution}: {data['Count']} times ({data['Percentage']})")

[1;36mVerbal Contribution Percentages:[0m
Information giving: 125 times (17.61%)
Sensible,nonpersonal arguments: 124 times (17.46%)
Information seeking: 94 times (13.24%)
Personal defensiveness: 49 times (6.9%)
Union: 46 times (6.48%)
Stonewalling: 39 times (5.49%)
Tension management: 36 times (5.07%)
Direction of traffic: 27 times (3.8%)
Agreeing reluctantly: 27 times (3.8%)
Management: 24 times (3.38%)
Unclassified verbal behaviors: 23 times (3.24%)
Making statements in support of another person's argument: 20 times (2.82%)
Motion making: 18 times (2.54%)
Motion seconding: 14 times (1.97%)
Corporate interests: 11 times (1.55%)
Making specific suggestions for action: 11 times (1.55%)
Personal gains: 10 times (1.41%)
Attempts to propose new board topics: 8 times (1.13%)
People (workers) as distinct from union or the union leadership: 4 times (0.56%)
