In [1]:
import os
import re
import subprocess
import sys
from datetime import datetime
from difflib import SequenceMatcher
from pathlib import Path

import pandas as pd
from docx import Document
from langdetect import DetectorFactory
from langdetect import detect, LangDetectException
from pptx import Presentation

# pandas display options: show up to 100 rows and 100 columns, allow 200
# characters per cell before truncating, and print floats with one decimal.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
# Fully qualified option name: pandas resolves abbreviated names by pattern
# matching, which can become ambiguous if similarly named options are added.
pd.set_option('display.max_colwidth', 200)
pd.set_option('display.float_format', '{:.1f}'.format)

# Folder (relative path) containing the meeting documents to extract
meeting_folder = "sept_24_coordinators_meeting"

In [2]:
def extract_docx_content(file_path):
    """Read a .docx file and return its text as a list of record dicts.

    Every record has the keys 'text', 'element_type' and 'style'.
    Non-blank paragraphs are emitted first: 'element_type' is 'heading' when
    the paragraph's style name starts with 'Heading', otherwise 'paragraph',
    and 'style' is the style name. After that, each row of each table becomes
    one 'table_row' record whose text is the cell texts joined with ' | ' and
    whose 'style' is 'table_<table index>_row_<row index>'.
    """
    document = Document(file_path)

    def paragraph_record(paragraph):
        # One record per paragraph; the style name decides heading vs paragraph.
        style_name = paragraph.style.name
        kind = 'heading' if style_name.startswith('Heading') else 'paragraph'
        return {
            'text': paragraph.text,
            'element_type': kind,
            'style': style_name
        }

    records = [
        paragraph_record(paragraph)
        for paragraph in document.paragraphs
        if paragraph.text.strip()
    ]

    # Tables are appended after all paragraphs, not interleaved with them.
    for t_idx, tbl in enumerate(document.tables):
        for r_idx, tbl_row in enumerate(tbl.rows):
            cell_texts = [cell.text for cell in tbl_row.cells]
            records.append({
                'text': ' | '.join(cell_texts),
                'element_type': 'table_row',
                'style': f'table_{t_idx}_row_{r_idx}'
            })

    return records

def extract_pptx_content(file_path):
    """Extract text from a .pptx file with slide information.

    Returns a list of dicts with the keys 'text', 'element_type' and 'style':

    * 'slide_text'  - one record per shape that exposes non-blank ``text``;
      'style' is 'slide_<slide index>_shape_<shape index>'.
    * 'table_row'   - one record per row of each table shape, cell texts joined
      with ' | '; 'style' is 'slide_<slide index>_table_row_<row index>'.
    * 'slide_notes' - the slide's speaker notes when they are non-blank;
      'style' is 'slide_<slide index>_notes'.

    Slides whose notes slide has no notes text frame are handled by treating
    the notes as empty instead of raising AttributeError.
    """
    prs = Presentation(file_path)
    content = []

    for slide_idx, slide in enumerate(prs.slides):
        for shape_idx, shape in enumerate(slide.shapes):
            # Not every shape type has a ``text`` attribute, hence the hasattr guard.
            if hasattr(shape, "text") and shape.text.strip():
                content.append({
                    'text': shape.text,
                    'element_type': 'slide_text',
                    'style': f'slide_{slide_idx}_shape_{shape_idx}'
                })

            if shape.has_table:
                # NOTE(review): the label omits the shape index, so two tables on
                # the same slide produce identical 'style' values.
                for row_idx, row in enumerate(shape.table.rows):
                    row_data = [cell.text for cell in row.cells]
                    content.append({
                        'text': ' | '.join(row_data),
                        'element_type': 'table_row',
                        'style': f'slide_{slide_idx}_table_row_{row_idx}'
                    })

        if slide.has_notes_slide:
            # notes_text_frame is None when the notes slide has no notes
            # placeholder; treat that the same as empty notes.
            notes_frame = slide.notes_slide.notes_text_frame
            notes_text = notes_frame.text if notes_frame is not None else ''
            if notes_text.strip():
                content.append({
                    'text': notes_text,
                    'element_type': 'slide_notes',
                    'style': f'slide_{slide_idx}_notes'
                })

    return content

def process_meeting_folder(folder_path):
    """Process all files in meeting folder and return structured DataFrame.

    Regular files in ``folder_path`` are visited in sorted name order. Files
    with a '.docx' or '.pptx' extension (case-insensitive) are passed to
    extract_docx_content / extract_pptx_content; everything else is ignored.

    Returns a DataFrame with the columns 'source_file', 'source_type', 'text',
    'element_type', 'style' and 'extraction_date'. The columns are present even
    when nothing was extracted, so downstream column access does not fail on an
    empty folder. 'extraction_date' is a single ISO-8601 timestamp taken once
    per call and shared by all rows.
    """
    columns = [
        'source_file', 'source_type', 'text',
        'element_type', 'style', 'extraction_date',
    ]
    # One timestamp per run rather than one clock read per row.
    extraction_date = datetime.now().isoformat()
    all_data = []

    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)

        if not os.path.isfile(file_path):
            continue

        # Names starting with '~$' are the temporary owner files Office creates
        # while a document is open; they carry the real extension but are not
        # parseable documents.
        if file_name.startswith('~$'):
            continue

        file_ext = Path(file_path).suffix.lower()

        if file_ext == '.docx':
            content = extract_docx_content(file_path)
        elif file_ext == '.pptx':
            content = extract_pptx_content(file_path)
        else:
            continue

        all_data.extend(
            {
                'source_file': file_name,
                'source_type': file_ext,
                'text': item['text'],
                'element_type': item['element_type'],
                'style': item['style'],
                'extraction_date': extraction_date,
            }
            for item in content
        )

    return pd.DataFrame(all_data, columns=columns)

# Phase 2: Data Parsing

In [3]:
# NOTE(review): the first cell already imports docx, pptx and langdetect, so on
# a fresh kernel those imports execute before this install step does.
print("Ensuring correct versions of dependencies...")
pip_install = [sys.executable, "-m", "pip", "install"]
# python-docx and python-pptx are upgraded; langdetect is only installed.
for extra_args in (
    ["--upgrade", "python-docx"],
    ["--upgrade", "python-pptx"],
    ["langdetect"],
):
    subprocess.check_call(pip_install + extra_args)

print("\nDependencies installed/upgraded successfully!")

Ensuring correct versions of dependencies...

Dependencies installed/upgraded successfully!


## Step 1: Extract text from Word documents

In [4]:
# Collect Word documents in sorted name order: os.listdir returns entries in
# arbitrary order, and the order here determines the row order of df_docx.
# The extension check is case-insensitive, and names starting with '~$' (the
# temporary owner files Word creates while a document is open) are skipped.
docx_files = sorted(
    f for f in os.listdir(meeting_folder)
    if f.lower().endswith('.docx') and not f.startswith('~$')
)
print(f"Found {len(docx_files)} Word documents:")
for f in docx_files:
    print(f"  - {f}")

docx_data = []
for file_name in docx_files:
    file_path = os.path.join(meeting_folder, file_name)
    print(f"\nProcessing: {file_name}")
    content = extract_docx_content(file_path)
    print(f"  Extracted {len(content)} elements")
    docx_data.extend(
        {
            'source_file': file_name,
            'source_type': '.docx',
            'text': item['text'],
            'element_type': item['element_type'],
            'style': item['style']
        }
        for item in content
    )

# Explicit columns keep the frame's schema intact even when nothing was found.
df_docx = pd.DataFrame(
    docx_data,
    columns=['source_file', 'source_type', 'text', 'element_type', 'style'],
)
print(f"\nTotal DOCX rows: {len(df_docx)}")
df_docx.head(10)

Found 7 Word documents:
  - Centralization of web and publication.docx
  - Coordinators F2F Agenda.docx
  - F2F Action Items.docx
  - F2F Meeting Notes (draft).docx
  - F2F Meeting Report (near final).docx
  - F2F Meeting Report (near final)_TG_FR_LS_Final.docx
  - Options and best practices for timely publication v2.docx

Processing: Centralization of web and publication.docx
  Extracted 25 elements

Processing: Coordinators F2F Agenda.docx
  Extracted 60 elements

Processing: F2F Action Items.docx
  Extracted 18 elements

Processing: F2F Meeting Notes (draft).docx
  Extracted 174 elements

Processing: F2F Meeting Report (near final).docx
  Extracted 173 elements

Processing: F2F Meeting Report (near final)_TG_FR_LS_Final.docx
  Extracted 173 elements

Processing: Options and best practices for timely publication v2.docx
  Extracted 47 elements

Total DOCX rows: 670


Unnamed: 0,source_file,source_type,text,element_type,style
0,Centralization of web and publication.docx,.docx,Centralization of web and publication,heading,Heading 1
1,Centralization of web and publication.docx,.docx,Goals: to find efficiencies (time and capacity) in the publication process of the CSAS wheel.,paragraph,Normal
2,Centralization of web and publication.docx,.docx,Challenges: As per the 2018 evaluation to reduce publications timelines.,paragraph,Normal
3,Centralization of web and publication.docx,.docx,Task of the NCR web and pub team (adapted from CSAS Roles and Responsibilities 2015 document),heading,Heading 2
4,Centralization of web and publication.docx,.docx,Information management and technology,paragraph,List Paragraph
5,Centralization of web and publication.docx,.docx,Liaise with CDOS on issues related to CSAS database,paragraph,List Paragraph
6,Centralization of web and publication.docx,.docx,Contribute to modernization of IT Tools,paragraph,List Paragraph
7,Centralization of web and publication.docx,.docx,Management of national shared drive (content and access),paragraph,List Paragraph
8,Centralization of web and publication.docx,.docx,Knowledge dissemination,paragraph,List Paragraph
9,Centralization of web and publication.docx,.docx,Publish CSAS documents,paragraph,List Paragraph


## Step 2: Extract text from PowerPoint presentations

In [5]:
# Collect PowerPoint files in sorted name order: os.listdir returns entries in
# arbitrary order, and the order here determines the row order of df_pptx.
# The extension check is case-insensitive, and names starting with '~$' (the
# temporary owner files Office creates while a file is open) are skipped.
pptx_files = sorted(
    f for f in os.listdir(meeting_folder)
    if f.lower().endswith('.pptx') and not f.startswith('~$')
)
print(f"Found {len(pptx_files)} PowerPoint presentations:")
for f in pptx_files:
    print(f"  - {f}")

pptx_data = []
for file_name in pptx_files:
    file_path = os.path.join(meeting_folder, file_name)
    print(f"\nProcessing: {file_name}")
    content = extract_pptx_content(file_path)
    print(f"  Extracted {len(content)} elements")
    pptx_data.extend(
        {
            'source_file': file_name,
            'source_type': '.pptx',
            'text': item['text'],
            'element_type': item['element_type'],
            'style': item['style']
        }
        for item in content
    )

# Explicit columns keep the frame's schema intact even when nothing was found.
df_pptx = pd.DataFrame(
    pptx_data,
    columns=['source_file', 'source_type', 'text', 'element_type', 'style'],
)
print(f"\nTotal PPTX rows: {len(df_pptx)}")
df_pptx.head(10)

Found 5 PowerPoint presentations:
  - CSAS Publications.pptx
  - CSAS Transformation update-FR.pptx
  - CSAS Transformation update.pptx
  - Process vs Product.pptx
  - Survival exericise.pptx

Processing: CSAS Publications.pptx
  Extracted 23 elements

Processing: CSAS Transformation update-FR.pptx
  Extracted 61 elements

Processing: CSAS Transformation update.pptx
  Extracted 61 elements

Processing: Process vs Product.pptx
  Extracted 15 elements

Processing: Survival exericise.pptx
  Extracted 32 elements

Total PPTX rows: 192


Unnamed: 0,source_file,source_type,text,element_type,style
0,CSAS Publications.pptx,.pptx,CSAS Publications,slide_text,slide_0_shape_0
1,CSAS Publications.pptx,.pptx,CSAS Coordinators F2F Meeting\nSeptember 2024,slide_text,slide_0_shape_1
2,CSAS Publications.pptx,.pptx,Objective of Discussion,slide_text,slide_1_shape_0
3,CSAS Publications.pptx,.pptx,To further explore options for facilitating timely publications\nTo develop recommendations for the Science Executive Committee (as part of reporting on status of overdue publications)\n\n“overdue...,slide_text,slide_1_shape_1
4,CSAS Publications.pptx,.pptx,Review of Status,slide_text,slide_2_shape_0
5,CSAS Publications.pptx,.pptx,Date​ | “Overdue publications”​\n(from meetings in 2020 and earlier)​,table_row,slide_2_table_row_0
6,CSAS Publications.pptx,.pptx,December 2021​ | 455​,table_row,slide_2_table_row_1
7,CSAS Publications.pptx,.pptx,February 2022​ | 381​,table_row,slide_2_table_row_2
8,CSAS Publications.pptx,.pptx,March 2022​ | 377​,table_row,slide_2_table_row_3
9,CSAS Publications.pptx,.pptx,May 2022​ | 342​,table_row,slide_2_table_row_4


## Step 3: Combine into master DataFrame

In [6]:
# Stack the Word rows on top of the PowerPoint rows with a fresh 0..n-1 index.
df_raw = pd.concat([df_docx, df_pptx], ignore_index=True)

# 1-based sequential id, placed as the first column, for tracking rows later.
df_raw.insert(0, 'row_id', range(1, len(df_raw) + 1))

print(f"Combined raw extraction: {len(df_raw)} total rows")
for heading, column in (
    ("\nBreakdown by source type:", 'source_type'),
    ("\nBreakdown by element type:", 'element_type'),
):
    print(heading)
    print(df_raw[column].value_counts())

print("\nFirst 10 rows:")
df_raw.head(10)

Combined raw extraction: 862 total rows

Breakdown by source type:
source_type
.docx    670
.pptx    192
Name: count, dtype: int64

Breakdown by element type:
element_type
paragraph      421
table_row      295
slide_text     120
heading         23
slide_notes      3
Name: count, dtype: int64

First 10 rows:


Unnamed: 0,row_id,source_file,source_type,text,element_type,style
0,1,Centralization of web and publication.docx,.docx,Centralization of web and publication,heading,Heading 1
1,2,Centralization of web and publication.docx,.docx,Goals: to find efficiencies (time and capacity) in the publication process of the CSAS wheel.,paragraph,Normal
2,3,Centralization of web and publication.docx,.docx,Challenges: As per the 2018 evaluation to reduce publications timelines.,paragraph,Normal
3,4,Centralization of web and publication.docx,.docx,Task of the NCR web and pub team (adapted from CSAS Roles and Responsibilities 2015 document),heading,Heading 2
4,5,Centralization of web and publication.docx,.docx,Information management and technology,paragraph,List Paragraph
5,6,Centralization of web and publication.docx,.docx,Liaise with CDOS on issues related to CSAS database,paragraph,List Paragraph
6,7,Centralization of web and publication.docx,.docx,Contribute to modernization of IT Tools,paragraph,List Paragraph
7,8,Centralization of web and publication.docx,.docx,Management of national shared drive (content and access),paragraph,List Paragraph
8,9,Centralization of web and publication.docx,.docx,Knowledge dissemination,paragraph,List Paragraph
9,10,Centralization of web and publication.docx,.docx,Publish CSAS documents,paragraph,List Paragraph


## Step 4: Validate extraction quality

In [7]:
# Character count of every row's text, computed once and reused below.
text_lengths = df_raw['text'].str.len()

print("=== EXTRACTION QUALITY ASSESSMENT ===\n")

print(f"Total rows extracted: {len(df_raw)}")
print(f"Total characters: {text_lengths.sum():,}")
print(f"Average text length per row: {text_lengths.mean():.1f} chars")

print(f"\n--- Null values ---")
print(df_raw.isnull().sum())

print(f"\n--- Text length distribution ---")
print(text_lengths.describe())

# Per-file row counts and character totals; groupby returns the file names in
# sorted order.
print(f"\n--- Files processed ---")
rows_per_file = df_raw.groupby('source_file').size()
chars_per_file = text_lengths.groupby(df_raw['source_file']).sum()
for source_file, count, total_chars in zip(
    rows_per_file.index, rows_per_file, chars_per_file
):
    print(f"  {source_file:<60} {count:>4} rows  {total_chars:>8,} chars")

print(f"\n--- Element types extracted ---")
rows_per_type = df_raw['element_type'].value_counts().sort_index()
for elem_type, count in rows_per_type.items():
    print(f"  {elem_type:<30} {count:>4} rows")

print("\nExtraction completed successfully!")

=== EXTRACTION QUALITY ASSESSMENT ===

Total rows extracted: 862
Total characters: 103,971
Average text length per row: 120.6 chars

--- Null values ---
row_id          0
source_file     0
source_type     0
text            0
element_type    0
style           0
dtype: int64

--- Text length distribution ---
count   862.0
mean    120.6
std     135.0
min       1.0
25%      28.0
50%      72.0
75%     161.5
max     865.0
Name: text, dtype: float64

--- Files processed ---
  CSAS Publications.pptx                                         23 rows     1,665 chars
  CSAS Transformation update-FR.pptx                             61 rows     7,141 chars
  CSAS Transformation update.pptx                                61 rows     5,561 chars
  Centralization of web and publication.docx                     25 rows     2,662 chars
  Coordinators F2F Agenda.docx                                   60 rows     5,478 chars
  F2F Action Items.docx                                          18 rows       470 

## Phase 3: Data Cleaning & Normalization

### Step 1: Add language detection

In [8]:
def detect_language(text):
    """Return the language code langdetect assigns to ``text``, or 'unknown'.

    'unknown' is returned when ``text`` is not a string (None, NaN, numbers),
    when it has fewer than 3 characters after stripping whitespace, or when
    langdetect raises LangDetectException for it.
    """
    # Non-string values (e.g. NaN from a reloaded CSV) have no .strip(); treat
    # them like empty text instead of raising AttributeError.
    if not isinstance(text, str) or len(text.strip()) < 3:
        return 'unknown'
    try:
        return detect(text)
    except LangDetectException:
        return 'unknown'

# langdetect is non-deterministic unless a seed is set, so the same text can be
# labelled differently from run to run. Pin the seed before detecting anything
# so the language column is reproducible.
DetectorFactory.seed = 0

# Work on a copy so df_raw keeps the untouched extraction.
df_clean = df_raw.copy()
df_clean['language'] = df_clean['text'].apply(detect_language)

print("Language detection completed.")
print(f"\nLanguage distribution:")
print(df_clean['language'].value_counts())

# Rows = source files, columns = detected language codes, values = row counts.
print(f"\n--- Language by source file ---")
lang_summary = df_clean.groupby(['source_file', 'language']).size().unstack(fill_value=0)
print(lang_summary)

# Show the first three rows of each detected language, truncated to 80 chars.
print(f"\n--- Top rows by language (en=English, fr=French, other) ---")
for lang in sorted(df_clean['language'].unique()):
    sample = df_clean[df_clean['language'] == lang].head(3)
    print(f"\n{lang.upper()}:")
    for _, row in sample.iterrows():
        print(f"  [{row['source_file']}] {row['text'][:80]}...")

Language detection completed.

Language distribution:
language
en         493
fr         227
de          35
unknown     30
tl          20
ca          11
it          10
nl           5
sw           4
ro           4
pt           4
so           3
hu           3
id           3
vi           2
af           2
es           1
sv           1
fi           1
no           1
hr           1
cy           1
Name: count, dtype: int64

--- Language by source file ---
language                                                   af  ca  cy  de  \
source_file                                                                 
CSAS Publications.pptx                                      0   0   0   2   
CSAS Transformation update-FR.pptx                          0   1   0   0   
CSAS Transformation update.pptx                             0   0   0   0   
Centralization of web and publication.docx                  0   0   0   0   
Coordinators F2F Agenda.docx                                0   0   0   7   
F2F Actio

### Step 2: Identify duplicate and near-duplicate content

In [9]:
def similarity_ratio(a, b):
    """Return the difflib similarity ratio of ``a`` and ``b`` (0.0 to 1.0).

    ``autojunk`` is disabled: with the default heuristic, once the second
    sequence has 200 or more items, items that occur often are dropped as
    match anchors, which can collapse the ratio for long, nearly identical
    strings.
    """
    return SequenceMatcher(None, a, b, autojunk=False).ratio()

print("=== DUPLICATE AND NEAR-DUPLICATE ANALYSIS ===\n")

# Check exact duplicates: every row whose text occurs more than once anywhere.
exact_dups = df_clean[df_clean.duplicated(subset=['text'], keep=False)].sort_values('text')
print(f"Exact duplicate rows: {len(exact_dups)}")
if len(exact_dups) > 0:
    print("Exact duplicates found:")
    for text in exact_dups['text'].unique():
        sources = df_clean[df_clean['text'] == text]['source_file'].unique()
        print(f"  Text appears in: {sources}")

# Known pairs to check: F2F Meeting Report versions and Transformation presentations
print("\n--- Known document pairs (for manual review) ---")
print("1. F2F Meeting Report (near final).docx vs (near final)_TG_FR_LS_Final.docx")
f2f_1 = df_clean[df_clean['source_file'] == 'F2F Meeting Report (near final).docx']
f2f_2 = df_clean[df_clean['source_file'] == 'F2F Meeting Report (near final)_TG_FR_LS_Final.docx']
print(f"   First version: {len(f2f_1)} rows, Second version: {len(f2f_2)} rows")

print("\n2. CSAS Transformation update.pptx (EN) vs (FR)")
trans_en = df_clean[df_clean['source_file'] == 'CSAS Transformation update.pptx']
trans_fr = df_clean[df_clean['source_file'] == 'CSAS Transformation update-FR.pptx']
print(f"   English: {len(trans_en)} rows, French: {len(trans_fr)} rows")
print(f"   (Both likely contain same content in different languages)")

# Sample of potential near-duplicates across reports
print("\n--- Checking for near-duplicates across meeting report documents ---")
meeting_reports = df_clean[
    df_clean['source_file'].str.contains('F2F Meeting Report', regex=False)
]
# Only texts longer than 20 characters are compared; filter once up front
# instead of re-checking the lengths for every pair.
candidates = meeting_reports[meeting_reports['text'].str.len() > 20]
candidate_rows = list(zip(candidates['source_file'], candidates['text']))
for idx, (file_1, text_1) in enumerate(candidate_rows):
    for file_2, text_2 in candidate_rows[idx + 1:]:
        # This check reports similarity *between documents*; two rows of the
        # same file are not a cross-document match.
        if file_1 == file_2:
            continue
        if similarity_ratio(text_1, text_2) > 0.95:
            print(f"  High similarity (95%+) between documents")
            print(f"    File 1: {file_1}")
            print(f"    File 2: {file_2}")

=== DUPLICATE AND NEAR-DUPLICATE ANALYSIS ===

Exact duplicate rows: 442
Exact duplicates found:
  Text appears in: ['CSAS Transformation update-FR.pptx' 'CSAS Transformation update.pptx']
  Text appears in: ['CSAS Transformation update-FR.pptx' 'CSAS Transformation update.pptx']
  Text appears in: ['Coordinators F2F Agenda.docx' 'F2F Meeting Notes (draft).docx'
 'F2F Meeting Report (near final).docx']
  Text appears in: ['Coordinators F2F Agenda.docx' 'F2F Meeting Notes (draft).docx'
 'F2F Meeting Report (near final).docx']
  Text appears in: ['Coordinators F2F Agenda.docx' 'F2F Meeting Notes (draft).docx'
 'F2F Meeting Report (near final).docx']
  Text appears in: ['CSAS Transformation update-FR.pptx' 'CSAS Transformation update.pptx']
  Text appears in: ['Coordinators F2F Agenda.docx' 'F2F Meeting Notes (draft).docx'
 'F2F Meeting Report (near final).docx']
  Text appears in: ['CSAS Transformation update-FR.pptx' 'CSAS Transformation update.pptx']
  Text appears in: ['Coordinators F

### Step 3: Extract action items and keywords

In [10]:


# Keyword lists used to tag rows by content type. All entries are lowercase
# because they are matched against lowercased row text.

# Phrases suggesting a task, an owner, a deadline or an obligation.
action_keywords = [
    'action item', 'action:', 'to do', 'todo:', 'task:',
    'deliver', 'responsibility', 'owner:', 'by:',
    'deadline', 'date:',
    'follow-up', 'follow up', 'next step',
    'will', 'should', 'needs to', 'required', 'must',
    'implement', 'complete',
]

# Phrases suggesting advice, proposals or options.
recommendation_keywords = [
    'recommendation', 'recommend', 'suggest', 'propose',
    'best practice', 'option', 'approach',
    'consideration', 'should consider',
]

# Phrases suggesting open issues, risks or unresolved discussion.
contention_keywords = [
    'concern', 'issue', 'challenge', 'problem', 'risk',
    'conflict', 'disagreement', 'debate', 'discuss',
    'decision needed', 'pending', 'unclear', 'undefined',
]

def check_content_type(text, keyword_list):
    """Return True if ``text`` contains any keyword from ``keyword_list``.

    Matching is done on the lowercased text, so keywords are expected to be
    lowercase. A keyword that begins with a letter, digit or underscore must
    start at a word start, so 'option' matches 'options' but not 'adoption',
    and 'date:' matches 'up to date:' but not 'update:'. Keywords still act as
    prefixes ('recommend' matches 'recommended'). Non-string ``text`` (None,
    NaN) never matches.
    """
    if not isinstance(text, str):
        return False
    text_lower = text.lower()
    for keyword in keyword_list:
        # Only anchor keywords that themselves start with a word character; a
        # keyword starting with punctuation is matched wherever it occurs.
        anchor = r'(?<!\w)' if re.match(r'\w', keyword) else ''
        if re.search(anchor + re.escape(keyword), text_lower):
            return True
    return False

# Add one boolean flag column per keyword list.
flag_columns = (
    ('is_action_item', action_keywords),
    ('is_recommendation', recommendation_keywords),
    ('is_contention', contention_keywords),
)
for flag_column, keywords in flag_columns:
    df_clean[flag_column] = df_clean['text'].apply(check_content_type, args=(keywords,))

print("=== CONTENT CATEGORIZATION ===\n")
print(f"Action items identified: {df_clean['is_action_item'].sum()}")
print(f"Recommendations identified: {df_clean['is_recommendation'].sum()}")
print(f"Contentious/issue items: {df_clean['is_contention'].sum()}")

# First ten flagged rows of each category, text truncated to 100 characters.
action_samples = df_clean[df_clean['is_action_item']].head(10)
rec_samples = df_clean[df_clean['is_recommendation']].head(10)
cont_samples = df_clean[df_clean['is_contention']].head(10)

sample_sections = (
    ("\n--- Sample Action Items ---", action_samples),
    ("\n--- Sample Recommendations ---", rec_samples),
    ("\n--- Sample Contentious Items ---", cont_samples),
)
for section_heading, section_rows in sample_sections:
    print(section_heading)
    for _, row in section_rows.iterrows():
        print(f"[{row['source_file']}] {row['text'][:100]}")

=== CONTENT CATEGORIZATION ===

Action items identified: 129
Recommendations identified: 66
Contentious/issue items: 124

--- Sample Action Items ---
[Centralization of web and publication.docx] Develop and implement national web publication guidelines, processes, and templates
[Centralization of web and publication.docx] Ensure online calendar is up to date: update CSAS calendar Code and post CSAS documents (Terms of Re
[Centralization of web and publication.docx] To build relationships and improve quality of translations, web and pub team should scan and verify 
[Centralization of web and publication.docx] Hire “Finalizer” in the NCR web and pub team. Regions with finalizers have lost these people and pos
[Coordinators F2F Agenda.docx] 10:00 to 11:00 | CSAS Transformation
Objective: 
to share updates on Transformation 
to seek input o
[Coordinators F2F Agenda.docx] 11:00 to 12:00 | CSAS DM App Needs Assessment – 
Phase I (Requests for Advice)
Objective:
to review 
[Coordinators F2F A

### Step 4: Normalize text and generate summary statistics

In [11]:
def normalize_text(text):
    """Trim ``text`` and collapse every run of whitespace into a single space."""
    # str.split() with no argument drops leading/trailing whitespace and splits
    # on whitespace runs, so re-joining with one space does both steps at once.
    return " ".join(text.split())

# Whitespace-normalized copy of the text; the original 'text' column is kept.
df_clean['text_normalized'] = df_clean['text'].map(normalize_text)

print("=== TEXT NORMALIZATION & CLEANING SUMMARY ===\n")

# Check for empty/very short entries after normalization
short_entries = df_clean[df_clean['text_normalized'].str.len() < 5]
print(f"Very short entries (<5 chars): {len(short_entries)}")

# Count rows whose text was changed at all by normalization.
print(f"\nFormatting checks:")
print(f"  Entries with multiple spaces normalized: {(df_clean['text'] != df_clean['text_normalized']).sum()}")

# Summary by document
print("\n--- Summary Statistics by Source File ---")
# NOTE(review): summary_stats is built here but not referenced again in this
# cell; the printed report below is computed from the per-file groups.
per_file_aggregations = {
    'row_id': 'count',
    'language': lambda x: x.value_counts().to_dict(),
    'is_action_item': 'sum',
    'is_recommendation': 'sum',
    'is_contention': 'sum'
}
summary_stats = (
    df_clean
    .groupby('source_file')
    .agg(per_file_aggregations)
    .rename(columns={'row_id': 'total_rows'})
)

# groupby yields the files in sorted name order.
for source_file, subset in df_clean.groupby('source_file'):
    total = len(subset)
    lang_dist = subset['language'].value_counts().to_dict()
    actions = subset['is_action_item'].sum()
    recs = subset['is_recommendation'].sum()
    contents = subset['is_contention'].sum()

    print(f"\n{source_file}")
    print(f"  Total rows: {total}")
    print(f"  Languages: {lang_dist}")
    print(f"  Action items: {actions}, Recommendations: {recs}, Contentions: {contents}")

# Export cleaned data for next phase (written to the current working directory)
df_clean.to_pickle('meeting_data_cleaned.pkl')
df_clean.to_csv('meeting_data_cleaned.csv', index=False)
print("\n\nCleaned data saved to meeting_data_cleaned.pkl and meeting_data_cleaned.csv")

=== TEXT NORMALIZATION & CLEANING SUMMARY ===

Very short entries (<5 chars): 32

Formatting checks:
  Entries with multiple spaces normalized: 425

--- Summary Statistics by Source File ---

CSAS Publications.pptx
  Total rows: 23
  Languages: {'en': 15, 'de': 2, 'fr': 2, 'nl': 1, 'tl': 1, 'no': 1, 'ro': 1}
  Action items: 2, Recommendations: 4, Contentions: 1

CSAS Transformation update-FR.pptx
  Total rows: 61
  Languages: {'fr': 37, 'unknown': 15, 'en': 8, 'ca': 1}
  Action items: 3, Recommendations: 2, Contentions: 5

CSAS Transformation update.pptx
  Total rows: 61
  Languages: {'en': 43, 'unknown': 15, 'hr': 1, 'it': 1, 'so': 1}
  Action items: 13, Recommendations: 2, Contentions: 7

Centralization of web and publication.docx
  Total rows: 25
  Languages: {'en': 24, 'fr': 1}
  Action items: 4, Recommendations: 1, Contentions: 3

Coordinators F2F Agenda.docx
  Total rows: 60
  Languages: {'en': 22, 'fr': 21, 'de': 7, 'sw': 4, 'nl': 2, 'tl': 2, 'id': 1, 'ro': 1}
  Action items: 5,