# JCC2 Section-Specific Visualizations

This notebook provides comprehensive visualizations for key sections of the JCC2 User Questionnaire:
- User Information
- Role and Echelon
- Operational JCC2 Experience
- MOP 1.1.1 (Intelligence Data)

Each section includes tailored visualizations based on the data types and patterns found.

## 1. Setup and Data Loading

In [None]:
# Standard imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Import JCC2 processor
from jcc2_data_processor import create_processor, DataFormat

# Plot styling: matplotlib's seaborn-flavoured whitegrid theme plus the
# seaborn "Set2" palette as the default colour cycle.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("Set2")

# pandas display limits used when frames are rendered in the notebook.
for option_name, option_value in (('display.max_columns', 50),
                                  ('display.max_rows', 100)):
    pd.set_option(option_name, option_value)

# Named hex colours shared by the figures below so panels stay consistent.
COLORS = dict(
    primary='#2E86AB',
    secondary='#A23B72',
    tertiary='#F18F01',
    quaternary='#C73E1D',
    success='#52B788',
    warning='#F77F00',
    info='#5C946E',
)

In [None]:
# Load the questionnaire export through the JCC2 processor.
csv_file = "mock_20_jcc2_user_questionnaire.csv"
processor = create_processor(csv_file)
df = processor.load_data()

# Sections covered by this notebook; the summary dashboard cell reads this list again.
sections_to_analyze = ['user_information', 'role_and_echelon', 'operational_jcc2_experience', 'mop_1_1_1']

print(f"Loaded {len(df)} responses")
print(f"Data format: {processor.format_type.value}")
print(f"\nSections to analyze:")
for section in sections_to_analyze:
    # Sections the processor does not know about are silently left out of the listing.
    if section not in processor.sections:
        continue
    print(f"  - {section}: {len(processor.sections[section])} fields")

## 2. User Information Section Analysis

In [None]:
# Print the reported type and completion rate of every user_information field.
user_info_summary = processor.get_section_summary('user_information')
print("User Information Fields:")
for field, data in user_info_summary['field_summaries'].items():
    field_type = data['field_type']
    completion_rate = data['completion_rate']
    print(f"  - {field}: {field_type} (completion: {completion_rate:.1%})")

In [None]:
# User Information Visualizations
# Six panels on a 2x3 grid, one per aspect of the user_information section.
fig = plt.figure(figsize=(20, 12))
panel_title_kw = {'fontsize': 14, 'fontweight': 'bold'}

# 1. Participants per event
ax1 = plt.subplot(2, 3, 1)
event_data = df['user_information.event'].value_counts()
event_data.plot.bar(ax=ax1, color=COLORS['primary'])
ax1.set_title('Distribution of Events', **panel_title_kw)
ax1.set(xlabel='Event Name', ylabel='Number of Participants')
ax1.tick_params(axis='x', rotation=45)

# 2. Responses per date, in chronological order
ax2 = plt.subplot(2, 3, 2)
# The date column is converted to datetime in place on the shared frame.
df['user_information.date'] = pd.to_datetime(df['user_information.date'])
date_counts = df['user_information.date'].value_counts().sort_index()
date_counts.plot.line(ax=ax2, color=COLORS['secondary'], marker='o')
ax2.set_title('Participation Timeline', **panel_title_kw)
ax2.set(xlabel='Date', ylabel='Number of Responses')
ax2.tick_params(axis='x', rotation=45)

# 3. The ten units with the most participants
ax3 = plt.subplot(2, 3, 3)
unit_data = df['user_information.unit'].value_counts().head(10)
unit_data.plot.barh(ax=ax3, color=COLORS['tertiary'])
ax3.set_title('Top 10 Units by Participation', **panel_title_kw)
ax3.set(xlabel='Number of Participants', ylabel='Unit')

# 4. Share of each rank (15 most common values), drawn as a pie chart
ax4 = plt.subplot(2, 3, 4)
rank_data = df['user_information.rank_name'].value_counts().head(15)
rank_data.plot.pie(ax=ax4, autopct='%1.1f%%', startangle=90)
ax4.set_title('Rank Distribution', **panel_title_kw)
ax4.set_ylabel('')

# 5. Fraction of responses that filled in each contact field
ax5 = plt.subplot(2, 3, 5)
contact_fields = ['user_information.email', 'user_information.phone']
completion_rates = [df[field].notna().mean() for field in contact_fields]
ax5.bar(['Email', 'Phone'], completion_rates, color=[COLORS['success'], COLORS['info']])
ax5.set_title('Contact Information Completion Rates', **panel_title_kw)
ax5.set_ylabel('Completion Rate')
ax5.set_ylim(0, 1)
for i, v in enumerate(completion_rates):
    # Percentage label just above each bar.
    ax5.text(i, v + 0.02, f'{v:.1%}', ha='center', fontweight='bold')

# 6. Records with all four core fields present vs. records missing at least one
ax6 = plt.subplot(2, 3, 6)
core_fields = ['user_information.event', 'user_information.date',
               'user_information.rank_name', 'user_information.unit']
complete_records = df[core_fields].notna().all(axis=1).sum()
quality_data = {
    'Complete Records': complete_records,
    'Partial Records': len(df) - complete_records
}
ax6.pie(quality_data.values(), labels=quality_data.keys(), autopct='%1.1f%%',
        colors=[COLORS['success'], COLORS['warning']], startangle=90)
ax6.set_title('Data Completeness', **panel_title_kw)

plt.suptitle('User Information Section Analysis', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

## 3. Role and Echelon Section Analysis

In [None]:
# Print the reported type and completion rate of every role_and_echelon field.
role_summary = processor.get_section_summary('role_and_echelon')
print("Role and Echelon Fields:")
for field, data in role_summary['field_summaries'].items():
    field_type = data['field_type']
    completion_rate = data['completion_rate']
    print(f"  - {field}: {field_type} (completion: {completion_rate:.1%})")

In [None]:
# Role and Echelon Visualizations
# Nine panels on a 3x3 grid. Multi-select answers (echelon, duties) are handled
# as either lists or '; '-separated strings; the helpers below normalise both.


def _split_multiselect(answer):
    """Return the options selected in one multi-select answer.

    Args:
        answer: A list of options, or any other value whose string form holds
            options separated by '; '.

    Returns:
        list: The selected options. For string input every token is stripped
        of surrounding whitespace and empty tokens are dropped.
    """
    if isinstance(answer, list):
        return list(answer)
    tokens = (token.strip() for token in str(answer).split('; '))
    return [token for token in tokens if token]


def _count_multiselect(answers):
    """Count how often each option occurs over the non-null answers of a Series."""
    counts = {}
    for answer in answers.dropna():
        for option in _split_multiselect(answer):
            counts[option] = counts.get(option, 0) + 1
    return counts


def _role_panel_placeholder(ax, message, title):
    """Write a centred message on a panel that has nothing to plot, keeping its title."""
    ax.text(0.5, 0.5, message, ha='center', va='center', transform=ax.transAxes)
    ax.set_title(title, fontsize=14, fontweight='bold')


fig = plt.figure(figsize=(20, 15))

# 1. Current role status distribution
ax1 = plt.subplot(3, 3, 1)
role_status = df['role_and_echelon.current_role_status'].value_counts()
colors = [COLORS['primary'], COLORS['secondary'], COLORS['tertiary'], COLORS['quaternary']][:len(role_status)]
role_status.plot(kind='pie', ax=ax1, autopct='%1.1f%%', colors=colors, startangle=45)
ax1.set_title('Current Role Status Distribution', fontsize=14, fontweight='bold')
ax1.set_ylabel('')

# 2. Cyber operator distribution
ax2 = plt.subplot(3, 3, 2)
cyber_op = df['role_and_echelon.is_cyber_operator'].value_counts()
cyber_op.plot(kind='bar', ax=ax2, color=[COLORS['success'], COLORS['warning']])
ax2.set_title('Cyber Operator Distribution', fontsize=14, fontweight='bold')
ax2.set_xlabel('Is Cyber Operator')
ax2.set_ylabel('Count')
ax2.tick_params(axis='x', rotation=0)
for i, v in enumerate(cyber_op.values):
    # Count label just above each bar.
    ax2.text(i, v + 0.5, str(v), ha='center', fontweight='bold')

# 3. Echelon distribution (respondents who answered 'Yes' to is_cyber_operator)
ax3 = plt.subplot(3, 3, 3)
cyber_ops_df = df[df['role_and_echelon.is_cyber_operator'] == 'Yes']
echelon_counts = {}
if 'role_and_echelon.echelon' in cyber_ops_df.columns:
    echelon_counts = _count_multiselect(cyber_ops_df['role_and_echelon.echelon'])

if echelon_counts:
    echelon_df = pd.Series(echelon_counts).sort_values(ascending=True)
    echelon_df.plot(kind='barh', ax=ax3, color=COLORS['info'])
    ax3.set_title('Echelon Levels (Cyber Operators)', fontsize=14, fontweight='bold')
    ax3.set_xlabel('Count')
else:
    # Also reached when operators exist but none of them gave an echelon answer.
    _role_panel_placeholder(ax3, 'No cyber operator echelon data', 'Echelon Levels (Cyber Operators)')

# 4. Duties distribution (multi-select analysis)
ax4 = plt.subplot(3, 3, 4)
duties_counts = _count_multiselect(df['role_and_echelon.duties'])
# Most frequent duty first; reused by panel 7 to pick the top duties.
duties_df = pd.Series(duties_counts, dtype='int64').sort_values(ascending=False)

if duties_counts:
    duties_df.plot(kind='bar', ax=ax4, color=COLORS['primary'])
    ax4.set_title('Primary Duties Distribution', fontsize=14, fontweight='bold')
    ax4.set_xlabel('Duty Type')
    ax4.set_ylabel('Count')
    ax4.tick_params(axis='x', rotation=45)
else:
    _role_panel_placeholder(ax4, 'No duty data available', 'Primary Duties Distribution')

# 5. Cyber division/team distribution
ax5 = plt.subplot(3, 3, 5)
if 'role_and_echelon.cyber_ops_division_team' in df.columns:
    cyber_teams = df['role_and_echelon.cyber_ops_division_team'].dropna().value_counts().head(10)
else:
    cyber_teams = pd.Series(dtype='int64')

if not cyber_teams.empty:
    cyber_teams.plot(kind='barh', ax=ax5, color=COLORS['secondary'])
    ax5.set_title('Top 10 Cyber Ops Divisions/Teams', fontsize=14, fontweight='bold')
    ax5.set_xlabel('Count')
else:
    _role_panel_placeholder(ax5, 'No cyber team data available', 'Top 10 Cyber Ops Divisions/Teams')

# 6. Role status by cyber operator status
ax6 = plt.subplot(3, 3, 6)
role_cyber_cross = pd.crosstab(df['role_and_echelon.current_role_status'],
                               df['role_and_echelon.is_cyber_operator'])
role_cyber_cross.plot(kind='bar', ax=ax6, color=[COLORS['success'], COLORS['warning']])
ax6.set_title('Role Status by Cyber Operator Status', fontsize=14, fontweight='bold')
ax6.set_xlabel('Current Role Status')
ax6.set_ylabel('Count')
ax6.legend(title='Is Cyber Operator')
ax6.tick_params(axis='x', rotation=45)

# 7. Duty combinations heatmap
ax7 = plt.subplot(3, 3, 7)
# One de-duplicated option list per response, so a repeated option inside a
# single answer cannot inflate the pair counts.
duty_combinations = [
    list(dict.fromkeys(_split_multiselect(duties)))
    for duties in df['role_and_echelon.duties'].dropna()
]

# Restrict the matrix to the six most frequent duties for readability.
unique_duties = list(duties_df.index)
top_duties = unique_duties[:6]
co_matrix = pd.DataFrame(0, index=top_duties, columns=top_duties)

for combo in duty_combinations:
    for i, duty1 in enumerate(combo):
        # combo[i:] includes duty1 itself, so the diagonal counts responses naming that duty.
        for duty2 in combo[i:]:
            if duty1 in co_matrix.index and duty2 in co_matrix.columns:
                co_matrix.loc[duty1, duty2] += 1
                if duty1 != duty2:
                    # Keep the matrix symmetric.
                    co_matrix.loc[duty2, duty1] += 1

if not co_matrix.empty:
    sns.heatmap(co_matrix, annot=True, fmt='d', cmap='YlOrRd', ax=ax7, cbar_kws={'label': 'Co-occurrence Count'})
    ax7.set_title('Duty Combinations Heatmap', fontsize=14, fontweight='bold')
    ax7.tick_params(axis='x', rotation=45)
    ax7.tick_params(axis='y', rotation=0)
else:
    _role_panel_placeholder(ax7, 'No duty data available', 'Duty Combinations Heatmap')

# 8. Other duties word frequency (simplified)
ax8 = plt.subplot(3, 3, 8)
other_duties = df['role_and_echelon.other_duties'].dropna()
if not other_duties.empty:
    # Lower-cased, whitespace-split word counts over all free-text answers.
    all_words = ' '.join(other_duties.astype(str)).lower().split()
    word_freq = pd.Series(all_words).value_counts().head(20)
    word_freq.plot(kind='barh', ax=ax8, color=COLORS['info'])
    ax8.set_title('Top 20 Words in "Other Duties"', fontsize=14, fontweight='bold')
    ax8.set_xlabel('Frequency')
else:
    _role_panel_placeholder(ax8, 'No "other duties" data', 'Top 20 Words in "Other Duties"')

# 9. Completeness by field
ax9 = plt.subplot(3, 3, 9)
role_fields = [col for col in df.columns if col.startswith('role_and_echelon.')]
completeness = [(col.split('.')[-1], df[col].notna().mean()) for col in role_fields]
completeness_df = pd.DataFrame(completeness, columns=['Field', 'Completion Rate']).sort_values('Completion Rate')
completeness_df.plot(x='Field', y='Completion Rate', kind='barh', ax=ax9, color=COLORS['primary'], legend=False)
ax9.set_title('Field Completion Rates', fontsize=14, fontweight='bold')
ax9.set_xlabel('Completion Rate')
ax9.set_xlim(0, 1)
for i, (field, rate) in enumerate(completeness_df.values):
    # Rows are in plotting order, so i is the bar's y position.
    ax9.text(rate + 0.02, i, f'{rate:.1%}', va='center', fontsize=9)

plt.suptitle('Role and Echelon Section Analysis', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

## 4. Operational JCC2 Experience Section Analysis

In [None]:
# Collect and print the experience ('exp_'-prefixed) fields of the
# operational_jcc2_experience section with their type and completion rate.
ops_summary = processor.get_section_summary('operational_jcc2_experience')
print("Operational JCC2 Experience Fields:")
experience_fields = {}
for field, data in ops_summary['field_summaries'].items():
    # Keep only the part after the last '.' of the field key.
    field_name = field.rsplit('.', 1)[-1]
    if not field_name.startswith('exp_'):
        continue
    experience_fields[field_name] = data
    field_type = data['field_type']
    completion_rate = data['completion_rate']
    print(f"  - {field_name}: {field_type} (completion: {completion_rate:.1%})")

In [None]:
# Operational JCC2 Experience Visualizations
# Panels on a 4x3 grid: overall experience, per-application heatmap and stacked
# shares, most-experienced / newest user rankings, and adoption rates.
fig = plt.figure(figsize=(24, 20))

# Column suffix -> display label. The correlation-matrix cell reads this mapping too.
experience_mapping = {
    'exp_cyberoperations': 'Cyber Operations',
    'exp_yourcurrentrole': 'Current Role',
    'exp_jcc2experience': 'JCC2 Experience',
    'exp_app_a2it': 'A2IT',
    'exp_app_cad': 'CAD',
    'exp_app_codex': 'Codex',
    'exp_app_crucible': 'Crucible',
    'exp_app_cyber9line': 'Cyber 9-Line',
    'exp_app_dispatch': 'Dispatch',
    'exp_app_jcc2cyberops': 'JCC2 Cyber-Ops',
    'exp_app_jcc2readiness': 'JCC2 Readiness',
    'exp_app_madss': 'MADSS',
    'exp_app_rally': 'Rally',
    'exp_app_redmap': 'REDMAP',
    'exp_app_sigact': 'SigAct',
    'exp_app_threathub': 'Threat Hub',
    'exp_app_triage': 'Triage',
    'exp_app_unity': 'Unity'
}

# Display order of the experience levels.
# NOTE(review): if the loader parses the CSV with pandas' default NA handling,
# literal 'NA' answers arrive as NaN and are excluded by value_counts(); the
# 'NA' column would then be all zeros and adoption would read 100% - confirm
# against jcc2_data_processor.
exp_order = ['< 1 Year', '1-3 Years', '3-5 Years', '> 5 Years', 'NA']

# Answer counts per application, computed once and keyed by display label so
# that labels and data can never get out of step when a column is missing.
app_counts = {}
for field, label in experience_mapping.items():
    col_name = f'operational_jcc2_experience.{field}'
    if field.startswith('exp_app_') and col_name in df.columns:
        app_counts[label] = df[col_name].value_counts()

# 1. Overall experience distribution (first 3 fields)
ax1 = plt.subplot(4, 3, 1)
overall_exp_fields = ['exp_cyberoperations', 'exp_yourcurrentrole', 'exp_jcc2experience']
overall_counts = {}
for field in overall_exp_fields:
    col_name = f'operational_jcc2_experience.{field}'
    if col_name in df.columns:
        overall_counts[experience_mapping.get(field, field)] = df[col_name].value_counts()

# Align the three series on one shared category axis. Plotting each
# value_counts() separately would stack bars by rank position and label them
# with the categories of whichever series was drawn last.
overall_df = pd.DataFrame(overall_counts).fillna(0).astype(int)
if not overall_df.empty:
    level_order = [level for level in exp_order if level in overall_df.index]
    level_order += [level for level in overall_df.index if level not in exp_order]
    overall_df = overall_df.reindex(level_order)
    overall_df.plot(kind='bar', ax=ax1, alpha=0.7)
    ax1.legend()
else:
    ax1.text(0.5, 0.5, 'No overall experience data', ha='center', va='center', transform=ax1.transAxes)
ax1.set_title('Overall Experience Levels', fontsize=14, fontweight='bold')
ax1.set_xlabel('Experience Level')
ax1.set_ylabel('Count')
ax1.tick_params(axis='x', rotation=45)

# 2. Application experience heatmap
ax2 = plt.subplot(4, 3, (2, 6))  # Span multiple cells
app_exp_data = [[counts.get(level, 0) for level in exp_order] for counts in app_counts.values()]

if app_exp_data:
    exp_matrix = pd.DataFrame(app_exp_data, index=list(app_counts), columns=exp_order)
    sns.heatmap(exp_matrix, annot=True, fmt='d', cmap='YlOrRd', ax=ax2, cbar_kws={'label': 'Number of Users'})
    ax2.set_title('Application Experience Heatmap', fontsize=14, fontweight='bold')
    ax2.set_xlabel('Experience Level')
    ax2.set_ylabel('Application')

# 3. Experience distribution by application (stacked bar)
ax3 = plt.subplot(4, 3, (7, 9))  # Span multiple cells
if app_exp_data:
    # Row-wise percentages; an application with no counted answers becomes an all-zero row.
    exp_matrix_pct = (exp_matrix.div(exp_matrix.sum(axis=1), axis=0) * 100).fillna(0)
    exp_matrix_pct.plot(kind='barh', stacked=True, ax=ax3, colormap='viridis')
    ax3.set_title('Experience Distribution by Application (%)', fontsize=14, fontweight='bold')
    ax3.set_xlabel('Percentage')
    ax3.set_ylabel('Application')
    ax3.legend(title='Experience', bbox_to_anchor=(1.05, 1), loc='upper left')

# 4. Applications with most experienced users
ax4 = plt.subplot(4, 3, 10)
experienced_users = {
    label: counts.get('> 5 Years', 0) + counts.get('3-5 Years', 0)
    for label, counts in app_counts.items()
}

if experienced_users:
    exp_series = pd.Series(experienced_users).sort_values(ascending=False).head(10)
    exp_series.plot(kind='bar', ax=ax4, color=COLORS['success'])
    ax4.set_title('Applications with Most Experienced Users (3+ Years)', fontsize=14, fontweight='bold')
    ax4.set_xlabel('Application')
    ax4.set_ylabel('Number of Experienced Users')
    ax4.tick_params(axis='x', rotation=45)

# 5. New user distribution (< 1 Year)
ax5 = plt.subplot(4, 3, 11)
new_users = {label: counts.get('< 1 Year', 0) for label, counts in app_counts.items()}

if new_users:
    new_series = pd.Series(new_users).sort_values(ascending=False).head(10)
    new_series.plot(kind='bar', ax=ax5, color=COLORS['info'])
    ax5.set_title('Applications with Most New Users (< 1 Year)', fontsize=14, fontweight='bold')
    ax5.set_xlabel('Application')
    ax5.set_ylabel('Number of New Users')
    ax5.tick_params(axis='x', rotation=45)

# 6. Application adoption rate (users with any experience vs NA)
ax6 = plt.subplot(4, 3, 12)
adoption_rates = {}
for label, exp_counts in app_counts.items():
    total_responses = exp_counts.sum()
    na_count = exp_counts.get('NA', 0)
    if total_responses > 0:
        adoption_rates[label] = (total_responses - na_count) / total_responses * 100

if adoption_rates:
    adoption_series = pd.Series(adoption_rates).sort_values(ascending=True)
    adoption_series.plot(kind='barh', ax=ax6, color=COLORS['primary'])
    ax6.set_title('Application Adoption Rates', fontsize=14, fontweight='bold')
    ax6.set_xlabel('Adoption Rate (%)')
    ax6.set_ylabel('Application')
    ax6.set_xlim(0, 100)
    for i, (app, rate) in enumerate(adoption_series.items()):
        # Percentage label to the right of each bar.
        ax6.text(rate + 1, i, f'{rate:.1f}%', va='center', fontsize=9)

plt.suptitle('Operational JCC2 Experience Analysis', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

In [None]:
# Additional analysis: Experience correlation matrix
# Maps each application's experience answers to an ordinal score and shows the
# pairwise correlations between applications (lower triangle only).
fig, ax = plt.subplots(figsize=(12, 10))

# Ordinal score per experience level; 'NA' and any unmapped/missing answer score 0.
exp_numeric_map = {
    '< 1 Year': 1,
    '1-3 Years': 2,
    '3-5 Years': 3,
    '> 5 Years': 4,
    'NA': 0
}

# Application experience columns present in df, with their display labels.
exp_columns = []
exp_labels = []
for field, label in experience_mapping.items():
    col_name = f'operational_jcc2_experience.{field}'
    if field.startswith('exp_app_') and col_name in df.columns:
        exp_columns.append(col_name)
        exp_labels.append(label)

if exp_columns:
    # Build the numeric view in its own frame. Writing temporary '_numeric'
    # columns into the shared df would leave it polluted if anything below
    # raised before the clean-up ran.
    exp_numeric = pd.DataFrame({
        label: df[col_name].map(exp_numeric_map).fillna(0)
        for col_name, label in zip(exp_columns, exp_labels)
    })

    # Columns with a single constant score have undefined (NaN) correlations.
    corr_matrix = exp_numeric.corr()

    # Hide the upper triangle and the diagonal, which mirror the lower triangle.
    mask = np.triu(np.ones_like(corr_matrix, dtype=bool))

    sns.heatmap(corr_matrix, mask=mask, annot=True, fmt='.2f', cmap='coolwarm',
                center=0, square=True, linewidths=1, cbar_kws={"shrink": .8},
                xticklabels=exp_labels, yticklabels=exp_labels, ax=ax)
    ax.set_title('Application Experience Correlation Matrix', fontsize=16, fontweight='bold')
else:
    # Nothing to correlate: say so instead of leaving a blank, untitled figure.
    ax.text(0.5, 0.5, 'No application experience data', ha='center', va='center', transform=ax.transAxes)
    ax.set_title('Application Experience Correlation Matrix', fontsize=16, fontweight='bold')

plt.tight_layout()
plt.show()

## 5. MOP 1.1.1 Intelligence Data Section Analysis

In [None]:
# Collect and print every mop_1_1_1 field with its reported type and completion rate.
mop_summary = processor.get_section_summary('mop_1_1_1')
print("MOP 1.1.1 Intelligence Data Fields:")
intel_fields = {}
for field, data in mop_summary['field_summaries'].items():
    # Keep only the part after the last '.' of the field key.
    field_name = field.rsplit('.', 1)[-1]
    intel_fields[field_name] = data
    field_type = data['field_type']
    completion_rate = data['completion_rate']
    print(f"  - {field_name}: {field_type} (completion: {completion_rate:.1%})")

In [None]:
# MOP 1.1.1 Intelligence Data Visualizations
# Panels on a 4x3 grid comparing, per application, how effective respondents
# rated intelligence data provision and role-completion support.
fig = plt.figure(figsize=(24, 20))

# Lookups shared by several panels. They are defined unconditionally so that
# no panel depends on an earlier panel's branch having run; the summary
# dashboard cell also reads effectiveness_map.
effectiveness_map = {
    'Completely Ineffective': 1, 'Moderately Ineffective': 2, 'Slightly Ineffective': 3,
    'Slightly Effective': 4, 'Moderately Effective': 5, 'Completely Effective': 6,
    'Not Applicable': 0
}
# The six rated levels, worst to best ('Not Applicable' excluded).
rating_levels = ['Completely Ineffective', 'Moderately Ineffective', 'Slightly Ineffective',
                 'Slightly Effective', 'Moderately Effective', 'Completely Effective']
colors_map = {
    'Completely Effective': COLORS['success'],
    'Moderately Effective': COLORS['info'],
    'Slightly Effective': COLORS['primary'],
    'Slightly Ineffective': COLORS['tertiary'],
    'Moderately Ineffective': COLORS['warning'],
    'Completely Ineffective': COLORS['quaternary'],
    'Not Applicable': 'gray'
}
provided_prefix = 'intelligence_data_provided_'
completion_prefix = 'intelligence_data_completion_of_role_'


def _average_effectiveness(counts):
    """Return the mean 1-6 score of a rating value_counts() Series.

    Only answers that map to a positive score in effectiveness_map are used;
    'Not Applicable' and unrecognised labels are ignored in both the sum and
    the denominator.

    Returns:
        float | None: The mean score, or None when there is no rated answer.
    """
    rated = {rating: count for rating, count in counts.items()
             if effectiveness_map.get(rating, 0) > 0}
    n_rated = sum(rated.values())
    if n_rated == 0:
        return None
    return sum(effectiveness_map[rating] * count for rating, count in rated.items()) / n_rated


def _plot_average_scores(ax, scores, title, color):
    """Draw a labelled horizontal bar chart of average scores and return the sorted Series."""
    series = pd.Series(scores).sort_values(ascending=True)
    series.plot(kind='barh', ax=ax, color=color)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Average Effectiveness Score (1-6)')
    ax.set_xlim(0, 6)
    for position, (_, score) in enumerate(series.items()):
        ax.text(score + 0.1, position, f'{score:.2f}', va='center', fontsize=9)
    return series


# Application names are whatever follows the known field prefixes, taken from
# both the section summary and the mop_1_1_1 columns actually present in df.
# Stripping the prefix keeps names that themselves contain underscores intact.
candidate_fields = list(mop_summary['field_summaries'].keys())
candidate_fields += [str(col) for col in df.columns if str(col).startswith('mop_1_1_1.')]
intel_apps = set()
for field in candidate_fields:
    short_name = str(field).split('.')[-1]
    for prefix in (provided_prefix, completion_prefix):
        if short_name.startswith(prefix) and len(short_name) > len(prefix):
            intel_apps.add(short_name[len(prefix):])

intel_apps = sorted(intel_apps)

# 1. Intelligence data provision by application
ax1 = plt.subplot(4, 3, 1)
provided_data = {}
for app in intel_apps:
    field_name = f'mop_1_1_1.{provided_prefix}{app}'
    if field_name in df.columns:
        provided_data[app] = df[field_name].value_counts()

avg_scores = {}
for app, counts in provided_data.items():
    score = _average_effectiveness(counts)
    if score is not None:
        avg_scores[app.upper()] = score

if avg_scores:
    scores_series = _plot_average_scores(
        ax1, avg_scores, 'Average Intelligence Data Provision Effectiveness', COLORS['primary'])

# 2. Role completion effectiveness by application
ax2 = plt.subplot(4, 3, 2)
completion_data = {}
for app in intel_apps:
    field_name = f'mop_1_1_1.{completion_prefix}{app}'
    if field_name in df.columns:
        completion_data[app] = df[field_name].value_counts()

avg_completion_scores = {}
for app, counts in completion_data.items():
    score = _average_effectiveness(counts)
    if score is not None:
        avg_completion_scores[app.upper()] = score

if avg_completion_scores:
    comp_series = _plot_average_scores(
        ax2, avg_completion_scores, 'Average Role Completion Support Effectiveness', COLORS['secondary'])

# 3. Overall intelligence effectiveness
ax3 = plt.subplot(4, 3, 3)
if 'mop_1_1_1.intelligence_data_overall_effectiveness' in df.columns:
    overall_counts = df['mop_1_1_1.intelligence_data_overall_effectiveness'].value_counts()
    if not overall_counts.empty:
        colors = [colors_map.get(rating, 'gray') for rating in overall_counts.index]
        overall_counts.plot(kind='pie', ax=ax3, autopct='%1.1f%%', colors=colors, startangle=90)
        ax3.set_title('Overall Intelligence Data Effectiveness', fontsize=14, fontweight='bold')
        ax3.set_ylabel('')

# 4. Effectiveness comparison heatmap
ax4 = plt.subplot(4, 3, (4, 6))  # Span multiple cells
if provided_data and completion_data:
    # One row of provision rating counts per application that has both a
    # provision and a role-completion field.
    comparison_data = []
    app_labels = []

    for app in intel_apps:
        if app in provided_data and app in completion_data:
            app_labels.append(app.upper())
            comparison_data.append([provided_data[app].get(rating, 0) for rating in rating_levels])

    if comparison_data:
        # Column labels are the rating names with the space replaced by a line break.
        comp_df = pd.DataFrame(comparison_data, index=app_labels,
                               columns=[rating.replace(' ', '\n') for rating in rating_levels])
        sns.heatmap(comp_df, annot=True, fmt='d', cmap='RdYlGn', ax=ax4, cbar_kws={'label': 'Response Count'})
        ax4.set_title('Intelligence Data Provision Effectiveness Distribution', fontsize=14, fontweight='bold')
        ax4.set_xlabel('Effectiveness Rating')
        ax4.set_ylabel('Application')

# 5. Provision vs Completion scatter plot
ax5 = plt.subplot(4, 3, 7)
if avg_scores and avg_completion_scores:
    scatter_data = []
    for app in set(avg_scores.keys()) & set(avg_completion_scores.keys()):
        scatter_data.append({
            'app': app,
            'provision': avg_scores[app],
            'completion': avg_completion_scores[app]
        })

    if scatter_data:
        scatter_df = pd.DataFrame(scatter_data)
        ax5.scatter(scatter_df['provision'], scatter_df['completion'], s=100, alpha=0.6, color=COLORS['primary'])

        # Add labels
        for _, row in scatter_df.iterrows():
            ax5.annotate(row['app'], (row['provision'], row['completion']),
                         xytext=(5, 5), textcoords='offset points', fontsize=8)

        # Diagonal reference: points on it have equal provision and completion scores.
        ax5.plot([0, 6], [0, 6], 'k--', alpha=0.3)

        ax5.set_xlabel('Data Provision Effectiveness')
        ax5.set_ylabel('Role Completion Effectiveness')
        ax5.set_title('Provision vs Completion Effectiveness', fontsize=14, fontweight='bold')
        ax5.set_xlim(0, 6)
        ax5.set_ylim(0, 6)
        ax5.grid(True, alpha=0.3)

# 6. Response rate by application
ax6 = plt.subplot(4, 3, 8)
response_rates = {}
total = len(df)
for app in intel_apps:
    field_name = f'mop_1_1_1.{provided_prefix}{app}'
    # total > 0 guards the division when the frame has no rows.
    if field_name in df.columns and total > 0:
        non_na = df[field_name].notna().sum()
        response_rates[app.upper()] = (non_na / total) * 100

if response_rates:
    resp_series = pd.Series(response_rates).sort_values(ascending=True)
    resp_series.plot(kind='barh', ax=ax6, color=COLORS['info'])
    ax6.set_title('Response Rates by Application', fontsize=14, fontweight='bold')
    ax6.set_xlabel('Response Rate (%)')
    ax6.set_xlim(0, 100)
    for i, (app, rate) in enumerate(resp_series.items()):
        ax6.text(rate + 1, i, f'{rate:.1f}%', va='center', fontsize=9)

# 7. Top and bottom performers
ax7 = plt.subplot(4, 3, 9)
if avg_scores:
    # Mean of provision and completion scores; provision alone when the
    # application has no completion score.
    combined_scores = {}
    for app, prov_score in avg_scores.items():
        comp_score = avg_completion_scores.get(app)
        combined_scores[app] = (prov_score + comp_score) / 2 if comp_score is not None else prov_score

    # Best five, then the worst five taken only from what is left, so an
    # application is never listed twice when there are fewer than ten.
    sorted_apps = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)
    top_apps_list = sorted_apps[:5]
    bottom_apps_list = sorted_apps[max(5, len(sorted_apps) - 5):]
    top_bottom = top_apps_list + bottom_apps_list

    apps = [app for app, _ in top_bottom]
    scores = [score for _, score in top_bottom]
    colors = ([COLORS['success']] * len(top_apps_list)
              + [COLORS['quaternary']] * len(bottom_apps_list))

    y_pos = np.arange(len(apps))
    ax7.barh(y_pos, scores, color=colors)
    ax7.set_yticks(y_pos)
    ax7.set_yticklabels(apps)
    ax7.set_xlabel('Combined Effectiveness Score')
    ax7.set_title('Top 5 and Bottom 5 Applications', fontsize=14, fontweight='bold')
    ax7.set_xlim(0, 6)

    for i, score in enumerate(scores):
        ax7.text(score + 0.1, i, f'{score:.2f}', va='center', fontsize=9)

# 8. Effectiveness distribution
ax8 = plt.subplot(4, 3, 10)
all_ratings = []
for app in intel_apps:
    field_name = f'mop_1_1_1.{provided_prefix}{app}'
    if field_name in df.columns:
        all_ratings.extend(df[field_name].dropna().tolist())

if all_ratings:
    rating_counts = pd.Series(all_ratings).value_counts()
    rating_order = rating_levels + ['Not Applicable']
    # Fixed worst-to-best order; levels nobody chose appear with a count of 0.
    rating_counts = rating_counts.reindex(rating_order, fill_value=0)

    colors = [colors_map.get(rating, 'gray') for rating in rating_counts.index]
    rating_counts.plot(kind='bar', ax=ax8, color=colors)
    ax8.set_title('Overall Rating Distribution', fontsize=14, fontweight='bold')
    ax8.set_xlabel('Rating')
    ax8.set_ylabel('Count')
    ax8.tick_params(axis='x', rotation=45)

# 9. NA rate comparison
ax9 = plt.subplot(4, 3, 11)
na_rates = {}
for app in intel_apps:
    field_name = f'mop_1_1_1.{provided_prefix}{app}'
    if field_name in df.columns:
        na_count = (df[field_name] == 'Not Applicable').sum()
        answered = df[field_name].notna().sum()
        if answered > 0:
            # Share of 'Not Applicable' among the non-null answers.
            na_rates[app.upper()] = (na_count / answered) * 100

if na_rates:
    na_series = pd.Series(na_rates).sort_values(ascending=False).head(10)
    na_series.plot(kind='bar', ax=ax9, color=COLORS['warning'])
    ax9.set_title('"Not Applicable" Response Rates (Top 10)', fontsize=14, fontweight='bold')
    ax9.set_xlabel('Application')
    ax9.set_ylabel('NA Rate (%)')
    ax9.tick_params(axis='x', rotation=45)
    ax9.set_ylim(0, 100)

plt.suptitle('MOP 1.1.1 Intelligence Data Analysis', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

## 6. Summary Dashboard

In [None]:
# Create a summary dashboard combining key insights from all sections
fig = plt.figure(figsize=(20, 16))

# 1. Section completion overview
ax1 = plt.subplot(3, 3, 1)
section_completion = {}
for section in sections_to_analyze:
    if section in processor.sections:
        fields = processor.sections[section]
        completion_rates = [df[field].notna().mean() for field in fields if field in df.columns]
        section_completion[section] = np.mean(completion_rates) if completion_rates else 0

comp_series = pd.Series(section_completion)
comp_series.plot(kind='bar', ax=ax1, color=COLORS['primary'])
ax1.set_title('Average Completion Rate by Section', fontsize=14, fontweight='bold')
ax1.set_xlabel('Section')
ax1.set_ylabel('Completion Rate')
ax1.set_ylim(0, 1)
ax1.tick_params(axis='x', rotation=45)
for i, (section, rate) in enumerate(comp_series.items()):
    ax1.text(i, rate + 0.02, f'{rate:.1%}', ha='center', fontweight='bold')

# Panel 2: cyber-operations experience level, split by operator status.
ax2 = plt.subplot(3, 3, 2)
operator_col = 'role_and_echelon.is_cyber_operator'
cyber_exp_col = 'operational_jcc2_experience.exp_cyberoperations'
if {operator_col, cyber_exp_col}.issubset(df.columns):
    cyber_exp_cross = pd.crosstab(df[operator_col], df[cyber_exp_col])
    # Transpose so experience levels sit on the x-axis and each operator
    # status becomes its own bar series.
    cyber_exp_cross.T.plot(kind='bar', ax=ax2,
                           color=[COLORS['success'], COLORS['warning']])
    ax2.set_title('Cyber Operations Experience by Operator Status',
                  fontsize=14, fontweight='bold')
    ax2.set(xlabel='Experience Level', ylabel='Count')
    ax2.legend(title='Is Cyber Operator')
    ax2.tick_params(axis='x', rotation=45)

# 3. Top applications by usage and effectiveness
ax3 = plt.subplot(3, 3, 3)
# Combined score per application = mean of
#   adoption      -> share of non-'NA' answers in the experience field, and
#   effectiveness -> mean positive intelligence rating divided by 6.
# NOTE(review): `effectiveness_map` is defined in an earlier cell; the divisor
# 6 presumably equals its highest rating value -- confirm.
app_scores = {}
for app in ['a2it', 'cad', 'codex', 'crucible', 'cyber9line', 'dispatch',
            'jcc2cyberops', 'jcc2readiness', 'madss', 'rally', 'redmap',
            'sigact', 'threathub', 'triage', 'unity']:
    exp_field = f'operational_jcc2_experience.exp_app_{app}'
    intel_field = f'mop_1_1_1.intelligence_data_provided_{app}'

    adoption = 0
    effectiveness = 0

    if exp_field in df.columns:
        exp_counts = df[exp_field].value_counts()  # NaN answers are excluded
        total = exp_counts.sum()
        na_count = exp_counts.get('NA', 0)
        if total > 0:
            adoption = (total - na_count) / total

    if intel_field in df.columns:
        ratings = df[intel_field].dropna()
        if len(ratings) > 0:
            # Labels missing from the map become 0 and are then ignored.
            numeric_ratings = ratings.map(effectiveness_map).fillna(0)
            positive_ratings = numeric_ratings[numeric_ratings > 0]
            if len(positive_ratings) > 0:
                effectiveness = positive_ratings.mean() / 6

    if adoption > 0 or effectiveness > 0:
        app_scores[app.upper()] = (adoption + effectiveness) / 2

# Bind `top_apps` unconditionally: the key-metrics panel further down reads it
# even when no application produced a score, which previously raised NameError.
top_apps = pd.Series(app_scores, dtype=float).sort_values(ascending=False).head(10)

if not top_apps.empty:
    top_apps.plot(kind='bar', ax=ax3, color=COLORS['tertiary'])
    ax3.set_title('Top 10 Applications (Combined Score)', fontsize=14, fontweight='bold')
    ax3.set_xlabel('Application')
    ax3.set_ylabel('Combined Score (0-1)')
    ax3.tick_params(axis='x', rotation=45)

# 4. Response distribution over time
ax4 = plt.subplot(3, 3, 4)
if 'user_information.date' in df.columns:
    # Keep the month bucket in a local Series rather than writing a helper
    # column into `df`: an extra column would be picked up by whole-frame
    # statistics such as the `df.notna()` count in the key-metrics panel.
    response_months = pd.to_datetime(df['user_information.date']).dt.to_period('M')
    monthly_counts = response_months.value_counts().sort_index()
    monthly_counts.index = monthly_counts.index.to_timestamp()
    # Only plot when at least one response has a usable date.
    if not monthly_counts.empty:
        monthly_counts.plot(kind='line', ax=ax4, color=COLORS['secondary'],
                            marker='o', linewidth=2)
    ax4.set_title('Response Trend Over Time', fontsize=14, fontweight='bold')
    ax4.set_xlabel('Month')
    ax4.set_ylabel('Number of Responses')
    ax4.grid(True, alpha=0.3)

# 5. Experience level distribution summary
ax5 = plt.subplot(3, 3, 5)
# Tally every per-application experience answer into four fixed buckets.
# Answers whose label is not one of these keys (e.g. 'NA') are not counted.
exp_summary = {'< 1 Year': 0, '1-3 Years': 0, '3-5 Years': 0, '> 5 Years': 0}
exp_fields = [col for col in df.columns if 'operational_jcc2_experience.exp_app_' in col]
for field in exp_fields:
    counts = df[field].value_counts()
    for exp_level in exp_summary:
        exp_summary[exp_level] += counts.get(exp_level, 0)

exp_series = pd.Series(exp_summary)
if exp_series.sum() > 0:
    exp_series.plot(kind='pie', ax=ax5, autopct='%1.1f%%', startangle=90,
                    colors=[COLORS['info'], COLORS['primary'], COLORS['tertiary'], COLORS['success']])
    ax5.set_ylabel('')
else:
    # An all-zero series cannot be normalised into wedge fractions (0/0),
    # so show a placeholder instead of attempting the pie.
    ax5.axis('off')
    ax5.text(0.5, 0.5, 'No experience data available', ha='center', va='center',
             transform=ax5.transAxes, fontsize=12)
ax5.set_title('Overall Experience Distribution', fontsize=14, fontweight='bold')

# 6. Key metrics summary
ax6 = plt.subplot(3, 3, 6)
ax6.axis('off')

# Guard each column lookup the same way the other panels do, so a missing
# column shows 'N/A' instead of aborting the whole dashboard with a KeyError.
event_col = 'user_information.event'
unit_col = 'user_information.unit'
operator_col = 'role_and_echelon.is_cyber_operator'

unique_events = df[event_col].nunique() if event_col in df.columns else 'N/A'
unique_units = df[unit_col].nunique() if unit_col in df.columns else 'N/A'
cyber_operators = (df[operator_col] == 'Yes').sum() if operator_col in df.columns else 'N/A'
avg_fields_completed = df.notna().sum(axis=1).mean()

# `top_apps` is produced by the top-applications panel; tolerate it being
# unbound (no application scores available) rather than raising NameError.
ranked_apps = globals().get('top_apps')
top_application = (ranked_apps.index[0]
                   if ranked_apps is not None and len(ranked_apps) > 0
                   else 'N/A')

metrics_text = f"""
Key Metrics Summary:

Total Responses: {len(df)}
Unique Events: {unique_events}
Unique Units: {unique_units}
Cyber Operators: {cyber_operators}
Average Fields Completed: {avg_fields_completed:.0f}
Most Used Application: {top_application}
"""
ax6.text(0.1, 0.5, metrics_text, transform=ax6.transAxes, fontsize=12,
         verticalalignment='center', fontfamily='monospace',
         bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

# Panel 7: completion-rate heatmap, one row per section.
ax7 = plt.subplot(3, 3, (7, 9))  # occupies the whole bottom row
present_sections = [name for name in sections_to_analyze if name in processor.sections]
section_names = [name.replace('_', ' ').title() for name in present_sections]
# Each row holds the non-null share of (at most) the first ten fields of a
# section; fields that are not columns of the frame are left out.
quality_matrix = [
    [df[col].notna().mean()
     for col in processor.sections[name][:10]
     if col in df.columns]
    for name in present_sections
]

if quality_matrix:
    quality_df = pd.DataFrame(quality_matrix, index=section_names)
    sns.heatmap(quality_df, annot=True, fmt='.2f', cmap='RdYlGn', ax=ax7,
                vmin=0, vmax=1, cbar_kws={'label': 'Completion Rate'})
    ax7.set_title('Data Quality Heatmap by Section', fontsize=14, fontweight='bold')
    ax7.set(xlabel='Field Index', ylabel='Section')

# Finish the figure: overall title, layout pass, render.
plt.suptitle('JCC2 User Questionnaire - Executive Summary Dashboard',
             fontsize=18, fontweight='bold')
plt.tight_layout()
plt.show()

## 7. Export Visualizations

In [None]:
# Function to prepare the export directory and list the exportable figures
def save_all_visualizations(output_dir='jcc2_visualizations'):
    """
    Prepare an export directory and print instructions for saving figures.

    This is a scaffold: it does not render or save any figure itself. It
    creates ``output_dir`` if it does not exist, prints a ready-to-paste
    ``plt.savefig`` call, and lists the visualization sections that can be
    exported.

    Parameters
    ----------
    output_dir : str, optional
        Directory that exported images should be written to. Defaults to
        'jcc2_visualizations'.

    Returns
    -------
    None
    """
    import os

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    print(f"Saving visualizations to {output_dir}/...")

    # Note: In a real implementation, you would re-run each visualization
    # and save it using plt.savefig(). Here's an example structure:

    sections = [
        'user_information_analysis',
        'role_echelon_analysis',
        'operational_experience_analysis',
        'mop_111_intelligence_analysis',
        'executive_summary_dashboard'
    ]

    print("\nTo save visualizations, re-run each section's code and add:")
    # Substitute the real path into the snippet: `output_dir` exists only
    # inside this function, so a pasted f'{output_dir}/...' would raise
    # NameError in the notebook.
    print(f"plt.savefig('{output_dir}/section_name.png', dpi=300, bbox_inches='tight')")
    print("\nVisualization sections ready for export:")
    for section in sections:
        print(f"  - {section}")

# Call the function: this creates the output directory (if it does not exist)
# and prints the export instructions plus the list of exportable sections.
# No image files are written by this call.
save_all_visualizations()

In [None]:
# Closing text report: data source, response count, date range, the sections
# that were analysed, and a fixed list of what the notebook produced.
banner = "=" * 80
print(banner)
print("JCC2 USER QUESTIONNAIRE ANALYSIS COMPLETE")
print(banner)
print(f"\nData Source: {csv_file}")
print(f"Total Responses Analyzed: {len(df)}")
response_dates = df['user_information.date']
print(f"Date Range: {response_dates.min()} to {response_dates.max()}")
print(f"\nSections Analyzed:")
for section in sections_to_analyze:
    if section not in processor.sections:
        continue
    print(f"  ✓ {section}: {len(processor.sections[section])} fields")

print("\nKey Insights Generated:")
for insight_line in (
    "  • User demographics and participation patterns",
    "  • Role distribution and cyber operator analysis",
    "  • Application experience levels and adoption rates",
    "  • Intelligence data effectiveness ratings",
    "  • Cross-sectional quality and completion metrics",
):
    print(insight_line)

print("\nVisualization Types Created:")
for chart_line in (
    "  • Bar charts, pie charts, and histograms",
    "  • Heatmaps and correlation matrices",
    "  • Time series and trend analyses",
    "  • Scatter plots and cross-tabulations",
    "  • Executive summary dashboard",
):
    print(chart_line)

print("\n" + banner)