# Perform Topic Extraction and Multi Label Classification

In [None]:
import pandas as pd 
from openai import OpenAI
import keyring
from typing import List, Union, Dict

import re
import time

import datetime
date = datetime.datetime.now().strftime("%Y-%m-%d")

from Classification_Helpers import (shuffle_extract, shuffle_extract_w_defs,
                                    wide_encode_output, getNumTokens, 
                                    get_binary_counts, get_topic_counts, get_wide_net_binary,
                                    get_running_cost, multiclassifyVerbatim, multiclassifyVerbatimwDefs
                                )

### Setting up OpenAI API Key

For each new project, make sure there is a new Project set up in the OpenAI NAXION organization.

  - Create a new API key for yourself in the project.
  - Store the API key in Windows Credential Manager using the following naming scheme:
    - Address: OPENAI_KEY_{JOB #}
    - Username: {JOB #}
    - Password: {API KEY}

In [None]:
# Build the OpenAI client from a key held in the OS credential store.
# `key` is the job identifier: it forms the suffix of the credential's service
# name and is also the username the secret is stored under.
key = "DEVELOPMENT"
credential_service = f"OPENAI_KEY_{key}"
client = OpenAI(api_key=keyring.get_password(credential_service, key))

# Preprocessing Data

Here we need to load in the data and get it into the proper format for the extraction and classification. This could be a different process for every new project since data is always different.

In [None]:
# Load the verbatims and expose the respondent identifier under the name NAID,
# which is the ID column the rest of the notebook refers to.
input_df = pd.read_csv('Input_Data/Verbatims.csv').rename(columns={'ID': 'NAID'})

In [None]:
# Every column except the respondent ID is treated as a question to process.
cols = input_df.columns.drop("NAID").tolist()
cols

In [None]:
# Extra context for each question, keyed by question column name. The entry for
# a question is handed to the extraction and classification helpers as their
# context argument.
q_context = {
    "Q5a": "Respondents were asked to identify the company/provider and Large Language Model (LLM) they find the most relevant and to provide a brief explanation for their choice. These are the explanations provided by the respondents.",
    "Q5b": "Respondents were asked to identify the company/provider and Large Language Model (LLM) they find the least relevant and to provide a brief explanation for their choice. These are the explanations provided by the respondents."
}

In [None]:
# For each question, report the number of non-missing verbatims and the token
# count of those verbatims joined into one "|"-separated string.
for col in cols:
    responses = input_df[col].dropna()
    print(f"{col}: {len(responses)}")
    print(f"Tokens: {getNumTokens('|'.join(responses))}")

# Topic Extraction

In [None]:
# Extracted topics per question column; each value is later set to a mapping
# with the keys 'Topics_1', 'Topics_2' and 'Topics_3'.
topics_dict = {}

In [None]:
# Run topic extraction three times per question column and keep the three
# results side by side under Topics_1..Topics_3. Timings are printed in minutes.
start_time = time.time()
for col in cols:
    q_time = time.time()
    normal_context = q_context[col]
    print(f'Context: {normal_context}')

    # The comprehension evaluates the runs in order 1, 2, 3 and only assigns
    # into topics_dict once all three have finished.
    topics_dict[col] = {
        f'Topics_{run}': shuffle_extract(client, input_df, col, 40, normal_context)
        for run in range(1, 4)
    }

    question_minutes = round((time.time() - q_time) / 60, 2)
    print(f"Time for {col}: {question_minutes} minutes\n")

print("-" * 50)
total_minutes = round((time.time() - start_time) / 60, 2)
print(f"Total time: {total_minutes} minutes")

## Saving to Excel

In [None]:
# Write the extracted topics to a single workbook, one sheet per question.
# The xlsxwriter engine is requested explicitly because `set_column` below is
# an xlsxwriter worksheet method.
with pd.ExcelWriter('Output/Topics/Topics_Extracted.xlsx', engine='xlsxwriter') as writer:
    # `topic_runs` (not `dict`) so the builtin is not shadowed for later cells.
    for col, topic_runs in topics_dict.items():
        # Wrapping every run in a Series lets runs of different lengths share
        # one frame (shorter runs are padded with NaN) instead of raising.
        df = pd.DataFrame({run: pd.Series(topics) for run, topics in topic_runs.items()})
        df.to_excel(writer, sheet_name=col, index=False)
        worksheet = writer.sheets[col]
        # set all columns to have width of 50
        worksheet.set_column('A:C', 50)
    # No explicit writer.save(): leaving the `with` block saves and closes the
    # workbook, and ExcelWriter.save() no longer exists in pandas 2.x.

# Multilabel Classification

In [None]:
def _format_indicator_sheet(worksheet, n_rows, n_cols, wrap_fmt, center_fmt, hit_fmt):
    """Apply the shared layout of a 0/1 indicator sheet whose header is in row 0.

    Columns A and B get fixed widths, the remaining ``n_cols - 2`` columns are
    centred, cells equal to 1 in the data area are highlighted with
    ``hit_fmt``, and the header row plus the first two columns are frozen.
    """
    worksheet.set_column('A:A', 15, center_fmt)
    worksheet.set_column('B:B', 50, wrap_fmt)
    if n_cols > 2:
        worksheet.set_column(2, n_cols - 1, 35, center_fmt)
        if n_rows > 0:
            # Data occupies rows 1..n_rows (row 0 is the header), so the range
            # has to end at n_rows to include the last data row.
            worksheet.conditional_format(1, 2, n_rows, n_cols - 1, {
                'type': 'cell',
                'criteria': '=',
                'value': 1,
                'format': hit_fmt,
            })
    worksheet.freeze_panes(1, 2)  # Freeze first row and first 2 columns.


def write_to_excel(path, dfs, topics_list, net_groups=None, tab_colors=None, question_text=None):
    """Write classified verbatims to a formatted, multi-sheet Excel workbook.

    For every entry of ``dfs`` the workbook receives ``<name>_raw``,
    ``<name>_counts`` and ``<name>_granular`` sheets, plus ``<name>_net_binary``
    and ``<name>_net_counts`` when ``net_groups`` is provided.

    Args:
        path: Destination path of the .xlsx file.
        dfs: Mapping of sheet-name prefix to classified DataFrame. The text
            before the first underscore of each key is used as the question
            ID; each frame is expected to hold 'NAID' and that question column.
        topics_list: Mapping of question ID to the ordered topic names; it
            determines the column order of the granular sheet.
        net_groups: Optional mapping of question ID to the net grouping passed
            to ``get_wide_net_binary``. When falsy, no net sheets are written.
        tab_colors: Optional mapping of ``dfs`` key to a worksheet tab colour.
            Keys without an entry keep the default tab colour.
        question_text: Optional mapping of question ID to the text shown in
            the merged title row of the raw sheet. When omitted, a
            notebook-level ``agent_qs`` dict is used if one exists; a question
            missing from the mapping falls back to its ID.
    """
    if question_text is None:
        # Earlier versions read a global `agent_qs`; keep honouring it when it
        # is defined, but do not fail with NameError when it is not.
        question_text = globals().get('agent_qs') or {}

    # Write to excel with each sheet being a different format. The formats and
    # worksheet calls below are xlsxwriter APIs, so pin the engine.
    with pd.ExcelWriter(path, engine='xlsxwriter') as writer:
        workbook = writer.book

        # Create text wrap format
        text_wrap = workbook.add_format({'text_wrap': True})
        center_wrap = workbook.add_format({'text_wrap': True, 'valign': 'vcenter', 'align': 'center'})
        center_wrap_blue = workbook.add_format({'text_wrap': True, 'valign': 'vcenter', 'align': 'center', 'bg_color': '#009999'})
        # Create a format for the question (centered and bold)
        format_q = workbook.add_format({'align': 'center', 'valign': 'vcenter', 'bold': True})

        for df_name, df in dfs.items():
            print(df_name)
            question = df_name.split('_')[0]
            topics = topics_list[question]

            # Replace the sequence /' with a plain apostrophe, strip _x000D_
            # artifacts and turn newlines into "|" so each cell stays on one line.
            df = df.replace({"/'": "'", "_x000D_": "", "\\n": "|"}, regex=True)

            # Raw: data starts on the second row; the first row holds the title.
            df.to_excel(writer, sheet_name=f'{df_name}_raw', index=False, startrow=1)
            n_cols_raw = df.shape[1]
            worksheet_raw = writer.sheets[f'{df_name}_raw']
            worksheet_raw.set_column('A:A', 15, center_wrap)
            worksheet_raw.set_column('B:B', 50, text_wrap)
            if n_cols_raw > 2:
                # Numeric column indices work past column Z, unlike chr(65 + n).
                worksheet_raw.set_column(2, n_cols_raw - 1, 35, center_wrap)
            worksheet_raw.freeze_panes(2, 2)  # Freeze first 2 rows and first 2 columns.
            title = question_text.get(question, question)
            if n_cols_raw > 1:
                # Add the question as the first row and merge the cells horizontally
                worksheet_raw.merge_range(0, 0, 0, n_cols_raw - 1, title, format_q)
            else:
                # A single cell cannot be merged; write the title directly.
                worksheet_raw.write(0, 0, title, format_q)

            # Counts
            topic_counts = get_topic_counts(df).reset_index(drop=False).rename(columns={'index': 'Topics'})
            topic_counts.to_excel(writer, sheet_name=f'{df_name}_counts', index=False)
            worksheet_counts = writer.sheets[f'{df_name}_counts']
            worksheet_counts.set_column('A:A', 50, center_wrap)
            worksheet_counts.set_column('B:B', 15, center_wrap)

            # Binary: one indicator column per topic, in topics_list order.
            wide_output_binary = wide_encode_output(df, id_cols=['NAID', question], bin_rank='binary')
            col_order = ['NAID', question] + [x for x in topics if x in wide_output_binary.columns]
            if 'Other' in wide_output_binary.columns:
                col_order.append('Other')
            wide_output_binary[col_order].to_excel(writer, sheet_name=f'{df_name}_granular', index=False)
            worksheet_granular = writer.sheets[f'{df_name}_granular']
            # Size the formatting to the columns actually written.
            _format_indicator_sheet(worksheet_granular, wide_output_binary.shape[0], len(col_order),
                                    text_wrap, center_wrap, center_wrap_blue)

            sheets_to_color = [worksheet_raw, worksheet_counts, worksheet_granular]

            if net_groups:
                # Net Binary
                wide_net_binary = get_wide_net_binary(wide_output_binary, question, net_groups[question])
                wide_net_binary.to_excel(writer, sheet_name=f'{df_name}_net_binary', index=False)
                worksheet_net_binary = writer.sheets[f'{df_name}_net_binary']
                n_rows, n_cols = wide_net_binary.shape
                _format_indicator_sheet(worksheet_net_binary, n_rows, n_cols,
                                        text_wrap, center_wrap, center_wrap_blue)

                # Net Counts
                net_counts = get_binary_counts(wide_net_binary)
                net_counts.to_excel(writer, sheet_name=f'{df_name}_net_counts', index=False)
                worksheet_net_counts = writer.sheets[f'{df_name}_net_counts']
                worksheet_net_counts.set_column('A:A', 50, center_wrap)
                worksheet_net_counts.set_column('B:B', 15, center_wrap)
                worksheet_net_counts.freeze_panes(1, 2)

                sheets_to_color += [worksheet_net_binary, worksheet_net_counts]

            # Only colour tabs for entries that actually have a colour configured.
            color = tab_colors.get(df_name) if tab_colors else None
            if color:
                for sheet in sheets_to_color:
                    sheet.set_tab_color(color)

### Final Set of Topics

This code chunk is useful if you have Copilot. `raw_topics_q06` is what the PM provides to me, and `list_topics_q06` is the format I want it in. I keep that pair as an example; then I can paste the PM's input into `raw_topics_q39` (or whichever question it is), and Copilot will automatically generate the topic list in the format I want. Then I put it in the `topics_list` dictionary.

In [None]:
# Worked example for the conversion: raw_topics_q06 is the
# "Group: topic, topic, ..." text, and list_topics_q06 is the same set of
# topics flattened into a Python list with the group names dropped.
# NOTE(review): the raw text says "In-person connections" while the list has
# 'In-person connection' — confirm which spelling is intended.
raw_topics_q06 = """Remote/Hybrid Work Model: Hybrid work model, Lack of face-to-face time, Virtual connection, In-person connections
DE&I: Age diversity, Diversity and inclusion, Diversity in leadership, Inclusivity initiatives, Cultural differences, Bias awareness
Employee Training/Development: Mentorship programs, Learning circles, Career development, Professional growth, Leadership development, Professional relationships, Training opportunities, Personal growth opportunities
Culture/Values: Company culture, Feedback culture, Family-oriented culture, Cultural fit, Alignment with values, Company values, Traditional family emphasis, Corporate purpose, Open door culture, Collaborative work culture, Cathy family influence, Sense of belonging
Employee Well-Being/Care: Employee well-being, Work-life balance, Employee empowerment, Employee engagement, Recognition, Recognition practices
Peer practices: Peer relationships, Peer support, Peer interaction, Team interaction
Events/Activities: Team retreats, Social activities, In-person events, Cultural cornerstone events
Communication: Supervisory communication, Departmental communication, Direct communication
Collaboration: Team collaboration, Cross-functional projects, Departmental connection

"""

# Flattened form of raw_topics_q06, one row of the list per group above.
list_topics_q06 = ["Hybrid work model", "Lack of face-to-face time", "Virtual connection", 'In-person connection',
               "Age diversity", "Diversity and inclusion", "Diversity in leadership", "Inclusivity initiatives", "Cultural differences", "Bias awareness",
               "Mentorship programs", "Learning circles", "Career development", "Professional growth", "Leadership development", "Professional relationships", "Training opportunities", "Personal growth opportunities",
               "Company culture", "Feedback culture", "Family-oriented culture", "Cultural fit", "Alignment with values", "Company values", "Traditional family emphasis", "Corporate purpose", "Open door culture", "Collaborative work culture", "Cathy family influence", "Sense of belonging",
               "Employee well-being", "Work-life balance", "Employee empowerment", "Employee engagement", "Recognition", "Recognition practices",
               "Peer relationships", "Peer support", "Peer interaction", "Team interaction",
               "Team retreats", "Social activities", "In-person events", "Cultural cornerstone events",
               "Supervisory communication", "Departmental communication", "Direct communication",
               "Team collaboration", "Cross-functional projects", "Departmental connection"]

# Second raw-text / flattened-list pair, in the same "Group: topic, ..." format.
# NOTE(review): on the "Management:" line a "Communication:" group label
# appears mid-line, so a line break seems to be missing in the raw text; the
# flattened list simply continues with the communication topics.
# NOTE(review): "*Safety" carries a leading asterisk whose meaning is not
# visible here — confirm with whoever supplied the text.
raw_topics_q39 = """
Workload: Work-Life Balance
Hybrid/Remote Model: Flexible Future Model, Hybrid Work Challenges, Home Office Setup Support, Traffic and Commute, Remote Work Stipends 
In-Person/On-site Collaboration: In-Person Collaboration, Team Days Onsite, In-Person Meetings
Technology: Remote Work Tools, Technology Upgrades, Meeting Technology Issues, Meeting Room Tech Reliability, Office Technology Consistency, Workplace Technology Consistency
Office Design/Management: Private Workspaces, Office Space Availability, Personalized Work Environment, Office Design, Office Amenities, Meeting Room Availability, 
Meeting Frequency: Meeting Effectiveness, 
Productivity: Productivity Tools, Staff Training on Productivity, Heads Down Work,
Collaboration: Team Connection Days, Teamwork and Collaboration
Culture/Values: Cultural Connection,
Training/Development: Professional Development
Management: Leadership Visibility, Staff Accountability, Team Building, Team Coordination, Team Connection, Defined Goals and Expectations, Communication: Cross-Functional Communication, Clear Communication Channels, Communication Tools, Communication Channels
Engagement: Staff Engagement, 
Cultural Events: Cornerstone Events, Cultural Events
Benefits: On-Site Childcare Options
*Safety: Workplace Safety
"""

# Flattened form of raw_topics_q39, one row of the list per line of raw text.
list_topics_q39 = ["Work-Life Balance", 
                   "Flexible Future Model", "Hybrid Work Challenges", "Home Office Setup Support", "Traffic and Commute", "Remote Work Stipends",
                   "In-Person Collaboration", "Team Days Onsite", "In-Person Meetings",
                   "Remote Work Tools", "Technology Upgrades", "Meeting Technology Issues", "Meeting Room Tech Reliability", "Office Technology Consistency", "Workplace Technology Consistency",
                   "Private Workspaces", "Office Space Availability", "Personalized Work Environment", "Office Design", "Office Amenities", "Meeting Room Availability",
                   "Meeting Effectiveness",
                   "Productivity Tools", "Staff Training on Productivity", "Heads Down Work",
                   "Team Connection Days", "Teamwork and Collaboration",
                   "Cultural Connection",
                   "Professional Development",
                   "Leadership Visibility", "Staff Accountability", "Team Building", "Team Coordination", "Team Connection", "Defined Goals and Expectations", "Cross-Functional Communication", "Clear Communication Channels", "Communication Tools", "Communication Channels",
                   "Staff Engagement",
                   "Cornerstone Events", "Cultural Events",
                   "On-Site Childcare Options",
                   "Workplace Safety"]


In [None]:
# Final topic list per question, keyed by question ID. write_to_excel receives
# this mapping and looks up the entry for each classified DataFrame's question.
# NOTE(review): the keys here are Q06/Q39/Q40 while q_context is keyed by
# Q5a/Q5b — confirm the keys match the columns of the loaded data before running.
# NOTE(review): the 'Q39' entry differs from list_topics_q39 — confirm which
# of the two is current.
topics_list ={
    
    'Q06' : ["Hybrid work model", "Lack of face-to-face time", "Virtual connection", 'In-person connection',
               "Age diversity", "Diversity and inclusion", "Diversity in leadership", "Inclusivity initiatives", "Cultural differences", "Bias awareness",
               "Mentorship programs", "Learning circles", "Career development", "Professional growth", "Leadership development", "Professional relationships", "Training opportunities", "Personal growth opportunities",
               "Company culture", "Feedback culture", "Family-oriented culture", "Cultural fit", "Alignment with values", "Company values", "Traditional family emphasis", "Corporate purpose", "Open door culture", "Collaborative work culture", "Cathy family influence", "Sense of belonging",
               "Employee well-being", "Work-life balance", "Employee empowerment", "Employee engagement", "Recognition", "Recognition practices",
               "Peer relationships", "Peer support", "Peer interaction", "Team interaction",
               "Team retreats", "Social activities", "In-person events", "Cultural cornerstone events",
               "Supervisory communication", "Departmental communication", "Direct communication",
               "Team collaboration", "Cross-functional projects", "Departmental connection"],
    'Q39' : ["Cathy Family Leadership", "Biblical Principles", "Culture of Care", "Generosity and Giving", "Corporate Purpose", "Prayer and Devotion", "Positive Workplace Culture", "Purpose-Driven Work", "Culture of Hospitality", "Volunteerism and Philanthropy", "Brand Reputation", "Caring Company Reputation", "Positive Work Environment",
                "Ethical Business Practices", "Sustainability Efforts",
                "Cultural Ambassadorship", "Cultural Cornerstone Events",
                "DEI Initiatives", "Inclusive Workforce",
                "Employee Well-being", "Employee Benefits", "Employee Engagement", "Recognition and Rewards", "Work-Life Balance", "Team Member Recognition", "Team Member Support", "Health and Wellness Programs", "Operator Support",
                "Team Member Development", "Professional Development", "Professional Growth", "Leadership Development", "Staff Development Opportunities",
                "Community Service",
                "Remote Work Support", "Flexible Work Environment",
                "Customer Care Initiatives", "Customer Service Excellence", "Customer Feedback", "Growth and Adaptation", "Continuous Improvement", "Innovation and Growth", "Cultural Adaptation", "Cultural Drift Concerns",
                "Employee Feedback Mechanisms", "Transparent Communication",
                "Purpose-Driven Hiring",
                "Employee Onboarding Experience",
                "Team Collaboration", "Collaborative Culture",
                "Strategic Priorities", "Strategic Decision Making", "Scholarship Programs",
                "Servant Leadership", "Caring Leadership", "Stewardship of Resources"],
    'Q40' : ["DE&I Initiatives",
              "Remote Work, Flexible Futures Model",
              "Staff Onboarding", "Essentials Program",
              "Cathy Family Involvement", "Truett's Legacy",
              "Core Values", "Corporate Purpose", "Cultural Reinforcement", "Cultural Consistency", "Biblical Principles", "Purpose-Driven Work", "Cultural Storytelling", "Cultural Cornerstones", "Staff Accountability",
              "Cultural Celebrations", "Cultural Events", "Cultural Cornerstone Events", "Cultural Ambassadors", "Cultural Adaptation", "Founder's Day", "Staff Summit", "NEXT",
              "Cultural Fit", "Hiring Practices", "Cultural Training", "Talent Selection",
              "Team Member Support", "Staff Care", "Team Member Care", "Employee Well-being", "Work-Life Balance", "Employee Empowerment", "Employee Recognition", "Team Retreats", "Employee Benefits",
              "Community Service", "Community Engagement", "Staff-Community Engagement",
              "Employee Feedback", "Employee Development", "Mentorship Programs", "Leadership Development", "Team Development", "Accountability & Feedback", "Recognition Programs", "Personal Connection Opportunities", "Professional Development",
              "Transparent Communication", "Transparency in Decision Making",
              "Customer Focus", "Customer Experience",
              "Operator Engagement", "Staff-Operator Connection"]
}

### Perform the Classification

In [None]:
# Classification results per question: question ID -> DataFrame of verbatims
# merged with their assigned labels.
classified_dfs = {}

In [None]:
# Classify every verbatim of every question against that question's topic
# list, pickle the per-question result, and report elapsed time and API cost.
start_time = time.time()
start_cost = get_running_cost()

for question in cols:
    q_time = time.time()
    q_cost = get_running_cost()
    print(f'\n\nQuestion {question}')

    # Keep only the respondents who gave an answer to this question.
    df = input_df.loc[:, ['NAID', question]].dropna().reset_index(drop=True)

    print(f'Number of Verbatims: {df.shape[0]}')
    print('---------------------------------')

    context = q_context[question]
    # Topics come from the per-question topic dictionary. `cols` is a plain
    # list of column names, so indexing it with a question ID raised TypeError.
    topics = sorted(topics_list[question])

    # One classifier call per row; row.name is the positional index after the
    # reset_index above. The per-row results are concatenated below, so each
    # call is expected to return a DataFrame.
    raw_labels: pd.Series = df.apply(
        lambda row: multiclassifyVerbatim(client, row[question], topics=topics,
                                          context=context,
                                          id=row['NAID'], i=row.name, max_labels=3, n_resp=1),
        axis=1)

    raw_labels_df = pd.concat(raw_labels.to_list(), ignore_index=True).rename(columns={'ID': 'NAID'})

    # Merge on the columns the two frames share, then persist the result so a
    # finished question does not have to be re-classified.
    classified_dfs[question] = pd.merge(df, raw_labels_df)
    classified_dfs[question].to_pickle(f'Output/Pickles/Topics_Classified_{question}_{date}.pkl')

    print('---------------------------------')
    print(f'Question {question} took {time.time() - q_time:.2f} seconds')
    print(f'Cost of Question {question}: ${get_running_cost() - q_cost:.2f}')


print(f'\n\n\nTotal Time: {time.time() - start_time:.2f} seconds')
print(f'Total Cost: ${get_running_cost() - start_cost:.2f}')

In [None]:
# ldate = '2024-02-15'
# for question in cols + ['Q02a_Agree', 'Q02a_Disagree', 'Q04a_Agree', 'Q04a_Disagree', 'Q07a_Agree', 'Q07a_Disagree', 'Q09a_Agree', 'Q09a_Disagree', 'Q15_Paycheck', 'Q15_Passion', 'Q15_Neutral']:
#     staff_classified_dfs[question] = pd.read_pickle(f'Output/Pickles/Staff_Topics_Classified_{question}_{ldate}.pkl')

In [None]:
# Pass None for net_groups explicitly so the call is valid whether or not the
# parameter has a default; write_to_excel skips the net sheets when it is falsy.
write_to_excel(f'Output/Labels/Topics_Classified_{date}.xlsx', classified_dfs, topics_list, None)

## Perform Classification with Definitions

In [None]:
# Topic definitions per question: question ID -> {topic name: definition}.
# An empty string means the topic has no definition.
# NOTE(review): "Commision Structure" looks like a misspelling of "Commission";
# the same spelling is used in net_groups, so change both together if it is fixed.
topics_defs_dict = {
    "Q5a": {
        "Compensation Concerns": "",
        "Commision Structure": "",
        "Receiving Leads Concerns": "",
        "Prospecting": "Anything related to prospecting, client acquisition, generating new business or competition.",
        "Challenging Job/ Challenging Market": "",
        "Client Experience Issues": "",
        "Client Onboarding/Application Challenges": "Anything related to complexity in the client's application, e-app process, or onboarding.",
        "Client Communication": "Anything related to challenges with communicating with the client, including platforms that allow for securely sharing documents.",
        "Client Service Delays": "",
        "Client Retention Strategies": "Anything related to retention of Clients, should not include retention of Agents.",
        "Client Relationship Management": "",
        "Client Trust Building": "",
        "Inconsistent Information from Service Center": "",
        "Service Center Inefficiency/Issues": "Anything related to the Service Center causing inefficiencies, delays, or other problems.",
        "Marketing Support": "Challenges related to digital marketing, local marketing, and product marketing support.",
        "Lack of Agent Support": "General lack of agent support, including challenges with agent resource allocation.",
        "Sales Strategy Support": "Include sales strategy and business growth support.",
        "Concerns With GO": "GO refers to the local office or the leadership based at that office. Include any negative concerns with the GO, office environment or GO staff/leadership.",
        "Underwriting Process Concerns": "Any challenges related to the underwriting or application approval process, including delays.",
        "High Underwriting Standards": "High underwriting standards includes declinations, rejections, or overly strict approvals for new business/clients.",
        "Difficulties With Financial Products": "Financial products includes mentions of general financial products, annuities, mutual funds, investments. This should not include references to life products -- it must mention financial products.",
        "Product Pricing": "",
        "Product Competitiveness": "",
        "Technology/Platform Issues": "",
        "Portal Concerns": "",
        "Service Form E-Signature Limitations": "Include challenges with e-signatures and wet signatures.",
        "System Complexity/Concerns Navigating Systems": "Challenges with systems being overly complex, concerns with not understanding the systems or inability to navigate the systems.",
        "System Integration Concerns": "",
        "Redundant Processes": "References to redundant processes or redundant systems/platforms.",
        "Operational Inefficiency": "",
        "Operational Errors": "Mentions of operational errors or operational error resolution issues.",
        "Marketing Restrictions": "",
        "SMRU": "",
        "Compliance Challenges": "",
        "Paperwork Requirements": "",
        "Training Gaps": "Includes general or non-specific mentions of not feeling sufficiently trained.",
        "Product Training": "Includes mentions of life or financial product training specifically.",
        "Agent Technology Training": "Includes mentions of technology, system, or platform training for the Agent specifically.",
        "Service Team Training": "Includes mention of the Service or Service Center team needing more training.",
        "Lack of Communication": "",
        "Lack of Agent Collaboration": "Lack of agent collaboration should also include mentions of agent networking concerns."
        }
}


# Net (roll-up) groups: question ID -> {net name: granular topics in that net}.
# write_to_excel passes net_groups[question] to get_wide_net_binary.
# NOTE(review): only 'Q5a' is defined here; write_to_excel indexes
# net_groups[question] for every DataFrame it is given, so exporting any other
# question together with these groups would raise KeyError.
net_groups = {
    "Q5a": {
        'Agent Compensation': ['Compensation Concerns', 'Commision Structure'],
        'New Business': ['Receiving Leads Concerns', 'Prospecting', 'Challenging Job/ Challenging Market'],
        'Client Servicing': ['Client Experience Issues', 'Client Onboarding/Application Challenges', 'Client Communication', 'Client Service Delays'],
        'Client Retention/Managing Client Relationship': ['Client Retention Strategies', 'Client Relationship Management', 'Client Trust Building'],
        'Service Center': ['Inconsistent Information from Service Center', 'Service Center Inefficiency/Issues'],
        'Support': ['Marketing Support', 'Lack of Agent Support', 'Sales Strategy Support', 'Concerns With GO'],
        'Underwriting': ['Underwriting Process Concerns', 'High Underwriting Standards'],
        'Product': ['Difficulties With Financial Products', 'Product Pricing', 'Product Competitiveness'],
        'Technology': ['Technology/Platform Issues', 'Portal Concerns', 'Service Form E-Signature Limitations', 'System Complexity/Concerns Navigating Systems', 'System Integration Concerns'],
        'Efficiency': ['Redundant Processes', 'Operational Inefficiency', 'Operational Errors'],
        'Compliance': ['Marketing Restrictions', 'SMRU', 'Compliance Challenges', 'Paperwork Requirements'],
        'Training': ['Training Gaps', 'Product Training', 'Agent Technology Training', 'Service Team Training'],
        'Communication': ['Lack of Communication'],
        'Agent-Agent Relationship': ['Lack of Agent Collaboration'],
        'Other': ['Other']
    }
}

In [None]:
# Topic names per question, in the order they are declared in topics_defs_dict.
topics_for_defs = {q_id: list(def_map) for q_id, def_map in topics_defs_dict.items()}

# Same mapping as topics_defs_dict, minus the topics whose definition is empty.
topics_dict_w_defs = {
    q_id: {topic: definition for topic, definition in def_map.items() if definition}
    for q_id, def_map in topics_defs_dict.items()
}

In [None]:
# Results of the definition-based classification: question ID -> DataFrame of
# verbatims merged with their assigned labels.
c_w_defs = {}

In [None]:
# Definition-aware classification: label every verbatim of every question,
# pickle the per-question result, and report elapsed minutes and API cost.
start_time = time.time()
start_cost = get_running_cost()

for question in cols:
    q_time = time.time()
    q_cost = get_running_cost()
    print(f'\n\nQuestion {question}')

    # Respondents with no answer to this question are left out.
    df = input_df.loc[:, ['NAID', question]].dropna().reset_index(drop=True)

    print(f'Number of Verbatims: {df.shape[0]}')
    print('---------------------------------')

    # Arguments that are identical for every row of this question.
    # Optional stricter instruction that was used for Q5a and is currently off:
    #   "Only classify topics if the response is negative towards or identifies
    #    a concern, problem, or challenge related to them. Positive or neutral
    #    responses should not be classified into the topic."
    # NOTE(review): the full definition map (empty definitions included) is
    # passed as `defs`; topics_dict_w_defs, which drops the empty ones, is built
    # in an earlier cell but is not used here — confirm which is intended.
    shared_kwargs = {
        'context': q_context[question],
        'additional_context': "",
        'topics': topics_for_defs[question],
        'defs': topics_defs_dict[question],
        'model': 'gpt-4o',
        'max_labels': 3,
        'n_resp': 1,
    }

    # One classifier call per row, in row order.
    raw_labels = df.apply(
        lambda row: multiclassifyVerbatimwDefs(client, row[question], id=row['NAID'], i=row.name, **shared_kwargs),
        axis=1,
    )

    per_row_frames = raw_labels.to_list()
    raw_labels_df = pd.concat(per_row_frames, ignore_index=True).rename(columns={'ID': 'NAID'})

    c_w_defs[question] = pd.merge(df, raw_labels_df)
    c_w_defs[question].to_pickle(f'Output/Pickles/Topics_Classified_W_Defs_{question}_{date}.pkl')

    print('---------------------------------')
    print(f'Question {question} took {(time.time() - q_time) / 60:.2f} minutes')
    print(f'Cost of Question {question}: ${get_running_cost() - q_cost:.2f}')


print(f'\n\n\nTotal Time: {(time.time() - start_time) / 60:.2f} minutes')
print(f'Total Cost: ${get_running_cost() - start_cost:.2f}')

In [None]:
# Export the definition-based results, including the net sheets, and give the
# Q5a sheets a coloured tab.
output_path = f'Output/Labels/{key}_Topics_Classified_w_Defs_and_Net_Counts_{date}.xlsx'
write_to_excel(output_path, c_w_defs, topics_for_defs, net_groups, tab_colors={'Q5a': '#FFCC99'})