In [3]:
import warnings

import pandas as pd

In [4]:

def _clean_cct_name(column, suffix="FINAL"):
    """Return the CCT label for a column header by dropping its trailing suffix.

    Only the suffix at the very end of the header is removed, with or without a
    separating space, so ``"Aspirational FINAL"`` and ``"AspirationalFINAL"``
    both become ``"Aspirational"``. A header that consists of nothing but the
    suffix is returned unchanged rather than as an empty label.
    """
    if column.endswith(suffix):
        label = column[:-len(suffix)].strip()
    else:
        label = column.strip()
    return label or column


def process_cct_data(file_path):
    """Flatten the CCT annotations of an Excel workbook into one table.

    Every sheet of the workbook is read. Within a sheet, the first column whose
    (stripped) header contains ``"Essay"`` is taken as the essay column, and
    every column whose header ends with ``"FINAL"`` is taken as a CCT column.
    Rows whose essay cell is not a string are ignored. For the remaining rows,
    each string CCT cell is split on ``"/%/"`` and every non-empty, stripped
    piece is recorded as a sentence tagged with that column's CCT label.

    Parameters
    ----------
    file_path : str or path-like
        Location of the Excel workbook, passed straight to ``pd.ExcelFile``.

    Returns
    -------
    pandas.DataFrame
        Columns ``"sentence"`` and ``"CCTs"``. There is one row per distinct
        sentence per spreadsheet row; ``"CCTs"`` holds the labels joined with
        ``", "`` in column order, each label listed once. The two columns are
        present even when no sentence was found.

    Warns
    -----
    UserWarning
        For each sheet that has no column containing ``"Essay"``; such a sheet
        is skipped instead of aborting the whole workbook.
    """
    records = []

    # The context manager releases the workbook handle even if a sheet fails.
    with pd.ExcelFile(file_path) as xls:
        for sheet in xls.sheet_names:
            df = pd.read_excel(xls, sheet_name=sheet)

            # Headers can be non-strings (numbers, dates), which the ``.str``
            # accessor rejects, so normalise them through ``str`` first.
            df.columns = [str(col).strip() for col in df.columns]

            essay_columns = [col for col in df.columns if "Essay" in col]
            if not essay_columns:
                warnings.warn(
                    "Sheet {!r} has no column containing 'Essay'; skipping it.".format(sheet),
                    stacklevel=2,
                )
                continue
            essay_column = essay_columns[0]

            # Resolve each CCT column's label once per sheet, not once per cell.
            cct_columns = [col for col in df.columns if col.endswith("FINAL")]
            cct_labels = {col: _clean_cct_name(col) for col in cct_columns}

            for _, row in df.iterrows():
                # The essay text itself is not parsed; it only gates which
                # rows count as answered.
                if not isinstance(row[essay_column], str):
                    continue

                # Sentence -> labels for this row; dicts keep insertion order.
                sentence_cct_map = {}
                for col in cct_columns:
                    cell = row[col]
                    if not isinstance(cell, str):
                        continue
                    for piece in cell.split("/%/"):
                        sentence = piece.strip()
                        if not sentence:
                            continue
                        labels = sentence_cct_map.setdefault(sentence, [])
                        # A sentence repeated inside one cell must not repeat
                        # the label in the output.
                        if cct_labels[col] not in labels:
                            labels.append(cct_labels[col])

                records.extend(
                    {"sentence": sentence, "CCTs": ", ".join(labels)}
                    for sentence, labels in sentence_cct_map.items()
                )

    # Explicit columns keep the schema stable when nothing was extracted.
    return pd.DataFrame(records, columns=["sentence", "CCTs"])

In [5]:
file_path = "all_cap_b1.xlsx"  # Input workbook; replace with the actual file path
# Flatten the workbook's CCT annotations; later cells read `processed_df`.
processed_df = process_cct_data(file_path)

In [7]:
# Preview at most the first 100 rows of the processed table.
processed_df.head(100)

Unnamed: 0,sentence,CCTs
0,to better one self and be able to succeed late...,Aspirational
1,I always wanted to be able to help people and ...,Aspirational
2,I always wanted to be able to help people,Spiritual
3,I am also here because the formula to being su...,Aspirational
4,She made a lot of sacrifices for me to be here...,Filial Piety
...,...,...
95,I am here at SFSU due to its cinema program an...,Navigational
96,I am at San Francisco State because I wanted t...,Aspirational
97,I am taking this class because I needed to tak...,Navigational
98,I like the topic and think if really try do do...,Perserverance


In [8]:
# Save processed data. The path is named once so the confirmation message
# always reports the file that was actually written.
output_path = "processed_cct_data.csv"
processed_df.to_csv(output_path, index=False)
print("Processed data saved to '{}'".format(output_path))

Processed data saved to 'processed_cct_data.csv'
