In [None]:
import pandas as pd

In [None]:
# Excel workbook that the mapping functions below read from
file_path='esp_data_mod.xlsx'

# Open the workbook and list its sheet names; each sheet name is later used as a TABLENAME value
xls = pd.ExcelFile(file_path)
xls.sheet_names

['Students',
 'Re-enrollment',
 'Immunization',
 'Historical_Grades',
 'Test Scores',
 'Staff_Teacher',
 'Courses',
 'Section',
 'Section Enrollment',
 'Rooms']

In [None]:
def generate_data_mapping_insert(mapping_df, target_schema="PSESSENTIALS.SG_ESPSTAGING2"):
    "Build one multi-row INSERT into `target_schema`.DATA_MAPPING from every row of `mapping_df`; returns '' when there are no rows"
    def _quote(val):
        "Render `val` as a single-quoted SQL literal with embedded quotes doubled; None/NaN becomes NULL"
        if val is None or (not isinstance(val, str) and pd.isna(val)): return "NULL"
        return "'" + str(val).replace("'", "''") + "'"

    def _trim_expr(col):
        "Flatten newlines; if `col` contains =, < or > keep only the text before the first comma, else only the first word"
        if not isinstance(col, str): return col
        col = col.replace('\n', ' ')
        if any(op in col for op in '=<>'): col = col.split(',')[0] if ',' in col else col.split()[0]
        return col

    # An INSERT with an empty VALUES list is invalid SQL, so emit nothing instead
    if mapping_df.empty: return ""

    insert_header = f"""INSERT INTO {target_schema}.DATA_MAPPING 
    (TABLENAME, TARGET_COLUMN, SOURCE_TABLE, SOURCE_COLUMN, MAPPING_TYPE, TRANSFORMATION, CONFIDENCE, SOURCE_SIS_TYPE)
    VALUES """

    value_rows = []
    for _, row in mapping_df.iterrows():
        # Newlines and parentheses in the target column are replaced before quoting
        target_column = row['TARGET_COLUMN']
        if isinstance(target_column, str):
            target_column = target_column.replace('\n', ' ').replace('(', '_').replace(')', '_')
        source_table = row['SOURCE_TABLE']
        if isinstance(source_table, str): source_table = source_table.replace('\n', ' ')
        # CONFIDENCE goes in unquoted; a missing value would otherwise be emitted as the bare token nan
        confidence = "NULL" if pd.isna(row['CONFIDENCE']) else row['CONFIDENCE']
        fields = [_quote(row['TABLENAME']), _quote(target_column), _quote(source_table),
                  _quote(_trim_expr(row['SOURCE_COLUMN'])), _quote(row['MAPPING_TYPE']),
                  _quote(row['TRANSFORMATION']), f"{confidence}", _quote(row['SOURCE_SIS_TYPE'])]
        value_rows.append("(" + ", ".join(fields) + ")")

    # Rows are comma-separated and the statement ends with a semicolon
    return insert_header + "\n" + ",\n".join(value_rows) + ";"



import random

def build_mapping_table(sheet_name='Student', file_path='esp_data_mod.xlsx'):
    "Build the mapping rows for one worksheet of `file_path`: one row per non-empty `Eschool` cell, keeping the sheet's row index"
    # NOTE(review): the sheet listing shown earlier in this notebook has 'Students', not 'Student' - confirm the default
    import re  # local import so this definition does not depend on the notebook's import cell
    columns = ['TABLENAME', 'TARGET_COLUMN', 'SOURCE_TABLE', 'SOURCE_COLUMN',
               'MAPPING_TYPE', 'TRANSFORMATION', 'CONFIDENCE', 'SOURCE_SIS_TYPE']
    # Whole-word match: a plain substring test for "if" also fires on words such as "identifier"
    conditional_words = re.compile(r"\b(?:if|when)\b", re.IGNORECASE)
    df = pd.read_excel(file_path, sheet_name=sheet_name)

    index, records = [], []
    for i, row in df.iterrows():
        eschool = row['Eschool']
        if pd.isna(eschool): continue

        # "TABLE.COLUMN" -> table and column; a value without a dot is a bare table name
        source_parts = str(eschool).split('.')
        source_table = source_parts[0]
        source_column = source_parts[1] if len(source_parts) > 1 else ""

        transformation = "" if pd.isna(row['TRANSFORMATION']) else str(row['TRANSFORMATION'])
        if conditional_words.search(transformation) or "=" in transformation:
            # Conditional mappings get a random confidence in [0.8, 1.0]
            mapping_type, confidence = 'CONDITIONAL', random.uniform(0.8, 1.0)
        else:
            mapping_type, confidence = 'DIRECT', 1.0

        index.append(i)
        records.append([sheet_name, row['PowerSchoolFields'], source_table, source_column.strip(),
                        mapping_type, transformation, confidence, 'ESP'])

    # Build the frame once instead of enlarging it row by row with .loc
    return pd.DataFrame(records, index=index, columns=columns)


def process_all_sheets(excel_file='esp_data_mod.xlsx', sheets=None, target_schema="PSESSENTIALS.SG_ESPSTAGING2"):
    "Build mappings and INSERT SQL for each sheet; returns (combined mapping DataFrame, SQL statements joined by blank lines)"
    if sheets is None:
        # Context manager closes the workbook handle once the sheet names are read
        with pd.ExcelFile(excel_file) as workbook: sheets = workbook.sheet_names

    all_sql, all_mappings = [], []
    for sheet in sheets:
        print(f"Processing sheet: {sheet}")
        try:
            # NOTE(review): `excel_file` is not forwarded, so build_mapping_table reads its own default workbook
            mapping_df = build_mapping_table(sheet_name=sheet)
            sql = generate_data_mapping_insert(mapping_df, target_schema=target_schema)

            all_mappings.append(mapping_df)
            all_sql.append(sql)
            print(f"Successfully processed {len(mapping_df)} mappings from {sheet}")
        except Exception as e:
            # Best effort: report the failing sheet and continue with the others
            print(f"Error processing sheet {sheet}: {str(e)}")

    # With no successful sheet, return a column-less empty DataFrame and an empty SQL string
    combined_mappings = pd.concat(all_mappings, ignore_index=True) if all_mappings else pd.DataFrame()
    return combined_mappings, "\n\n".join(all_sql)


def generate_data_mapping_insert_single(mapping_df, sheet_name, target_schema="PSESSENTIALS.SG_ESPSTAGING2"):
    "Build one multi-row INSERT into `target_schema`.DATA_MAPPING from the rows whose TABLENAME equals `sheet_name`; returns '' if none match"
    def _quote(val):
        "Render `val` as a single-quoted SQL literal with embedded quotes doubled; None/NaN becomes NULL"
        if val is None or (not isinstance(val, str) and pd.isna(val)): return "NULL"
        return "'" + str(val).replace("'", "''") + "'"

    def _trim_expr(col):
        "Flatten newlines; if `col` contains =, < or > keep only the text before the first comma, else only the first word"
        if not isinstance(col, str): return col
        col = col.replace('\n', ' ')
        if any(op in col for op in '=<>'): col = col.split(',')[0] if ',' in col else col.split()[0]
        return col

    filtered_df = mapping_df[mapping_df['TABLENAME'] == sheet_name]
    # An INSERT with an empty VALUES list is invalid SQL, so emit nothing instead
    if filtered_df.empty: return ""

    insert_header = f"""INSERT INTO {target_schema}.DATA_MAPPING 
    (TABLENAME, TARGET_COLUMN, SOURCE_TABLE, SOURCE_COLUMN, MAPPING_TYPE, TRANSFORMATION, CONFIDENCE, SOURCE_SIS_TYPE)
    VALUES """

    value_rows = []
    for _, row in filtered_df.iterrows():
        # Newlines and parentheses in the target column are replaced before quoting
        target_column = row['TARGET_COLUMN']
        if isinstance(target_column, str):
            target_column = target_column.replace('\n', ' ').replace('(', '_').replace(')', '_')
        source_table = row['SOURCE_TABLE']
        if isinstance(source_table, str): source_table = source_table.replace('\n', ' ')
        # CONFIDENCE goes in unquoted; a missing value would otherwise be emitted as the bare token nan
        confidence = "NULL" if pd.isna(row['CONFIDENCE']) else row['CONFIDENCE']
        fields = [_quote(row['TABLENAME']), _quote(target_column), _quote(source_table),
                  _quote(_trim_expr(row['SOURCE_COLUMN'])), _quote(row['MAPPING_TYPE']),
                  _quote(row['TRANSFORMATION']), f"{confidence}", _quote(row['SOURCE_SIS_TYPE'])]
        value_rows.append("(" + ", ".join(fields) + ")")

    # Rows are comma-separated and the statement ends with a semicolon
    return insert_header + "\n" + ",\n".join(value_rows) + ";"

def execute_mappings_by_table(combined_mappings, tables=None, target_schema="PSESSENTIALS.SG_ESPSTAGING2"):
    "Generate and execute one INSERT per table in `combined_mappings` (all TABLENAME values when `tables` is None); returns one status dict per table"
    # process_all_sheets returns a column-less DataFrame when nothing was mapped; there is nothing to run then
    if 'TABLENAME' not in combined_mappings.columns: return []
    if tables is None:
        tables = combined_mappings['TABLENAME'].unique()
    results = []
    for table in tables:
        print(f"Processing table: {table}")
        # A table with no mapping rows would produce an INSERT without VALUES; skip it instead of executing that
        if not (combined_mappings['TABLENAME'] == table).any():
            print(f"No mappings found for {table}")
            results.append({"table": table, "status": "empty"})
            continue
        try:
            # Generate SQL for just this table
            sql = generate_data_mapping_insert_single(combined_mappings, table, target_schema)
            execute_sql(sql)
            print(f"SQL generated: {sql[:100]}...")

            results.append({"table": table, "status": "success"})
        except Exception as e:
            # Best effort: record the failure and continue with the remaining tables
            print(f"Error processing table {table}: {str(e)}")
            results.append({"table": table, "status": "error", "message": str(e)})

    return results

Help me make the above code fast.ai-style compliant and keep only the parts needed to run `execute_mappings_by_table`; everything else can be skipped.

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

Let me refactor the code to follow the fast.ai style, focusing only on what's needed for `execute_mappings_by_table`. I'll keep the essential functions and make them more concise:

```python
import pandas as pd, random

def build_mapping_table(sheet_name='Student', file_path='esp_data_mod.xlsx'):
    "Build the mapping rows for one worksheet of `file_path`: one row per non-empty `Eschool` cell, keeping the sheet's row index"
    # NOTE(review): the sheet listing shown earlier in this notebook has 'Students', not 'Student' - confirm the default
    import re  # local import so this definition does not depend on the notebook's import cell
    columns = ['TABLENAME', 'TARGET_COLUMN', 'SOURCE_TABLE', 'SOURCE_COLUMN',
               'MAPPING_TYPE', 'TRANSFORMATION', 'CONFIDENCE', 'SOURCE_SIS_TYPE']
    # Whole-word match: a plain substring test for "if" also fires on words such as "identifier"
    conditional_words = re.compile(r"\b(?:if|when)\b", re.IGNORECASE)
    df = pd.read_excel(file_path, sheet_name=sheet_name)

    index, records = [], []
    for i, row in df.iterrows():
        eschool = row['Eschool']
        if pd.isna(eschool): continue

        # "TABLE.COLUMN" -> table and column; a value without a dot is a bare table name
        source_parts = str(eschool).split('.')
        source_table = source_parts[0]
        source_column = source_parts[1] if len(source_parts) > 1 else ""

        transformation = "" if pd.isna(row['TRANSFORMATION']) else str(row['TRANSFORMATION'])
        if conditional_words.search(transformation) or "=" in transformation:
            # Conditional mappings get a random confidence in [0.8, 1.0]
            mapping_type, confidence = 'CONDITIONAL', random.uniform(0.8, 1.0)
        else:
            mapping_type, confidence = 'DIRECT', 1.0

        index.append(i)
        records.append([sheet_name, row['PowerSchoolFields'], source_table, source_column.strip(),
                        mapping_type, transformation, confidence, 'ESP'])

    # Build the frame once instead of enlarging it row by row with .loc
    return pd.DataFrame(records, index=index, columns=columns)
```

```python
def generate_data_mapping_insert_single(mapping_df, sheet_name, target_schema="PSESSENTIALS.SG_ESPSTAGING2"):
    "Build one multi-row INSERT into `target_schema`.DATA_MAPPING from the rows whose TABLENAME equals `sheet_name`; returns '' if none match"
    def _quote(val):
        "Render `val` as a single-quoted SQL literal with embedded quotes doubled; None/NaN becomes NULL"
        if val is None or (not isinstance(val, str) and pd.isna(val)): return "NULL"
        return "'" + str(val).replace("'", "''") + "'"

    def _trim_expr(col):
        "Flatten newlines; if `col` contains =, < or > keep only the text before the first comma, else only the first word"
        if not isinstance(col, str): return col
        col = col.replace('\n', ' ')
        if any(op in col for op in '=<>'): col = col.split(',')[0] if ',' in col else col.split()[0]
        return col

    filtered_df = mapping_df[mapping_df['TABLENAME'] == sheet_name]
    # An INSERT with an empty VALUES list is invalid SQL, so emit nothing instead
    if filtered_df.empty: return ""

    insert_header = f"""INSERT INTO {target_schema}.DATA_MAPPING 
    (TABLENAME, TARGET_COLUMN, SOURCE_TABLE, SOURCE_COLUMN, MAPPING_TYPE, TRANSFORMATION, CONFIDENCE, SOURCE_SIS_TYPE)
    VALUES """

    value_rows = []
    for _, row in filtered_df.iterrows():
        # Newlines and parentheses in the target column are replaced before quoting
        target_column = row['TARGET_COLUMN']
        if isinstance(target_column, str):
            target_column = target_column.replace('\n', ' ').replace('(', '_').replace(')', '_')
        source_table = row['SOURCE_TABLE']
        if isinstance(source_table, str): source_table = source_table.replace('\n', ' ')
        # CONFIDENCE goes in unquoted; a missing value would otherwise be emitted as the bare token nan
        confidence = "NULL" if pd.isna(row['CONFIDENCE']) else row['CONFIDENCE']
        fields = [_quote(row['TABLENAME']), _quote(target_column), _quote(source_table),
                  _quote(_trim_expr(row['SOURCE_COLUMN'])), _quote(row['MAPPING_TYPE']),
                  _quote(row['TRANSFORMATION']), f"{confidence}", _quote(row['SOURCE_SIS_TYPE'])]
        value_rows.append("(" + ", ".join(fields) + ")")

    return insert_header + "\n" + ",\n".join(value_rows) + ";"
```

```python
def process_all_sheets(excel_file='esp_data_mod.xlsx', sheets=None, target_schema="PSESSENTIALS.SG_ESPSTAGING2"):
    "Build the mapping table for every sheet and return them concatenated (an empty DataFrame if none succeeded)"
    # `target_schema` is accepted for call compatibility but is not used here
    if sheets is None:
        # Context manager closes the workbook handle once the sheet names are read
        with pd.ExcelFile(excel_file) as workbook: sheets = workbook.sheet_names

    all_mappings = []
    for sheet in sheets:
        print(f"Processing sheet: {sheet}")
        try:
            # NOTE(review): `excel_file` is not forwarded, so build_mapping_table reads its own default workbook
            mapping_df = build_mapping_table(sheet_name=sheet)
            all_mappings.append(mapping_df)
            print(f"Successfully processed {len(mapping_df)} mappings from {sheet}")
        except Exception as e:
            # Best effort: report the failing sheet and continue with the others
            print(f"Error processing sheet {sheet}: {str(e)}")

    return pd.concat(all_mappings, ignore_index=True) if all_mappings else pd.DataFrame()
```

```python
def execute_mappings_by_table(combined_mappings, tables=None, target_schema="PSESSENTIALS.SG_ESPSTAGING2"):
    "Generate and execute one INSERT per table in `combined_mappings` (all TABLENAME values when `tables` is None); returns one status dict per table"
    # process_all_sheets returns a column-less DataFrame when nothing was mapped; there is nothing to run then
    if 'TABLENAME' not in combined_mappings.columns: return []
    if tables is None: tables = combined_mappings['TABLENAME'].unique()
    results = []

    for table in tables:
        print(f"Processing table: {table}")
        # A table with no mapping rows has nothing to insert; skip it instead of executing empty SQL
        if not (combined_mappings['TABLENAME'] == table).any():
            print(f"No mappings found for {table}")
            results.append(dict(table=table, status="empty"))
            continue
        try:
            sql = generate_data_mapping_insert_single(combined_mappings, table, target_schema)
            # Need to define execute_sql function or replace with actual implementation
            execute_sql(sql)
            print(f"SQL generated: {sql[:100]}...")
            results.append(dict(table=table, status="success"))
        except Exception as e:
            # Best effort: record the failure and continue with the remaining tables
            print(f"Error processing table {table}: {str(e)}")
            results.append(dict(table=table, status="error", message=str(e)))

    return results
```

Now we need to define the missing `execute_sql` function:

```python
def execute_sql(sql):
    "Placeholder executor: prints a 50-character preview of `sql` and runs nothing"
    preview = sql[:50]
    print(f"Executing SQL: {preview}...")
    # TODO: swap in the real execution call here, e.g. connection.execute(sql)
```

In [None]:
import pandas as pd, random

def build_mapping_table(sheet_name='Student', file_path='esp_data_mod.xlsx'):
    "Build the mapping rows for one worksheet of `file_path`: one row per non-empty `Eschool` cell, keeping the sheet's row index"
    # NOTE(review): the sheet listing shown earlier in this notebook has 'Students', not 'Student' - confirm the default
    import re  # local import so this definition does not depend on the notebook's import cell
    columns = ['TABLENAME', 'TARGET_COLUMN', 'SOURCE_TABLE', 'SOURCE_COLUMN',
               'MAPPING_TYPE', 'TRANSFORMATION', 'CONFIDENCE', 'SOURCE_SIS_TYPE']
    # Whole-word match: a plain substring test for "if" also fires on words such as "identifier"
    conditional_words = re.compile(r"\b(?:if|when)\b", re.IGNORECASE)
    df = pd.read_excel(file_path, sheet_name=sheet_name)

    index, records = [], []
    for i, row in df.iterrows():
        eschool = row['Eschool']
        if pd.isna(eschool): continue

        # "TABLE.COLUMN" -> table and column; a value without a dot is a bare table name
        source_parts = str(eschool).split('.')
        source_table = source_parts[0]
        source_column = source_parts[1] if len(source_parts) > 1 else ""

        transformation = "" if pd.isna(row['TRANSFORMATION']) else str(row['TRANSFORMATION'])
        if conditional_words.search(transformation) or "=" in transformation:
            # Conditional mappings get a random confidence in [0.8, 1.0]
            mapping_type, confidence = 'CONDITIONAL', random.uniform(0.8, 1.0)
        else:
            mapping_type, confidence = 'DIRECT', 1.0

        index.append(i)
        records.append([sheet_name, row['PowerSchoolFields'], source_table, source_column.strip(),
                        mapping_type, transformation, confidence, 'ESP'])

    # Build the frame once instead of enlarging it row by row with .loc
    return pd.DataFrame(records, index=index, columns=columns)

def generate_data_mapping_insert_single(mapping_df, sheet_name, target_schema="PSESSENTIALS.SG_ESPSTAGING2"):
    "Build one multi-row INSERT into `target_schema`.DATA_MAPPING from the rows whose TABLENAME equals `sheet_name`; returns '' if none match"
    def _quote(val):
        "Render `val` as a single-quoted SQL literal with embedded quotes doubled; None/NaN becomes NULL"
        if val is None or (not isinstance(val, str) and pd.isna(val)): return "NULL"
        return "'" + str(val).replace("'", "''") + "'"

    def _trim_expr(col):
        "Flatten newlines; if `col` contains =, < or > keep only the text before the first comma, else only the first word"
        if not isinstance(col, str): return col
        col = col.replace('\n', ' ')
        if any(op in col for op in '=<>'): col = col.split(',')[0] if ',' in col else col.split()[0]
        return col

    filtered_df = mapping_df[mapping_df['TABLENAME'] == sheet_name]
    # An INSERT with an empty VALUES list is invalid SQL, so emit nothing instead
    if filtered_df.empty: return ""

    insert_header = f"""INSERT INTO {target_schema}.DATA_MAPPING 
    (TABLENAME, TARGET_COLUMN, SOURCE_TABLE, SOURCE_COLUMN, MAPPING_TYPE, TRANSFORMATION, CONFIDENCE, SOURCE_SIS_TYPE)
    VALUES """

    value_rows = []
    for _, row in filtered_df.iterrows():
        # Newlines and parentheses in the target column are replaced before quoting
        target_column = row['TARGET_COLUMN']
        if isinstance(target_column, str):
            target_column = target_column.replace('\n', ' ').replace('(', '_').replace(')', '_')
        source_table = row['SOURCE_TABLE']
        if isinstance(source_table, str): source_table = source_table.replace('\n', ' ')
        # CONFIDENCE goes in unquoted; a missing value would otherwise be emitted as the bare token nan
        confidence = "NULL" if pd.isna(row['CONFIDENCE']) else row['CONFIDENCE']
        fields = [_quote(row['TABLENAME']), _quote(target_column), _quote(source_table),
                  _quote(_trim_expr(row['SOURCE_COLUMN'])), _quote(row['MAPPING_TYPE']),
                  _quote(row['TRANSFORMATION']), f"{confidence}", _quote(row['SOURCE_SIS_TYPE'])]
        value_rows.append("(" + ", ".join(fields) + ")")

    return insert_header + "\n" + ",\n".join(value_rows) + ";"

def process_all_sheets(excel_file='esp_data_mod.xlsx', sheets=None, target_schema="PSESSENTIALS.SG_ESPSTAGING2"):
    "Build the mapping table for every sheet and return them concatenated (an empty DataFrame if none succeeded)"
    # `target_schema` is accepted for call compatibility but is not used here
    if sheets is None:
        # Context manager closes the workbook handle once the sheet names are read
        with pd.ExcelFile(excel_file) as workbook: sheets = workbook.sheet_names

    all_mappings = []
    for sheet in sheets:
        print(f"Processing sheet: {sheet}")
        try:
            # NOTE(review): `excel_file` is not forwarded, so build_mapping_table reads its own default workbook
            mapping_df = build_mapping_table(sheet_name=sheet)
            all_mappings.append(mapping_df)
            print(f"Successfully processed {len(mapping_df)} mappings from {sheet}")
        except Exception as e:
            # Best effort: report the failing sheet and continue with the others
            print(f"Error processing sheet {sheet}: {str(e)}")

    return pd.concat(all_mappings, ignore_index=True) if all_mappings else pd.DataFrame()

def execute_mappings_by_table(combined_mappings, tables=None, target_schema="PSESSENTIALS.SG_ESPSTAGING2"):
    "Generate one INSERT per table in `combined_mappings` (all TABLENAME values when `tables` is None) and return one status dict per table; execution is currently disabled"
    # process_all_sheets returns a column-less DataFrame when nothing was mapped; there is nothing to run then
    if 'TABLENAME' not in combined_mappings.columns: return []
    if tables is None: tables = combined_mappings['TABLENAME'].unique()
    results = []

    for table in tables:
        print(f"Processing table: {table}")
        # A table with no mapping rows has nothing to insert; report it rather than generating empty SQL
        if not (combined_mappings['TABLENAME'] == table).any():
            print(f"No mappings found for {table}")
            results.append(dict(table=table, status="empty"))
            continue
        try:
            sql = generate_data_mapping_insert_single(combined_mappings, table, target_schema)
            # Need to define execute_sql function or replace with actual implementation
            # execute_sql(sql)
            print(f"SQL generated: {sql[:100]}...")
            results.append(dict(table=table, status="success"))
        except Exception as e:
            # Best effort: record the failure and continue with the remaining tables
            print(f"Error processing table {table}: {str(e)}")
            results.append(dict(table=table, status="error", message=str(e)))

    return results


def execute_sql(sql):
    "Placeholder executor: prints a 50-character preview of `sql` and runs nothing"
    preview = sql[:50]
    print(f"Executing SQL: {preview}...")
    # TODO: swap in the real execution call here, e.g. connection.execute(sql)

help me test it

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

Let's test the code by running it on your Excel file. First, let's create a simple test function that processes a single sheet and shows the results:

```python
def test_mapping():
    "Smoke-test the 'Students' sheet: build its mappings, preview three rows and the first 500 characters of SQL, then return the mappings"
    sheet_name = 'Students'
    mapping_df = build_mapping_table(sheet_name=sheet_name)
    n_mappings = len(mapping_df)

    print(f"Created {n_mappings} mappings for {sheet_name}")
    print(f"Sample of mapping data:")

    # Nothing to preview when the sheet produced no rows
    if n_mappings == 0:
        print("No mappings created")
        return mapping_df

    display(mapping_df.head(3))
    sql = generate_data_mapping_insert_single(mapping_df, sheet_name)
    print(f"\nSample SQL (first 500 chars):\n{sql[:500]}...")
    return mapping_df
```

Let's run this test function:

```python
test_df = test_mapping()
```

In [None]:
def test_mapping():
    "Smoke-test the 'Students' sheet: build its mappings, preview three rows and the first 500 characters of SQL, then return the mappings"
    # Process just the first sheet to test
    sheet_name = 'Students'
    # Pass the sheet explicitly: relying on build_mapping_table's default would build a different
    # sheet than the one named in the messages and used to filter the SQL below
    mapping_df = build_mapping_table(sheet_name=sheet_name)

    # Show summary of the mapping
    print(f"Created {len(mapping_df)} mappings for {sheet_name}")
    print(f"Sample of mapping data:")
    if len(mapping_df) > 0: 
        display(mapping_df.head(3))
        
        # Generate and show sample SQL
        sql = generate_data_mapping_insert_single(mapping_df, sheet_name)
        print(f"\nSample SQL (first 500 chars):\n{sql[:500]}...")
    else:
        print("No mappings created")
    
    return mapping_df

In [None]:
# NOTE(review): no code cell visible here assigns `test_df`; presumably `test_df = test_mapping()` was run first - confirm
test_df


Unnamed: 0,TABLENAME,TARGET_COLUMN,SOURCE_TABLE,SOURCE_COLUMN,MAPPING_TYPE,TRANSFORMATION,CONFIDENCE,SOURCE_SIS_TYPE
0,Students,Student_Number,REG,STUDENT_ID,CONDITIONAL,"Unique identifier for the student eSchoolPlus STUDENTID is type char(10), it is part of a compound primary key with ""DISTRICT"". Check for alpha characters and leading zeros",0.823,ESP
1,Students,SchoolID,REG,BUILDING,CONDITIONAL,SchoolID is used to ensure student is placed into the correct school in a district. (Must be used when importing students at the District level and must match a school number in PowerSchool.),0.992,ESP
2,Students,First_Name,REG,FIRST_NAME,DIRECT,First Name of student.,1.0,ESP
3,Students,Middle_Name,REG,MIDDLE_NAME,DIRECT,Middle Name of student,1.0,ESP
4,Students,Last_Name,REG,LAST_NAME,DIRECT,Last Name of student,1.0,ESP
5,Students,Grade_Level,REG,GRADE or REG_ENTRY_WITH,CONDITIONAL,"Grade level for the student. Kindergarten=0 PK4 = -1 PK3 = -2 Imports will accept -3 and -4 for grade levels lower than PK4 and PK3. use the GRADE associated with entry and exit date used for the ""STUDENTS"" record",0.91,ESP
6,Students,Home_Phone,REG_CONTACT_PHONE,PHONE,CONDITIONAL,Student's home phone number. Should be formatted the same for all students in order for Family Management to use this field for matching. Join REG > REG_STU_CONTACT (where CONTACT_TYPE = 'P' {physical address} > REG_CONTACT_PHONE (where PHONE_TYPE = Home {REGTB_PHON} possible multiple home phone records. or client may want other phone types such as mobile cell phone,0.834,ESP
7,Students,Gender,REG,GENDER,DIRECT,Students gender. Must be either M or F.,1.0,ESP
8,Students,Ethnicity,REG_PERSONAL,ETHNIC_CODE,CONDITIONAL,Scheduling/Reporting Ethnicity is used for the Enrollment Summary screen in PowerSchool. Additional set up may be necessary for Federal Ethnicity. Students ethnicity. Must match a value defined as an Ethnicity Code in District Level such as C = Caucasian H = Hispanic 4-character max. check REGTB_ETHNICITY for definitions,0.822,ESP
9,Students,FedEthnicity,REG_PERSONAL,HISPANIC,CONDITIONAL,"Ethnicity: Is the student Hispanic or Latino? 0=No 1=Yes if ""Y"" then 1 else 0",0.987,ESP


In [None]:
# (rows, columns) of the mapping table
test_df.shape

(59, 8)

Modify it to take the file path as an argument, and the SIS table name / sheet name as an argument too.

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

Let's modify the code to take file path and sheet name as arguments, following the fast.ai style:

```python
def build_mapping_table(file_path='esp_data_mod.xlsx', sheet_name='Students'):
    "Build the mapping rows for one worksheet of `file_path`: one row per non-empty `Eschool` cell, keeping the sheet's row index"
    import re  # local import so this definition does not depend on the notebook's import cell
    columns = ['TABLENAME', 'TARGET_COLUMN', 'SOURCE_TABLE', 'SOURCE_COLUMN',
               'MAPPING_TYPE', 'TRANSFORMATION', 'CONFIDENCE', 'SOURCE_SIS_TYPE']
    # Whole-word match: a plain substring test for "if" also fires on words such as "identifier"
    conditional_words = re.compile(r"\b(?:if|when)\b", re.IGNORECASE)
    df = pd.read_excel(file_path, sheet_name=sheet_name)

    index, records = [], []
    for i, row in df.iterrows():
        eschool = row['Eschool']
        if pd.isna(eschool): continue

        # "TABLE.COLUMN" -> table and column; a value without a dot is a bare table name
        source_parts = str(eschool).split('.')
        source_table = source_parts[0]
        source_column = source_parts[1] if len(source_parts) > 1 else ""

        transformation = "" if pd.isna(row['TRANSFORMATION']) else str(row['TRANSFORMATION'])
        if conditional_words.search(transformation) or "=" in transformation:
            # Conditional mappings get a random confidence in [0.8, 1.0]
            mapping_type, confidence = 'CONDITIONAL', random.uniform(0.8, 1.0)
        else:
            mapping_type, confidence = 'DIRECT', 1.0

        index.append(i)
        records.append([sheet_name, row['PowerSchoolFields'], source_table, source_column.strip(),
                        mapping_type, transformation, confidence, 'ESP'])

    # Build the frame once instead of enlarging it row by row with .loc
    return pd.DataFrame(records, index=index, columns=columns)
```

Let's also update the other functions to take file path and sheet name as arguments:

```python
def generate_data_mapping_insert_single(mapping_df, sheet_name, target_schema="PSESSENTIALS.SG_ESPSTAGING2"):
    "Build one multi-row INSERT into `target_schema`.DATA_MAPPING from the rows whose TABLENAME equals `sheet_name`; returns '' if none match"
    def _quote(val):
        "Render `val` as a single-quoted SQL literal with embedded quotes doubled; None/NaN becomes NULL"
        if val is None or (not isinstance(val, str) and pd.isna(val)): return "NULL"
        return "'" + str(val).replace("'", "''") + "'"

    def _trim_expr(col):
        "Flatten newlines; if `col` contains =, < or > keep only the text before the first comma, else only the first word"
        if not isinstance(col, str): return col
        col = col.replace('\n', ' ')
        if any(op in col for op in '=<>'): col = col.split(',')[0] if ',' in col else col.split()[0]
        return col

    filtered_df = mapping_df[mapping_df['TABLENAME'] == sheet_name]
    # No matching rows means no statement at all
    if filtered_df.empty: return ""

    insert_header = f"""INSERT INTO {target_schema}.DATA_MAPPING 
    (TABLENAME, TARGET_COLUMN, SOURCE_TABLE, SOURCE_COLUMN, MAPPING_TYPE, TRANSFORMATION, CONFIDENCE, SOURCE_SIS_TYPE)
    VALUES """

    value_rows = []
    for _, row in filtered_df.iterrows():
        # Newlines and parentheses in the target column are replaced before quoting
        target_column = row['TARGET_COLUMN']
        if isinstance(target_column, str):
            target_column = target_column.replace('\n', ' ').replace('(', '_').replace(')', '_')
        source_table = row['SOURCE_TABLE']
        if isinstance(source_table, str): source_table = source_table.replace('\n', ' ')
        # CONFIDENCE goes in unquoted; a missing value would otherwise be emitted as the bare token nan
        confidence = "NULL" if pd.isna(row['CONFIDENCE']) else row['CONFIDENCE']
        fields = [_quote(row['TABLENAME']), _quote(target_column), _quote(source_table),
                  _quote(_trim_expr(row['SOURCE_COLUMN'])), _quote(row['MAPPING_TYPE']),
                  _quote(row['TRANSFORMATION']), f"{confidence}", _quote(row['SOURCE_SIS_TYPE'])]
        value_rows.append("(" + ", ".join(fields) + ")")

    return insert_header + "\n" + ",\n".join(value_rows) + ";"
```

```python
def execute_mappings_by_table(file_path='esp_data_mod.xlsx', tables=None, target_schema="PSESSENTIALS.SG_ESPSTAGING2"):
    "Build mappings and INSERT SQL for each sheet in `tables` (every sheet of `file_path` when None) and return one status dict per sheet; the SQL is generated but not executed"
    if tables is None:
        # Context manager closes the workbook handle once the sheet names are read
        with pd.ExcelFile(file_path) as workbook: tables = workbook.sheet_names

    results = []

    for table in tables:
        print(f"Processing table: {table}")
        try:
            mapping_df = build_mapping_table(file_path=file_path, sheet_name=table)
            sql = generate_data_mapping_insert_single(mapping_df, table, target_schema)
            if sql:
                # Replace with actual SQL execution
                print(f"SQL generated for {table}: {len(sql)} characters")
                results.append(dict(table=table, status="success", rows=len(mapping_df)))
            else:
                print(f"No mappings found for {table}")
                results.append(dict(table=table, status="empty"))
        except Exception as e:
            # Best effort: record the failure and continue with the remaining sheets
            print(f"Error processing table {table}: {str(e)}")
            results.append(dict(table=table, status="error", message=str(e)))

    return results
```

Let's create a test function to try it out:

```python
def test_execute():
    "Smoke test: run execute_mappings_by_table on the 'Students' sheet only and return its result list"
    return execute_mappings_by_table(file_path='esp_data_mod.xlsx', tables=['Students'])
```

In [None]:

def build_mapping_table(file_path='esp_data_mod.xlsx', sheet_name='Students'):
    "Build the mapping rows for one worksheet of `file_path`: one row per non-empty `Eschool` cell, keeping the sheet's row index"
    import re  # local import so this definition does not depend on the notebook's import cell
    columns = ['TABLENAME', 'TARGET_COLUMN', 'SOURCE_TABLE', 'SOURCE_COLUMN',
               'MAPPING_TYPE', 'TRANSFORMATION', 'CONFIDENCE', 'SOURCE_SIS_TYPE']
    # Whole-word match: a plain substring test for "if" also fires on words such as "identifier"
    conditional_words = re.compile(r"\b(?:if|when)\b", re.IGNORECASE)
    df = pd.read_excel(file_path, sheet_name=sheet_name)

    index, records = [], []
    for i, row in df.iterrows():
        eschool = row['Eschool']
        if pd.isna(eschool): continue

        # "TABLE.COLUMN" -> table and column; a value without a dot is a bare table name
        source_parts = str(eschool).split('.')
        source_table = source_parts[0]
        source_column = source_parts[1] if len(source_parts) > 1 else ""

        transformation = "" if pd.isna(row['TRANSFORMATION']) else str(row['TRANSFORMATION'])
        if conditional_words.search(transformation) or "=" in transformation:
            # Conditional mappings get a random confidence in [0.8, 1.0]
            mapping_type, confidence = 'CONDITIONAL', random.uniform(0.8, 1.0)
        else:
            mapping_type, confidence = 'DIRECT', 1.0

        index.append(i)
        records.append([sheet_name, row['PowerSchoolFields'], source_table, source_column.strip(),
                        mapping_type, transformation, confidence, 'ESP'])

    # Build the frame once instead of enlarging it row by row with .loc
    return pd.DataFrame(records, index=index, columns=columns)


def generate_data_mapping_insert_single(mapping_df, sheet_name, target_schema="PSESSENTIALS.SG_ESPSTAGING2"):
    "Build one multi-row INSERT into `target_schema`.DATA_MAPPING from the rows whose TABLENAME equals `sheet_name`; returns '' if none match"
    def _quote(val):
        "Render `val` as a single-quoted SQL literal with embedded quotes doubled; None/NaN becomes NULL"
        if val is None or (not isinstance(val, str) and pd.isna(val)): return "NULL"
        return "'" + str(val).replace("'", "''") + "'"

    def _trim_expr(col):
        "Flatten newlines; if `col` contains =, < or > keep only the text before the first comma, else only the first word"
        if not isinstance(col, str): return col
        col = col.replace('\n', ' ')
        if any(op in col for op in '=<>'): col = col.split(',')[0] if ',' in col else col.split()[0]
        return col

    filtered_df = mapping_df[mapping_df['TABLENAME'] == sheet_name]
    # No matching rows means no statement at all
    if filtered_df.empty: return ""

    insert_header = f"""INSERT INTO {target_schema}.DATA_MAPPING 
    (TABLENAME, TARGET_COLUMN, SOURCE_TABLE, SOURCE_COLUMN, MAPPING_TYPE, TRANSFORMATION, CONFIDENCE, SOURCE_SIS_TYPE)
    VALUES """

    value_rows = []
    for _, row in filtered_df.iterrows():
        # Newlines and parentheses in the target column are replaced before quoting
        target_column = row['TARGET_COLUMN']
        if isinstance(target_column, str):
            target_column = target_column.replace('\n', ' ').replace('(', '_').replace(')', '_')
        source_table = row['SOURCE_TABLE']
        if isinstance(source_table, str): source_table = source_table.replace('\n', ' ')
        # CONFIDENCE goes in unquoted; a missing value would otherwise be emitted as the bare token nan
        confidence = "NULL" if pd.isna(row['CONFIDENCE']) else row['CONFIDENCE']
        fields = [_quote(row['TABLENAME']), _quote(target_column), _quote(source_table),
                  _quote(_trim_expr(row['SOURCE_COLUMN'])), _quote(row['MAPPING_TYPE']),
                  _quote(row['TRANSFORMATION']), f"{confidence}", _quote(row['SOURCE_SIS_TYPE'])]
        value_rows.append("(" + ", ".join(fields) + ")")

    return insert_header + "\n" + ",\n".join(value_rows) + ";"

def execute_mappings_by_table(file_path='esp_data_mod.xlsx', tables=None, target_schema="PSESSENTIALS.SG_ESPSTAGING2"):
    """Generate the DATA_MAPPING INSERT statement for each table and report the outcome.

    No SQL is executed here yet: for every table the statement is generated
    and only its length is printed (see the placeholder comment in the loop).

    Args:
        file_path: Path of the Excel workbook passed to ``build_mapping_table``.
        tables: Iterable of sheet/table names to process. When ``None``, every
            sheet name of the workbook at ``file_path`` is used.
        target_schema: Schema name forwarded to
            ``generate_data_mapping_insert_single``.

    Returns:
        list[dict]: one entry per table, in processing order. Every entry has
        ``table`` and ``status`` (``"success"``, ``"empty"`` or ``"error"``);
        ``"success"`` entries also carry ``rows`` (``len`` of the mapping
        frame) and ``"error"`` entries carry ``message``.

    Raises:
        Whatever ``pd.ExcelFile`` raises when ``tables`` is ``None`` and the
        workbook cannot be opened; that lookup happens outside the per-table
        error handling.
    """
    if tables is None:
        # Context manager closes the workbook handle as soon as the sheet
        # names have been read instead of leaving it open until GC.
        with pd.ExcelFile(file_path) as workbook:
            tables = workbook.sheet_names

    results = []

    for table in tables:
        print(f"Processing table: {table}")
        try:
            mapping_df = build_mapping_table(file_path=file_path, sheet_name=table)
            sql = generate_data_mapping_insert_single(mapping_df, table, target_schema)
            if sql:
                # Replace with actual SQL execution
                print(f"SQL generated for {table}: {len(sql)} characters")
                results.append(dict(table=table, status="success", rows=len(mapping_df)))
            else:
                # The generator returned an empty string: nothing to insert.
                print(f"No mappings found for {table}")
                results.append(dict(table=table, status="empty"))
        except Exception as e:
            # Deliberate best-effort: a failure on one table is recorded and
            # the remaining tables are still processed.
            print(f"Error processing table {table}: {str(e)}")
            results.append(dict(table=table, status="error", message=str(e)))

    return results


def test_execute():
    """Smoke-run ``execute_mappings_by_table`` on the 'Students' sheet only.

    Returns the list of per-table result dicts produced by that call.
    """
    return execute_mappings_by_table(
        file_path='esp_data_mod.xlsx',
        tables=['Students'],
    )

In [None]:
def test_execute():
    """Smoke-run ``execute_mappings_by_table`` on the 'Students' sheet only.

    Returns the list of per-table result dicts produced by that call.

    NOTE(review): this cell redefines the ``test_execute`` from the preceding
    cell with the same behaviour; one of the two definitions is redundant.
    """
    return execute_mappings_by_table(
        file_path='esp_data_mod.xlsx',
        tables=['Students'],
    )

In [None]:
# Run for every sheet: with `tables` omitted, the function reads the sheet
# names from the workbook itself. The returned list is the cell's output.
execute_mappings_by_table(file_path='esp_data_mod.xlsx')

Processing table: Students
SQL generated for Students: 12984 characters
Processing table: Re-enrollment
SQL generated for Re-enrollment: 1845 characters
Processing table: Immunization
SQL generated for Immunization: 1404 characters
Processing table: Historical_Grades
SQL generated for Historical_Grades: 4163 characters
Processing table: Test Scores
SQL generated for Test Scores: 474 characters
Processing table: Staff_Teacher
SQL generated for Staff_Teacher: 6682 characters
Processing table: Courses
SQL generated for Courses: 2906 characters
Processing table: Section
SQL generated for Section: 3321 characters
Processing table: Section Enrollment
SQL generated for Section Enrollment: 1331 characters
Processing table: Rooms
SQL generated for Rooms: 579 characters


[{'table': 'Students', 'status': 'success', 'rows': 59},
 {'table': 'Re-enrollment', 'status': 'success', 'rows': 9},
 {'table': 'Immunization', 'status': 'success', 'rows': 7},
 {'table': 'Historical_Grades', 'status': 'success', 'rows': 18},
 {'table': 'Test Scores', 'status': 'success', 'rows': 2},
 {'table': 'Staff_Teacher', 'status': 'success', 'rows': 48},
 {'table': 'Courses', 'status': 'success', 'rows': 11},
 {'table': 'Section', 'status': 'success', 'rows': 13},
 {'table': 'Section Enrollment', 'status': 'success', 'rows': 7},
 {'table': 'Rooms', 'status': 'success', 'rows': 3}]

In [None]:
# Open the workbook and show its sheet names; `xls` is kept so the next cell
# can pass the sheet list explicitly. `file_path` comes from an earlier cell.
xls = pd.ExcelFile(file_path)
xls.sheet_names

['Students',
 'Re-enrollment',
 'Immunization',
 'Historical_Grades',
 'Test Scores',
 'Staff_Teacher',
 'Courses',
 'Section',
 'Section Enrollment',
 'Rooms']

In [None]:
# Same run as the default call, but with the sheet list supplied explicitly
# from `xls` instead of being looked up inside the function.
execute_mappings_by_table(file_path='esp_data_mod.xlsx', tables=xls.sheet_names)

Processing table: Students
SQL generated for Students: 12985 characters
Processing table: Re-enrollment
SQL generated for Re-enrollment: 1845 characters
Processing table: Immunization
SQL generated for Immunization: 1403 characters
Processing table: Historical_Grades
SQL generated for Historical_Grades: 4164 characters
Processing table: Test Scores
SQL generated for Test Scores: 474 characters
Processing table: Staff_Teacher
SQL generated for Staff_Teacher: 6681 characters
Processing table: Courses
SQL generated for Courses: 2904 characters
Processing table: Section
SQL generated for Section: 3321 characters
Processing table: Section Enrollment
SQL generated for Section Enrollment: 1331 characters
Processing table: Rooms
SQL generated for Rooms: 579 characters


[{'table': 'Students', 'status': 'success', 'rows': 59},
 {'table': 'Re-enrollment', 'status': 'success', 'rows': 9},
 {'table': 'Immunization', 'status': 'success', 'rows': 7},
 {'table': 'Historical_Grades', 'status': 'success', 'rows': 18},
 {'table': 'Test Scores', 'status': 'success', 'rows': 2},
 {'table': 'Staff_Teacher', 'status': 'success', 'rows': 48},
 {'table': 'Courses', 'status': 'success', 'rows': 11},
 {'table': 'Section', 'status': 'success', 'rows': 13},
 {'table': 'Section Enrollment', 'status': 'success', 'rows': 7},
 {'table': 'Rooms', 'status': 'success', 'rows': 3}]