In [None]:
import pandas as pd
import openpyxl
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import PatternFill, Border, Side, Font

# Load workbook
file_path = 'your_workbook.xlsx'
wb = openpyxl.load_workbook(file_path)
summary_rows = []

# Define formatting styles
header_fill = PatternFill(start_color="ADD8E6", end_color="ADD8E6", fill_type="solid")
border = Border(left=Side(style='thin'), right=Side(style='thin'), top=Side(style='thin'), bottom=Side(style='thin'))
header_font = Font(bold=True)
subtable_header_fill = PatternFill(start_color="90EE90", end_color="90EE90", fill_type="solid")  # Light green
subtable_header_font = Font(bold=True)

def clear_sheet(ws):
    """Clear the sheet's existing data and formatting."""
    for row in ws.iter_rows():
        for cell in row:
            cell.value = None
            cell.fill = PatternFill()
            cell.border = Border()
            cell.font = Font()

def format_table(ws, start_row, start_col, rows, cols):
    """Format a table."""
    for row in ws.iter_rows(min_row=start_row, max_row=start_row + rows - 1, min_col=start_col, max_col=start_col + cols - 1):
        for cell in row:
            cell.border = border
            if cell.row == start_row:
                cell.fill = header_fill
                cell.font = header_font

def format_subtable_header(ws, row, col):
    """Format subtable header."""
    cell = ws.cell(row=row, column=col)
    cell.fill = subtable_header_fill
    cell.font = subtable_header_font

# Process each sheet
for sheet_name in wb.sheetnames:
    ws = wb[sheet_name]
    
    # Convert sheet to DataFrame
    df = pd.DataFrame(ws.values)
    df.columns = df.iloc[0]
    df = df[1:]
    
    # Clear existing data and formatting
    clear_sheet(ws)

    # Creating the summary table
    summary = df.groupby(['Mapped L4', 'Role_temp']).agg(
        High_Count=('High/Low', lambda x: (x == 'High').sum()),
        Low_Count=('High/Low', lambda x: (x == 'Low').sum()),
    ).reset_index()

    summary['Total'] = summary['High_Count'] + summary['Low_Count']
    summary.loc['Total'] = summary.sum(numeric_only=True)
    summary.loc['Total', ['Mapped L4', 'Role_temp']] = 'Total'

    # Write "Summary" heading
    ws.cell(row=2, column=1, value="Summary")
    
    # Write summary table
    summary_start_row = 3
    for r_idx, row in enumerate(dataframe_to_rows(summary, index=False, header=True), summary_start_row):
        for c_idx, value in enumerate(row, 1):
            ws.cell(row=r_idx, column=c_idx, value=value)

    # Appending summary data for the overall summary
    summary_rows.append([sheet_name] + summary.loc['Total', ['High_Count', 'Low_Count', 'Total']].tolist())

    # Format the summary table
    format_table(ws, summary_start_row, 1, len(summary) + 1, 4)

    # Adding subtables
    subtable_start_row = summary_start_row + len(summary) + 2
    for l4_value, l4_group in df.groupby('Mapped L4'):
        # Write Mapped L4 heading
        ws.cell(row=subtable_start_row, column=1, value=l4_value)
        format_subtable_header(ws, subtable_start_row, 1)
        subtable_start_row += 1
        for role_value, role_group in l4_group.groupby('Role_temp'):
            # Write Role_temp heading
            ws.cell(row=subtable_start_row, column=1, value=role_value)
            format_subtable_header(ws, subtable_start_row, 1)
            subtable_start_row += 1
            for r_idx, row in enumerate(dataframe_to_rows(role_group[['Mapped L4', 'Role_temp', 'GCB level', 'FRC Code', 'Country', 'City', 'High/Low']], index=False, header=True), subtable_start_row):
                for c_idx, value in enumerate(row, 1):
                    ws.cell(row=r_idx, column=c_idx, value=value)
            format_table(ws, subtable_start_row, 1, len(role_group) + 1, 7)
            subtable_start_row += len(role_group) + 2
        subtable_start_row += 3

    # Set column width for column I
    ws.column_dimensions['I'].width = 75

# Creating the overall summary sheet
summary_df = pd.DataFrame(summary_rows, columns=['Sheet Name', 'High Total', 'Low Total', 'Total'])
summary_df.loc['Total'] = summary_df[['High Total', 'Low Total', 'Total']].sum()
summary_df.loc['Total', 'Sheet Name'] = 'Grand Total'

summary_ws = wb.create_sheet('Summary')
for r_idx, row in enumerate(dataframe_to_rows(summary_df, index=False, header=True), 1):
    for c_idx, value in enumerate(row, 1):
        summary_ws.cell(row=r_idx, column=c_idx, value=value)

# Apply formatting to the overall summary sheet
format_table(summary_ws, 1, 1, len(summary_df) + 1, 4)

# Apply formatting to the "Total" column headers in summary tables of all sheets
for sheet_name in wb.sheetnames:
    if sheet_name == 'Summary':
        continue
    ws = wb[sheet_name]
    max_row = ws.max_row
    max_col = ws.max_column
    for row in ws.iter_rows(min_row=3, max_row=3, min_col=1, max_col=max_col):
        for cell in row:
            if cell.value in ['Total', 'High_Count', 'Low_Count']:
                cell.fill = header_fill
                cell.font = header_font

# Save the workbook
wb.save('formatted_workbook.xlsx')


In [None]:
import pandas as pd
import openpyxl
from openpyxl.utils.dataframe import dataframe_to_rows

# Load the workbook using openpyxl
file_path = 'path_to_your_excel_file.xlsx'
wb = openpyxl.load_workbook(file_path)

# Initialize a list to collect summary data for each sheet
summary_data = []

# Process each sheet
for sheet_name in wb.sheetnames:
    df = pd.read_excel(file_path, sheet_name=sheet_name)
    
    # Create the summary table
    summary_table = create_summary_table(df)  # Assuming this function is already defined

    # Get total counts from the summary table
    high_total = summary_table['High Count'].sum()
    low_total = summary_table['Low Count'].sum()
    total = summary_table['Total'].sum()
    
    # Append the data to the summary_data list
    summary_data.append({'Sheet Name': sheet_name, 'High Total': high_total, 'Low Total': low_total, 'Total': total})

# Create a DataFrame from the summary data
summary_df = pd.DataFrame(summary_data)

# Calculate grand totals
grand_totals = summary_df[['High Total', 'Low Total', 'Total']].sum()
summary_df = summary_df.append({'Sheet Name': 'Grand Total', 'High Total': grand_totals['High Total'], 
                                 'Low Total': grand_totals['Low Total'], 'Total': grand_totals['Total']}, 
                                ignore_index=True)

# Add a new sheet for the summary
summary_sheet = wb.create_sheet(title='Complete Summary')

# Write the summary DataFrame to the new sheet
for r_idx, row in enumerate(dataframe_to_rows(summary_df, index=False, header=True), 1):
    for c_idx, value in enumerate(row, 1):
        summary_sheet.cell(row=r_idx, column=c_idx, value=value)

# Save the updated workbook
output_file_path = 'output_file_with_summary.xlsx'
wb.save(output_file_path)

print(f"Complete summary sheet has been created and written to '{output_file_path}'.")


In [2]:
import pandas as pd
import numpy as np

# Sample DataFrames
df1 = pd.DataFrame({'a': [1, 2, 3, 4, 5]})
df2 = pd.DataFrame({'p': [3, 4, 5, 6, 7,44,66]})

# Check if values in df1['a'] are in df2['p']
df1['result'] = np.where(df1['a'].isin(df2['p']), 'true', 'n/a')

# Display the result
print(df1)


   a result
0  1    n/a
1  2    n/a
2  3   true
3  4   true
4  5   true


In [4]:
import xlwings as xw

# Open the workbook and the specific sheet
wb = xw.Book('Start-Data-Analysis.xlsx')
sheet = wb.sheets['Raw']



# Find the last non-empty cell by looking at the entire sheet
last_row = sheet.api.UsedRange.Rows.Count
last_col = sheet.api.UsedRange.Columns.Count

# Create the range from A2 to the last occupied cell
data_range = sheet.range(f"A2:{xw.utils.col_name(last_col)}{last_row}")

# Now data_range includes the entire data including blank rows
data = data_range.value

# Print or process the data as needed
print(last_col, last_row)

print(data)

12 261
[[10452.0, datetime.datetime(2022, 11, 7, 0, 0), 'Fries', 3.49, 'Online ', ' Gift Card', 'Tom Jackson', 'London', 574.0, None, None, None], [10453.0, datetime.datetime(2022, 11, 7, 0, 0), 'Beverages', 2.95, 'Online ', ' Gift Card', 'Pablo Perez', 'Madrid', 746.0, None, None, None], [10454.0, datetime.datetime(2022, 11, 7, 0, 0), 'Sides & Other', 4.99, 'In-store ', ' Gift Card', 'Joao Silva', 'Lisbon', 201.0, None, None, None], [10455.0, datetime.datetime(2022, 11, 8, 0, 0), 'Burgers', 12.99, 'In-store ', ' Credit Card', 'Walter Muller', 'Berlin', 570.0, None, None, None], [10456.0, datetime.datetime(2022, 11, 8, 0, 0), 'Chicken Sandwiches', 9.95, 'In-store ', ' Credit Card', 'Walter Muller', 'Berlin', 202.0, None, None, None], [10457.0, datetime.datetime(2022, 11, 8, 0, 0), 'Fries', 3.49, 'In-store ', ' Credit Card', 'Remy Monet', 'Paris', 574.0, None, None, None], [10459.0, datetime.datetime(2022, 11, 8, 0, 0), 'Sides & Other', 4.99, 'In-store ', ' Credit Card', 'Walter Muller'

In [6]:
import xlwings as xw

# Open the workbook
wb = xw.Book('data.xlsx')

# Access the sheet
sheet = wb.sheets['Sheet3']  # Change to the name of your sheet

# Define the range of column D (excluding the header, assuming header is in row 1)
start_cell = sheet.range('D2')
column_range = start_cell.expand('down')

# Copy the values (result of formulas)
values = column_range.value

# Ensure values are a list of lists (each sublist representing a row)
values = [[value] for value in values]

# Paste the values back into the same column, replacing the formulas
column_range.value = values
sheet['C:C'].delete()
# Save the workbook
wb.save()

# Optionally close the workbook
wb.close()


In [None]:
import xlwings as xw

# Open the workbook
wb = xw.Book('path_to_your_workbook.xlsx')

# Access the sheets
sheet1 = wb.sheets['Sheet1']
sheet2 = wb.sheets['Sheet2']

# Copy data from Sheet2 to a new sheet
new_sheet = wb.sheets.add(name='TempSheet')
new_sheet.range('A1').value = sheet2.range('A1').expand().value

# Update references in Sheet1 to point to the new sheet before deleting Sheet2
for formula_cell in sheet1.used_range:
    if formula_cell.formula and 'Sheet2!' in formula_cell.formula:
        formula_cell.formula = formula_cell.formula.replace('Sheet2!', 'TempSheet!')

# Delete the original Sheet2
sheet2.delete()

# Rename the new sheet to Sheet2
new_sheet.name = 'Sheet2'

# Update references in Sheet1 to point to the newly renamed Sheet2
for formula_cell in sheet1.used_range:
    if formula_cell.formula and 'TempSheet!' in formula_cell.formula:
        formula_cell.formula = formula_cell.formula.replace('TempSheet!', 'Sheet2!')

# Save the workbook
wb.save()

# Optionally close the workbook
wb.close()


In [None]:
import xlwings as xw

# Open the current workbook
wb_current = xw.Book('current_workbook.xlsx')
ws_current = wb_current.sheets[0]  # Assuming you want to work with the first worksheet

# Set the header of the new column
ws_current.range('E1').value = 'SPOC'

# Write the VLOOKUP formula in column 'E' of the current workbook
last_row = ws_current.range('D' + str(ws_current.cells.last_cell.row)).end('up').row
lookup_range = ws_current.range('D2:D' + str(last_row))
lookup_formula = f'=VLOOKUP(D2, [new_workbook.xlsx]Sheet1!$A:$B, 2, FALSE)'
ws_current.range('E2').options(expand='down').formula = lookup_formula

# Save and close the current workbook
wb_current.save()
wb_current.close()


In [None]:
import xlwings as xw

def format_workbook(wb_path):
    app = xw.App(visible=False)
    workbook = app.books.open(wb_path)
    
    # Iterate through each sheet in the workbook
    for sheet in workbook.sheets:
        # Iterate through each cell in the sheet
        for cell in sheet.cells:
            # Check if the cell contains a number
            if isinstance(cell.value, (int, float)):
                # Apply the number format
                cell.number_format = '0;(0)'
    
    # Save and close the workbook
    workbook.save()
    workbook.close()
    app.quit()

# Replace 'your_workbook.xlsx' with the path to your Excel workbook
format_workbook('your_workbook.xlsx')


In [None]:
Sub CopyAndPasteData()
    Dim wbSource As Workbook
    Dim wbDestination As Workbook
    Dim wsSource As Worksheet
    Dim wsDestination As Worksheet
    Dim pathSource As String
    Dim pathDestination As String
    
    ' Get the paths from cells A4 and B4
    pathSource = Range("A4").Value
    pathDestination = Range("B4").Value
    
    ' Check if the paths are not empty
    If pathSource = "" Or pathDestination = "" Then
        MsgBox "Please provide paths for both source and destination files."
        Exit Sub
    End If
    
    ' Open the source workbook
    On Error Resume Next
    Set wbSource = Workbooks.Open(pathSource)
    On Error GoTo 0
    If wbSource Is Nothing Then
        MsgBox "Unable to open the source workbook. Please check the path and try again."
        Exit Sub
    End If
    
    ' Set the source worksheet (assuming it's the first sheet)
    Set wsSource = wbSource.Sheets(1)
    
    ' Open the destination workbook
    On Error Resume Next
    Set wbDestination = Workbooks.Open(pathDestination)
    On Error GoTo 0
    If wbDestination Is Nothing Then
        MsgBox "Unable to open the destination workbook. Please check the path and try again."
        wbSource.Close False
        Exit Sub
    End If
    
    ' Set the destination worksheet (assuming it's the first sheet)
    Set wsDestination = wbDestination.Sheets(1)
    
    ' Copy data from source to destination
    wsSource.UsedRange.Copy wsDestination.Range("A1")
    
    ' Save the destination workbook
    wbDestination.Save
    
    ' Close workbooks
    wbSource.Close False
    wbDestination.Close True
    
    MsgBox "Data copied successfully and destination file saved with updated values."
End Sub


In [None]:
import os
import xlwings as xw

# Path to the folder containing Excel files
folder_path = "your_folder_path"

# Path to the workbook to which data will be appended
wb_path = "path_to_your_workbook.xlsx"

# Open the workbook to which data will be appended
wb = xw.Book(wb_path)

# Iterate through each Excel file in the folder
for filename in os.listdir(folder_path):a
    if filename.endswith(".xlsx"):
        # Open the Excel file
        excel_file_path = os.path.join(folder_path, filename)
        wb_excel = xw.Book(excel_file_path)
        
        # Check if the sheet "A#" exists in the Excel file
        if "A#" in wb_excel.sheets:
            # Get the sheet named "A#"
            sheet_a_hash = wb_excel.sheets["A#"]
            
            # Copy data from "A#" sheet and append it to the last sheet of wb
            last_sheet = wb.sheets[-1]
            last_row = last_sheet.range("A" + str(last_sheet.used_range.last_cell.row + 1))
            last_row.value = sheet_a_hash.range("A1").expand().value
        
        # Close the Excel file
        wb_excel.close()

# Save the modified workbook
wb.save()
wb.close()

print("Data appended successfully!")


In [4]:
import os
import xlwings as xw

# Folder containing Excel files
folder_path = "your_folder_path"

# Initialize lists to store data from Actuals ($) and Actuals (#) sheets
combined_actuals_dollar = []
combined_actuals_hash = []

# Create a new workbook to store the combined data
combined_wb = xw.Book()

# Iterate over Excel files in the folder
for filename in os.listdir(folder_path):
    if filename.endswith(".xlsx"):
        # Open the workbook
        wb = xw.Book(os.path.join(folder_path, filename))
        
        # Iterate over sheets in the workbook
        for sheet in wb.sheets:
            # Skip Actuals ($) and Actuals (#) sheets
            if sheet.name not in ['Actuals ($)', 'Actuals (#)']:
                # Copy data from the current sheet to the combined workbook
                combined_wb.sheets.add(sheet.name).range('A1').value = sheet.range('A1').expand().value
        
        # Read data from Actuals ($) sheet and append to the combined_actuals_dollar list
        actuals_dollar_sheet = wb.sheets['Actuals ($)']
        combined_actuals_dollar.append(actuals_dollar_sheet.range('A1').expand().value)
        
        # Read data from Actuals (#) sheet and append to the combined_actuals_hash list
        actuals_hash_sheet = wb.sheets['Actuals (#)']
        combined_actuals_hash.append(actuals_hash_sheet.range('A1').expand().value)
        
        # Close the workbook
        wb.close()

# Combine data from Actuals ($) sheets into a single list
combined_actuals_dollar_data = [item for sublist in combined_actuals_dollar for item in sublist]

# Combine data from Actuals (#) sheets into a single list
combined_actuals_hash_data = [item for sublist in combined_actuals_hash for item in sublist]

# Write combined Actuals ($) data to a new sheet in the combined workbook
combined_wb.sheets.add('Combined Actuals Dollar').range('A1').value = combined_actuals_dollar_data

# Write combined Actuals (#) data to a new sheet in the combined workbook
combined_wb.sheets.add('Combined Actuals Hash').range('A1').value = combined_actuals_hash_data

# Save the combined workbook
combined_wb.save(os.path.join(folder_path, 'Combined_Output.xlsx'))

# Close the combined workbook
combined_wb.close()

print("Combined Excel files created successfully!")


Iteration 1: ['item2', 'item3', 'item4', 'item5', 'item6', 'item7', 'item8', 'item9', 'item10', 'item11', 'item12', 'item13', 'item14', 'item15']
Iteration 2: ['item1', 'item3', 'item4', 'item5', 'item6', 'item7', 'item8', 'item9', 'item10', 'item11', 'item12', 'item13', 'item14', 'item15']
Iteration 3: ['item1', 'item2', 'item4', 'item5', 'item6', 'item7', 'item8', 'item9', 'item10', 'item11', 'item12', 'item13', 'item14', 'item15']
Iteration 4: ['item1', 'item2', 'item3', 'item5', 'item6', 'item7', 'item8', 'item9', 'item10', 'item11', 'item12', 'item13', 'item14', 'item15']
Iteration 5: ['item1', 'item2', 'item3', 'item4', 'item6', 'item7', 'item8', 'item9', 'item10', 'item11', 'item12', 'item13', 'item14', 'item15']
Iteration 6: ['item1', 'item2', 'item3', 'item4', 'item5', 'item7', 'item8', 'item9', 'item10', 'item11', 'item12', 'item13', 'item14', 'item15']
Iteration 7: ['item1', 'item2', 'item3', 'item4', 'item5', 'item6', 'item8', 'item9', 'item10', 'item11', 'item12', 'item13'

In [None]:
from openpyxl import load_workbook

# Function to delete listed sheets from the workbook
def delete_sheets(wb, sheet_names):
    for sheet_name in sheet_names:
        if sheet_name in wb.sheetnames:
            del wb[sheet_name]

# Define source workbook and target directory
source_file = 'source_workbook.xlsx'
target_directory = 'target_directory'

# List of sheets to be deleted
sheets_to_delete = ['Sheet6', 'Sheet7', 'Sheet8']

# Create target directory if it doesn't exist
os.makedirs(target_directory, exist_ok=True)

# Copy source workbook to target directory
shutil.copy(source_file, target_directory)

# Load target workbook
target_file = os.path.join(target_directory, 'source_workbook.xlsx')
wb = load_workbook(target_file)

# Delete specified sheets
delete_sheets(wb, sheets_to_delete)

# Save the modified workbook
wb.save(target_file)


In [3]:
import os
import shutil
from openpyxl import load_workbook

# Function to copy sheets from source workbook to target workbook
def copy_sheets(source_wb, target_wb, sheet_names):
    for sheet_name in sheet_names:
        source_sheet = source_wb[sheet_name]
        target_sheet = target_wb.create_sheet(sheet_name)
        for row in source_sheet.iter_rows(values_only=True):
            target_sheet.append(row)

# Define source workbook and target directory
source_file = 'source_workbook.xlsx'
target_directory = 'target_directory'

# List of common sheet names to be kept in all workbooks
common_sheet_names = ['Sheet1', 'Sheet2', 'Sheet3', 'Sheet4', 'Sheet5']

# List of different sheet names for each workbook
different_sheet_names = ['DifferentSheet1', 'DifferentSheet2', 'DifferentSheet3', 'DifferentSheet4', 'DifferentSheet5']

# Create target directory if it doesn't exist
os.makedirs(target_directory, exist_ok=True)

# Copy source workbook to multiple workbooks
for i in range(15):
    # Create target workbook
    target_file = os.path.join(target_directory, f'workbook_{i+1}.xlsx')
    shutil.copy(source_file, target_file)

    # Load source and target workbooks
    source_wb = load_workbook(source_file)
    target_wb = load_workbook(target_file)

    # Remove default 'Sheet' from target workbook
    default_sheet = target_wb['Sheet']
    target_wb.remove(default_sheet)

    # Copy common sheets to target workbook
    copy_sheets(source_wb, target_wb, common_sheet_names)

    # Copy different sheet to target workbook
    different_sheet_name = different_sheet_names[i % len(different_sheet_names)]
    copy_sheets(source_wb, target_wb, [different_sheet_name])

    # Save target workbook
    target_wb.save(target_file)


In [9]:
import xlwings as xw
import pandas as pd

# Open the source workbook
source_wb = xw.Book("source_workbook.xlsx")

# Create a new workbook for the results
result_wb = xw.Book()

# Iterate through each sheet of the source workbook
for sheet in source_wb.sheets:
    # Read the data from the current sheet into a DataFrame
    df = sheet.used_range.options(pd.DataFrame, index=False, header=True).value
    
    # Initialize dictionaries to store counts
    count_h = {}
    count_l = {}

    # Iterate through the DataFrame to count occurrences of 'H' and 'L' for each unique value in 'R'
    for index, row in df.iterrows():
        r_value = row['R']
        hl_value = row['HL']
        if r_value not in count_h:
            count_h[r_value] = 0
            count_l[r_value] = 0
        if hl_value == 'H':
            count_h[r_value] += 1
        elif hl_value == 'L':
            count_l[r_value] += 1

    # Create a DataFrame to store the counts
    counts_df = pd.DataFrame({'R': list(count_h.keys()),
                              'H': list(count_h.values()),
                              'L': list(count_l.values())})

    # Calculate the total for each 'R' value
    counts_df['Total'] = counts_df['H'] + counts_df['L']

    # Calculate the totals for 'H', 'L', and 'Total'
    totals = counts_df[['H', 'L', 'Total']].sum().tolist()

    # Add a row for totals
    counts_df.loc[len(counts_df)] = ['Total'] + totals

    # Write the results to the new workbook with the same sheet name
    result_sheet = result_wb.sheets.add(sheet.name)
    result_sheet.range("A1").value = counts_df

    # Apply borders to the table
    table_range = result_sheet.range("A1").expand()
    table_range.api.Borders.Weight = 2  # Medium weight borders

    # Color the first and last rows with light blue background
    first_row_range = result_sheet.range((1, 1), (1, counts_df.shape[1]))
    last_row_range = result_sheet.range((counts_df.shape[0], 1), (counts_df.shape[0], counts_df.shape[1]))
    first_row_range.api.Interior.Color = 15131566  # Light blue
    last_row_range.api.Interior.Color = 15131566  # Light blue

# Save the result workbook
result_wb.save("result_workbook.xlsx")

# Close both workbooks
source_wb.close()
result_wb.close()


       R  H  L  Total
0      A  3  0      3
1      B  1  1      2
2      C  1  1      2
3      D  0  1      1
4  Total  5  3      8


In [8]:
import pandas as pd

# Example DataFrame
data = {'R': ['A', 'B', 'A', 'C', 'B', 'A', 'D', 'C'],
        'H': ['X', 'Y', 'X', 'X', 'Z', 'Y', 'Z', 'Y'],
        'L': ['M', 'N', 'M', 'N', 'N', 'M', 'N', 'M']}
df = pd.DataFrame(data)

# Initialize dictionaries for counting occurrences
count_dict_r = {}
count_dict_h = {}
count_dict_l = {}

# Count occurrences in column 'R'
for value in df['R']:
    count_dict_r[value] = count_dict_r.get(value, 0) + 1

# Count occurrences in column 'H'
for value in df['H']:
    count_dict_h[value] = count_dict_h.get(value, 0) + 1

# Count occurrences in column 'L'
for value in df['L']:
    count_dict_l[value] = count_dict_l.get(value, 0) + 1

print("Counts for column 'R':", count_dict_r)
print("Counts for column 'H':", count_dict_h)
print("Counts for column 'L':", count_dict_l)


   R  Total_H_Count  Total_L_Count  Total_Count
0  A              3              3            6
1  B              2              2            4
2  C              2              2            4
3  D              1              1            2


In [6]:
import pandas as pd

# Example DataFrame
data = {'R': ['A', 'B', 'A', 'C', 'B', 'A', 'D', 'C']}
df = pd.DataFrame(data)

# Initialize an empty dictionary
count_dict = {}

# Iterate through the DataFrame to count occurrences of each unique value in column 'R'
for value in df['R']:
    count_dict[value] = count_dict.get(value, 0) + 1

print(count_dict)


{'A': 3, 'B': 2, 'C': 2, 'D': 1}


In [None]:
import xlwings as xw

# Open the Excel workbook
wb = xw.Book("path_to_your_workbook.xlsx")

# Iterate over each sheet
for sheet in wb.sheets:
    # Insert "Summary" in cell B2
    sheet.range("B2").value = "Summary"
    
    # Create the first table
    headers_table1 = ["SubF", "Role", "Total", "High", "Low"]
    data_table1 = [["" for _ in range(len(headers_table1))] for _ in range(5)]  # 5 rows including header
    table1_range = sheet.range("B3").expand("table")
    table1_range.clear_contents()
    table1_range.value = [headers_table1] + data_table1
    table1_range.rows[0].color = (0, 112, 192)  # Blue background for header row
    table1_range.rows[-1].color = (0, 112, 192)  # Blue background for last row
    
    # Insert "Sheet name" in cell B10
    sheet.range("B10").value = "Sheet name"
    
    # Create the second table
    headers_table2 = ["C and H", "Y-TD", "Actuals-Feb", "Actuals-Jan", "Target-y", "Target-fy", "Mar-forecast", "Extra"]
    data_table2 = [["" for _ in range(len(headers_table2))] for _ in range(8)]  # 8 rows including header
    table2_range = sheet.range("B11").expand("table")
    table2_range.clear_contents()
    table2_range.value = [headers_table2] + data_table2
    
    # Move down existing contents to accommodate the new tables
    existing_data_range = sheet.range("A1").expand("table")
    num_existing_rows = existing_data_range.shape[0]
    table1_range = sheet.range(table1_range.address)
    table2_range = sheet.range(table2_range.address)
    existing_data_range.offset(row_offset=13).value = existing_data_range.value
    existing_data_range.clear_contents()

# Save the workbook
wb.save()
wb.close()


In [None]:
import xlwings as xw
import shutil

# List of values
values_list = ['value1', 'value2', 'value3']

# Iterate over each value in the list
for value in values_list:
    # Create a copy of the main workbook
    shutil.copyfile('wb_main.xlsx', f'wb_copy_{value}.xlsx')
    
    # Open the copied workbook
    wb_copy = xw.Book(f'wb_copy_{value}.xlsx')
    
    # Open sheets sh1 and sh2
    sh1 = wb_copy.sheets['sh1']
    sh2 = wb_copy.sheets['sh2']
    
    # Apply filter to column W in sh1 and sh2
    sh1.autofilter('W1').filter('W', value)
    sh2.autofilter('W1').filter('W', value)
    
    # Copy filtered data to new sheets
    sh1_updated = wb_copy.sheets.add(name='sh1_updated')
    sh2_updated = wb_copy.sheets.add(name='sh2_updated')
    
    sh1.range('A1').expand().copy(sh1_updated.range('A1'))
    sh2.range('A1').expand().copy(sh2_updated.range('A1'))
    
    # Clear filter from sh1 and sh2
    sh1.autofilter.show_all()
    sh2.autofilter.show_all()
    
    # Delete sh1 and sh2 from the workbook
    wb_copy.sheets['sh1'].delete()
    wb_copy.sheets['sh2'].delete()
    
    # Save and close the workbook
    wb_copy.save()
    wb_copy.close()


In [None]:
import xlwings as xw
import shutil

# List of values
values_list = ['value1', 'value2', 'value3']

# Iterate over each value in the list
for value in values_list:
    # Create a copy of the main workbook
    shutil.copyfile('wb_main.xlsx', f'wb_copy_{value}.xlsx')
    
    # Open the copied workbook
    wb_copy = xw.Book(f'wb_copy_{value}.xlsx')
    
    # Open sheets sh1 and sh2
    sh1 = wb_copy.sheets['sh1']
    sh2 = wb_copy.sheets['sh2']
    
    # Find the last row with data in column W for both sheets
    last_row_sh1 = sh1.range('W' + str(sh1.cells.rows.count)).end('up').row
    last_row_sh2 = sh2.range('W' + str(sh2.cells.rows.count)).end('up').row
    
    # Find the range containing the column data for both sheets
    range_sh1 = sh1.range(f'W1:W{last_row_sh1}')
    range_sh2 = sh2.range(f'W1:W{last_row_sh2}')
    
    # Find rows where column W value matches the current value
    rows_sh1 = [row for row in range_sh1 if row.value == value]
    rows_sh2 = [row for row in range_sh2 if row.value == value]
    
    # Create new sheets sh1_updated and sh2_updated
    sh1_updated = wb_copy.sheets.add(name='sh1_updated')
    sh2_updated = wb_copy.sheets.add(name='sh2_updated')
    
    # Copy the matching rows to the new sheets
    if rows_sh1:
        sh1.range(f'A1').value = sh1.range(f'A{rows_sh1[0].row}:{sh1.cells(last_row_sh1, sh1.cells.columns.count).address}').value
    if rows_sh2:
        sh2.range(f'A1').value = sh2.range(f'A{rows_sh2[0].row}:{sh2.cells(last_row_sh2, sh2.cells.columns.count).address}').value
    
    # Delete sh1 and sh2 from the workbook
    wb_copy.sheets['sh1'].delete()
    wb_copy.sheets['sh2'].delete()
    
    # Save and close the workbook
    wb_copy.save()
    wb_copy.close()


In [None]:
import pandas as pd
import xlwings as xw

# Load your DataFrame (df) and specify the column to copy
col_to_copy = 'Your_Column_Name'  # Replace 'Your_Column_Name' with the actual column name
# Load your DataFrame (df) and specify the column to copy
values = df[col_to_copy].values

# Start cell where you want to paste the values (e.g., E1)
start_cell = 'E1'

# Open an Excel workbook
wb = xw.Book('your_workbook.xlsx')  # Replace 'your_workbook.xlsx' with the actual file name

# Open the specified sheet
sheet = wb.sheets['Sheet1']  # Replace 'Sheet1' with the actual sheet name

# Write the values to the specified range in the Excel sheet
sheet.range(start_cell).value = values

# Close the workbook
wb.close()


In [None]:
import xlwings as xw

# Open the workbook
wb = xw.Book("example.xlsx")

# Specify the sheet and column you want to copy from
sheet = wb.sheets["Sheet1"]
column_to_copy = "A"

# Find the last row with data in the column
last_row = sheet.range(f"{column_to_copy}:{column_to_copy}").end("down").row

# Get the range containing the column data
range_to_copy = sheet.range(f"{column_to_copy}1:{column_to_copy}{last_row}")

# Extract the values from the range (result of formulas)
values = range_to_copy.value

# Paste the values into a new column
new_column = "E"
start_cell = f"{new_column}1"
end_cell = f"{new_column}{last_row}"
sheet.range(start_cell).value = values

# Save the workbook
wb.save("example_with_copied_column.xlsx")

# Close the workbook
wb.close()


In [14]:
import xlwings as xw

# Open the workbook
wb = xw.Book(r"C:\\Users\\KS\\OneDrive\\Documents\\test11.xlsx")

# Open the sheet
s1 = wb.sheets['s2']

# Get the range of data in column A
col_A_range = s1.range('E:E')

# Get the last row with data in column A
last_row = s1.cells(s1.rows.count, 'E').end('up').row

# Define the range where you want to paste the values (in column E)
col_E_range = s1.range(f'F1:F{last_row}')

# Copy the values from column A to column E
col_A_range.copy(destination=col_E_range, transpose=True, skip_blanks=True, transpose_dest=True)

# Close the workbook (optional)
wb.close()


AttributeError: 'Sheet' object has no attribute 'rows'

In [5]:
import xlwings as xw

# Open the workbook
wb = xw.Book("C:\\Users\\KS\\OneDrive\\Documents\\test11.xlsx")

# Specify the sheet and cell you want to remove
sheet = wb.sheets["S1"]
cell_to_remove = "A2"  # For example, remove cell A2
# Clear the contents of the cell and shift cells up
sheet.range(cell_to_remove).delete(shift="up")

# Save the workbook
wb.save("C:\\Users\\KS\\OneDrive\\Documents\\test112.xlsx")

# Close the workbook
wb.close()


In [13]:
import win32com.client

# Open Excel application
excel = win32com.client.Dispatch("Excel.Application")

# Open the workbook
workbook = excel.Workbooks.Open(r"C:\\Users\\KS\\OneDrive\\Documents\\test11.xlsx")

# Get the worksheet named "A"
sheet = workbook.Sheets("S2")

# Get the range of column D
column_d = sheet.Columns("E")


# Get the cell containing the formula
cell = sheet.Range("E1:E10")  # Replace "A1" with your cell address

# Read the text of the cell (string value)
cell_text = cell.Text

print("Cell text:", cell_text)
print("cell", cell)

# Close the workbook
workbook.Close()

# Quit Excel application
excel.Quit()



Cell text: None
cell (('Test',), ('JAm',), ('JAm',), ('Ham',), ('Ham',), ('Ham',), ('Ham',), ('Ham',), ('Ham',), ('Ham',))


In [None]:
from openpyxl import load_workbook

# Load the workbook
workbook = load_workbook('your_workbook.xlsx')

# Select the specific sheet (replace 'Sheet A' with the actual name of your sheet)
sheet = workbook['Sheet A']

# Create a new column index (for example, column F)
column_index = 6  # Column F corresponds to index 6 (0-indexed)

# Iterate over the cells in column D and paste values into column F
for i, cell in enumerate(sheet['D'], start=1):
    # Evaluate the formula to get the value
    value = cell.value
    # Paste the value into the corresponding cell in the new column (Column F)
    sheet.cell(row=i, column=column_index, value=value)

# Save the workbook
workbook.save('your_updated_workbook.xlsx')

print("Values copied from column D and pasted into column F!")


In [None]:
from openpyxl import load_workbook

# Load the workbook
workbook = load_workbook('your_workbook.xlsx')

# Select the active sheet (you can change this if you want to select a specific sheet)
sheet = workbook.active

# Define the column to copy (for example, column D)
column_to_copy = sheet['D']

# Create a new column for pasting the values (for example, column E)
column_to_paste = sheet['E']

# Iterate over the cells in the column to copy
for i, cell in enumerate(column_to_copy, start=1):
    # Evaluate the formula to get the value
    value = cell.value
    # Paste the value into the corresponding cell in the new column
    column_to_paste[i].value = value

# Save the workbook
workbook.save('your_updated_workbook.xlsx')

print("Values copied from column D and pasted into column E!")



In [None]:
import shutil

# Path to the original workbook
original_workbook_path = 'path/to/original_workbook.xlsx'

# Path where you want to create the duplicate workbook
duplicate_workbook_path = 'path/to/duplicate_workbook.xlsx'

# Copy the original workbook to create the duplicate
shutil.copyfile(original_workbook_path, duplicate_workbook_path)

print("Workbook duplicated successfully!")


In [None]:
from openpyxl import load_workbook
from openpyxl import Workbook

# Load the original workbook
original_workbook = load_workbook('original_workbook.xlsx')

# Create a new workbook
copied_workbook = Workbook()

# Copy each sheet from the original workbook to the new workbook
for sheet_name in original_workbook.sheetnames:
    original_sheet = original_workbook[sheet_name]
    copied_sheet = copied_workbook.create_sheet(title=sheet_name)
    for row in original_sheet.iter_rows(values_only=True):
        copied_sheet.append(row)

# Save the copied workbook with a different filename
copied_workbook.save('copied_workbook.xlsx')


In [None]:
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
from openpyxl.styles import PatternFill

# List of strings
list_of_strings = ['String1', 'String2', 'String3']  # Update with your list of strings

for string_item in list_of_strings:
    # Step 1: Open the original workbook and create a copy
    original_workbook = load_workbook('original_workbook.xlsx')
    output_workbook = original_workbook.copy_workbook()

    # Step 2: Copy the formula results from column 'S' to a new column 'SV'
    source_sheet = output_workbook['YourSheetName']  # Update with your sheet name
    sv_column_index = source_sheet.max_column + 1
    for row in source_sheet.iter_rows(min_row=2, max_row=source_sheet.max_row, min_col=19, max_col=19):  # Column S is column 19
        source_sheet.cell(row=row[0].row, column=sv_column_index).value = row[0].value  # Copy formula result to new column

    # Step 3: Search for the list item in column 'SV' in sheet 'AH', copy data to new sheet 'AHV', and delete 'AH' sheet
    ah_sheet = output_workbook['AH']
    ahv_sheet = output_workbook.create_sheet(title='AHV')
    ahv_row_index = 1
    for row in ah_sheet.iter_rows(min_row=2, max_row=ah_sheet.max_row, min_col=sv_column_index, max_col=sv_column_index):
        if row[0].value == string_item:
            for cell in row:
                ahv_sheet.cell(row=ahv_row_index, column=cell.column).value = cell.value
            ahv_row_index += 1
    output_workbook.remove(ah_sheet)  # Delete 'AH' sheet

    # Step 4: Save the new workbook
    output_workbook.save(f'output_workbook_{string_item}.xlsx')


In [None]:
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
import pandas as pd

# Load the original workbook
original_workbook = load_workbook('original_workbook.xlsx')

# Identify the sheet and column containing the filter condition
target_sheet_name = 'TargetSheet'
filter_column_name = 'ColumnA'  # Change this to the actual column name

# Get the target sheet
target_sheet = original_workbook[target_sheet_name]

# Extract the data from the target sheet into a pandas DataFrame
data = target_sheet.values
columns = next(data)
df = pd.DataFrame(data, columns=columns)

# Filter the DataFrame based on the filter column
filtered_df = df[df[filter_column_name] == 'YourCondition']

# Create a new workbook
output_workbook = load_workbook()

# Copy the target sheet along with formatting and formulae
output_workbook[target_sheet_name] = target_sheet

# Write the filtered DataFrame to the target sheet
for row in dataframe_to_rows(filtered_df, index=False, header=True):
    output_workbook[target_sheet_name].append(row)

# Save the new workbook
output_workbook.save('filtered_workbook.xlsx')


In [None]:
import win32com.client as win32

# Open Excel application
excel = win32.Dispatch("Excel.Application")

# Open the workbook
workbook = excel.Workbooks.Open(r'path\to\your\workbook.xlsx')

# Get the specific sheet (e.g., Actuals($))
sheet = workbook.Sheets("Actuals($)")

# Get the range of column SF
sf_column = sheet.Columns("SF")

# Copy the values from column SF
sf_column.Copy()

# Find the next available column index for SFV
sfv_column_index = sf_column.Column + 1

# Paste the values as values (result of formula) into a new column SFV
sfv_column = sheet.Cells(1, sfv_column_index)
sfv_column.PasteSpecial(Paste=-4163, Operation=-4144, SkipBlanks=False, Transpose=False)

# Save the workbook
workbook.Save()

# Close the workbook and Excel application
workbook.Close()
excel.Quit()


In [None]:
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter

# Load the workbook
workbook = load_workbook('your_workbook.xlsx')

# Select the worksheet
worksheet = workbook['Your_Sheet']

# Get the range of cells in the original column
original_column = worksheet['E']  # Assuming the SF column is column E

# Insert a new column after the original column
new_column_index = original_column[0].column + 1
worksheet.insert_cols(new_column_index)

# Get the letter of the new column
new_column_letter = get_column_letter(new_column_index)

# Iterate through each cell in the original column and copy the formula
for cell in original_column:
    worksheet[f'{new_column_letter}{cell.row}'].value = cell.value

# Save the workbook
workbook.save('your_modified_workbook.xlsx')


In [None]:
def copy_filtered_data(source_sheet, target_sheet, unique_value):
    for row in range(2, source_sheet.max_row + 1):
        sf_value = source_sheet.cell(row=row, column=5).value  # Assuming SF column is the 5th column (index 4)
        if sf_value == unique_value:
            target_sheet.append([source_sheet.cell(row=row, column=col).value for col in range(1, source_sheet.max_column + 1)])


In [None]:
import openpyxl
from openpyxl.utils.cell import get_column_letter

# Function to read unique values from a column in a sheet
def read_unique_values(file_path, sheet_name, column_name):
    workbook = openpyxl.load_workbook(file_path)
    sheet = workbook[sheet_name]
    max_row = sheet.max_row
    
    unique_values = set()
    for row in range(2, max_row + 1):  # Start from 2nd row (excluding header)
        cell_value = sheet[f"{column_name}{row}"].value
        unique_values.add(cell_value)
    
    return list(unique_values)

# Function to copy filtered data to a new sheet
def copy_filtered_data(source_sheet, target_sheet, unique_value):
    for row in source_sheet.iter_rows(min_row=2, values_only=True):
        if row[4] == unique_value:  # Assuming SF column is the 5th column (index 4)
            target_sheet.append(row)

# Main function
def process_main_workbook(main_file_path):
    # Read unique values from 'Actuals($)' and 'Actuals(#)' sheets
    unique_values_dollar = read_unique_values(main_file_path, 'Actuals($)', 'E')
    unique_values_hash = read_unique_values(main_file_path, 'Actuals(#)', 'E')
    
    for unique_value in unique_values_dollar:
        # Create a copy of the main workbook
        new_workbook = openpyxl.load_workbook(main_file_path)
        
        # Apply filters to 'Actuals($)' and 'Actuals(#)' sheets
        dollar_sheet = new_workbook['Actuals($)']
        hash_sheet = new_workbook['Actuals(#)']
        dollar_filtered_sheet = new_workbook.create_sheet(title=f'Actuals($)_fil_{unique_value}')
        hash_filtered_sheet = new_workbook.create_sheet(title=f'Actuals(#)_fil_{unique_value}')
        
        # Copy filtered data to new sheets
        copy_filtered_data(dollar_sheet, dollar_filtered_sheet, unique_value)
        copy_filtered_data(hash_sheet, hash_filtered_sheet, unique_value)
        
        # Delete the original sheets
        new_workbook.remove(dollar_sheet)
        new_workbook.remove(hash_sheet)
        
        # Save the new workbook with filtered data
        new_file_name = f'Filtered_Workbook_{unique_value}.xlsx'
        new_workbook.save(new_file_name)
        print(f"Filtered workbook saved as '{new_file_name}'")

# Usage
process_main_workbook('main_workbook.xlsx')


In [None]:
# filter and copy the sheets
import openpyxl
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.utils.cell import get_column_letter

# Function to read unique values from a column in a sheet
def read_unique_values(file_path, sheet_name, column_name):
    workbook = openpyxl.load_workbook(file_path)
    sheet = workbook[sheet_name]
    unique_values = set(sheet[column_name][1:])  # Exclude header row
    unique_values = [value.value for value in unique_values if value.value is not None]  # Filter out None values
    return unique_values

# Function to copy filtered data to a new sheet
def copy_filtered_data(source_sheet, target_sheet, unique_value):
    for row in source_sheet.iter_rows(min_row=2, values_only=True):
        if row[4] == unique_value:  # Assuming SF column is the 5th column (index 4)
            target_sheet.append(row)

# Main function
def process_main_workbook(main_file_path):
    # Read unique values from 'Actuals($)' and 'Actuals(#)' sheets
    unique_values_dollar = read_unique_values(main_file_path, 'Actuals($)', 'SF')
    unique_values_hash = read_unique_values(main_file_path, 'Actuals(#)', 'SF')
    
    for unique_value in unique_values_dollar:
        # Create a copy of the main workbook
        new_workbook = openpyxl.load_workbook(main_file_path)
        
        # Apply filters to 'Actuals($)' and 'Actuals(#)' sheets
        dollar_sheet = new_workbook['Actuals($)']
        hash_sheet = new_workbook['Actuals(#)']
        dollar_filtered_sheet = new_workbook.create_sheet(title=f'Actuals($)_fil_{unique_value}')
        hash_filtered_sheet = new_workbook.create_sheet(title=f'Actuals(#)_fil_{unique_value}')
        
        # Copy filtered data to new sheets
        copy_filtered_data(dollar_sheet, dollar_filtered_sheet, unique_value)
        copy_filtered_data(hash_sheet, hash_filtered_sheet, unique_value)
        
        # Delete the original sheets
        new_workbook.remove(dollar_sheet)
        new_workbook.remove(hash_sheet)
        
        # Save the new workbook with filtered data
        new_file_name = f'Filtered_Workbook_{unique_value}.xlsx'
        new_workbook.save(new_file_name)
        print(f"Filtered workbook saved as '{new_file_name}'")

# Usage
process_main_workbook('main_workbook.xlsx')


In [None]:
from openpyxl import load_workbook

# Load the workbook
workbook = load_workbook("your_workbook.xlsx")

# Initialize sets to store unique SF values
sf_values = set()

# Iterate through the sheets
for sheet_name in ["Actuals($)", "Actuals(#)"]:
    # Access the sheet
    sheet = workbook[sheet_name]
    
    # Find the index of the SF column
    sf_column_index = None
    for col_idx in range(1, sheet.max_column + 1):
        if sheet.cell(row=1, column=col_idx).value == 'SF':
            sf_column_index = col_idx
            break
    
    if sf_column_index:
        # Iterate through rows to extract SF values
        for row_idx in range(2, sheet.max_row + 1):
            sf_value = sheet.cell(row=row_idx, column=sf_column_index).value
            if sf_value:
                sf_values.add(sf_value)

# Save the unique SF values to a file
with open("unique_sf_values.txt", "w") as file:
    for sf_value in sf_values:
        file.write(str(sf_value) + "\n")

print("Unique SF values list created and saved successfully.")


In [13]:
import win32com.client as win32

# Open Excel application
excel = win32.Dispatch("Excel.Application")

# Open the main workbook wb1
workbook = excel.Workbooks.Open(r"test11.xlsx")

# Save the workbook with a different name
new_workbook_name = r"test_replica14.xlsx"
workbook.SaveAs(new_workbook_name)

# Close the workbook
workbook.Close()

# Quit Excel application
excel.Quit()


In [14]:
import win32com.client as win32

# Open Excel application
excel = win32.Dispatch("Excel.Application")

# Open the new workbook
workbook = excel.Workbooks.Open(new_workbook_name)

# Get the s1 sheet
sheet_s1 = workbook.Sheets("s1")

# Get the used range of s1 sheet
used_range = sheet_s1.UsedRange

# Get the column index for "Agency Name"
agency_name_col_index = None
for col_index in range(1, used_range.Columns.Count + 1):
    if used_range.Cells(1, col_index).Value == "Agency Name":
        agency_name_col_index = col_index
        break

# Delete rows where Agency Name is not "ALNM"
if agency_name_col_index:
    row_index = 2  # Start from the second row
    while row_index <= used_range.Rows.Count:
        cell_value = used_range.Cells(row_index, agency_name_col_index).Value
        if cell_value != "Catholic Charities CYO":
            # Delete the entire row if Agency Name is not "ALNM"
            sheet_s1.Rows(row_index).Delete()
        else:
            row_index += 1  # Move to the next row

# Save the workbook
workbook.Save()

# Close the workbook
workbook.Close()

# Quit Excel application
excel.Quit()


In [None]:
['AIDS Housing Alliance',
'AIDS Legal Referral Panel of the SF Bay Area',
'APA Family Support Services/SCDC',
'APA Family Support Services/YMCA of San Francisco (Bayview)',
'Arab Cultural and Community Center',
'Asian Americans Advancing Justice - Asian Law Caucus',
'Asian Women’s Shelter',
'ASIAN, Inc.']


In [18]:
import win32com.client as win32

# Open Excel application
excel = win32.Dispatch("Excel.Application")
# Open the main workbook
main_workbook = excel.Workbooks.Open(r"test11.xlsx")

# List of agency names
agency_names =['AIDS Housing Alliance',
'AIDS Legal Referral Panel of the SF Bay Area',
'APA Family Support Services/SCDC',
'APA Family Support Services/YMCA of San Francisco (Bayview)',
'Arab Cultural and Community Center',
'Asian Americans Advancing Justice - Asian Law Caucus',
'Asian Women’s Shelter',
'ASIAN, Inc.']

# Iterate over each agency name
for agency_name in agency_names:
    # Create a new workbook for the current agency name
    new_workbook = excel.Workbooks.Add()
    new_workbook_name = f"W_{agency_name}.xlsx"
    
    # Get the s1 sheet from the main workbook
    sheet_s1 = main_workbook.Sheets("s1")
    
    # Copy s1 sheet to the new workbook
    sheet_s1.Copy(Before=new_workbook.Sheets(1))
    
    # Rename the copied sheet to "s1"
    new_workbook.Sheets(1).Name = "s1"
    
    # Delete the extra "Sheet1" sheet
    new_workbook.Sheets("Sheet1").Delete()
    
    # Get the used range of s1 sheet
    used_range = new_workbook.Sheets("s1").UsedRange
    
    # Get the column index for "Agency Name"
    agency_name_col_index = None
    for col_index in range(1, used_range.Columns.Count + 1):
        if used_range.Cells(1, col_index).Value == "Agency Name":
            agency_name_col_index = col_index
            break
    
    # Delete rows where Agency Name is not the current agency name
    if agency_name_col_index:
        row_index = 2  # Start from the second row
        while row_index <= used_range.Rows.Count:
            cell_value = used_range.Cells(row_index, agency_name_col_index).Value
            if cell_value != agency_name:
                # Delete the entire row if Agency Name is not the current agency name
                new_workbook.Sheets("s1").Rows(row_index).Delete()
            else:
                row_index += 1  # Move to the next row
    
    # Save the new workbook
    new_workbook.SaveAs(new_workbook_name)
    new_workbook.Close()

    
# Close the main workbook
main_workbook.Close()

# Quit Excel application
excel.Quit()


com_error: (-2147352567, 'Exception occurred.', (0, 'Microsoft Excel', 'Microsoft Excel cannot access the file "C:\\Users\\KS\\OneDrive\\Documents\\W_APA Family Support Services\\AA42E420". There are several possible reasons:\n\n• The file name or path does not exist.\n• The file is being used by another program.\n• The workbook you are trying to save has the same name as a currently open workbook.', 'xlmain11.chm', 0, -2146827284), None)

In [26]:
import win32com.client as win32

# Open Excel application
excel = win32.Dispatch("Excel.Application")

# Open the main workbook
main_workbook = excel.Workbooks.Open(r"test11.xlsx")

# List of agency names
agency_names =['AIDS Housing Alliance',
'AIDS Legal Referral Panel of the SF Bay Area',
'Arab Cultural and Community Center',
'Asian Americans Advancing Justice - Asian Law Caucus',
'Asian Women’s Shelter',
'ASIAN, Inc.']
 # Example list of agency names

# Iterate over each agency name
for agency_name in agency_names:
    # Create a new workbook for the current agency name
    new_workbook = excel.Workbooks.Add()
    new_workbook_name = f"a_1{agency_name}.xlsx"
    
    # Copy the sheets from the main workbook to the new workbook
    for sheet in main_workbook.Sheets:
        sheet.Copy(Before=new_workbook.Sheets(1))
    
    # Get the s1 sheet from the new workbook
    sheet_s1 = new_workbook.Sheets("s1")
    
    # Get the used range of s1 sheet
    used_range = sheet_s1.UsedRange
    
    # Get the column index for "Agency Name"
    agency_name_col_index = None
    for col_index in range(1, used_range.Columns.Count + 1):
        if used_range.Cells(1, col_index).Value == "Agency Name":
            agency_name_col_index = col_index
            break
    
    # Delete rows where Agency Name is not the current agency name
    if agency_name_col_index:
        row_index = 2  # Start from the second row
        while row_index <= used_range.Rows.Count:
            cell_value = used_range.Cells(row_index, agency_name_col_index).Value
            if cell_value != agency_name:
                # Delete the entire row if Agency Name is not the current agency name
                sheet_s1.Rows(row_index).Delete()
            else:
                row_index += 1  # Move to the next row
   
        # List of sheet names in the desired order
    sheet_order = ["s1", "s2", "s3"]  # Add more sheets as needed

    # Create a dictionary to store sheet index by name
    sheet_index ={sheet.Name: sheet.Index for sheet in new_workbook.Sheets}

    # Rearrange sheets according to the desired order
    for i, sheet_name in enumerate(sheet_order, 1):
        new_workbook.Sheets(sheet_name).Move(Before=new_workbook.Sheets(i))
    # Save the new workbook
    new_workbook.Sheets("Sheet1").Delete()
    
    new_workbook.SaveAs(new_workbook_name)
    new_workbook.Close()

# Close the main workbook
main_workbook.Close()

# Quit Excel application
excel.Quit()


In [None]:
w

In [4]:
from openpyxl import load_workbook

# Load the workbook
workbook = load_workbook("your_workbook.xlsx")

# Initialize a list to store unique SF values while preserving their sequence
sf_values = []

# Iterate through the sheets
for sheet_name in ["Actuals($)", "Actuals(#)"]:
    # Access the sheet
    sheet = workbook[sheet_name]
    
    # Find the index of the SF column
    sf_column_index = None
    for col_idx in range(1, sheet.max_column + 1):
        if sheet.cell(row=1, column=col_idx).value == 'SF':
            sf_column_index = col_idx
            break
    
    if sf_column_index:
        # Iterate through rows to extract SF values
        for row_idx in range(2, sheet.max_row + 1):
            sf_value = sheet.cell(row=row_idx, column=sf_column_index).value
            if sf_value and sf_value not in sf_values:
                sf_values.append(sf_value)

# Save the unique SF values to a file while maintaining sequence
with open("unique_sf_values.txt", "w") as file:
    for sf_value in sf_values:
        file.write(str(sf_value) + "\n")

print("Unique SF values list created and saved successfully.")


FileNotFoundError: [Errno 2] No such file or directory: 'your_workbook.xlsx'

In [5]:
import os
import openpyxl
import win32com.client as win32

# Function to create a list of unique 'SF' values and save it to a file
def create_unique_sf_list(input_workbook, output_file):
    unique_sf_values = set()

    # Open the workbook
    workbook = openpyxl.load_workbook(input_workbook, read_only=True)

    # Iterate over each sheet
    for sheet in workbook:
        if sheet.title == 'Actuals($)':
            for row in sheet.iter_rows(min_row=2, values_only=True):
                unique_sf_values.add(row[0])  # Assuming 'SF' column is the first column

    # Save unique 'SF' values to a file
    with open(output_file, 'w') as file:
        for sf_value in unique_sf_values:
            file.write(f"{sf_value}\n")

    print("Unique SF values list created and saved successfully.")

# Function to read unique 'SF' values from file and perform remaining operations
def process_workbooks(input_workbook, unique_sf_file, output_folder):
    # Read unique 'SF' values from file
    with open(unique_sf_file, 'r') as file:
        unique_sf_values = [line.strip() for line in file]

    # Create Excel application object
    excel = win32.Dispatch('Excel.Application')
    excel.Visible = False  # Hide Excel window

    # Open the main workbook
    workbook = excel.Workbooks.Open(input_workbook)

    # Get sheet names
    sheet_names = [sheet.Name for sheet in workbook.Sheets]

    # Iterate over each unique 'SF' value
    for sf_value in unique_sf_values:
        new_workbook_name = os.path.join(output_folder, f"wb1_{sf_value}.xlsx")
        new_workbook = excel.Workbooks.Add()  # Create a new workbook

        # Iterate over each sheet in the main workbook
        for sheet_name in sheet_names:
            sheet = workbook.Sheets(sheet_name)
            sheet.Copy(Before=new_workbook.Sheets(1))  # Copy sheet to the new workbook

            # Delete rows where 'SF' value does not match the current 'SF' value
            if sheet_name == 'Actuals($)':
                new_sheet = new_workbook.Sheets(sheet.Name)
                last_row = new_sheet.Cells(new_sheet.Rows.Count, 1).End(-4162).Row
                for i in range(last_row, 1, -1):
                    if new_sheet.Cells(i, 1).Value != sf_value:
                        new_sheet.Rows(i).Delete()

        # Save the new workbook
        new_workbook.SaveAs(new_workbook_name)
        new_workbook.Close()

    # Close the main workbook
    workbook.Close()
    excel.Quit()

# Main workbook containing multiple sheets
input_workbook = 'wb1.xlsx'

# Output folder for saving filtered workbooks
output_folder = 'filtered_workbooks/'

# Create output folder if it doesn't exist
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# File to store unique 'SF' values
unique_sf_file = 'unique_sf_values.txt'

# Create unique 'SF' values list and save it to a file
create_unique_sf_list(input_workbook, unique_sf_file)

# Process workbooks using unique 'SF' values list
process_workbooks(input_workbook, unique_sf_file, output_folder)


FileNotFoundError: [Errno 2] No such file or directory: 'wb1.xlsx'

In [45]:
import os
import win32com.client as win32

# Constants for Excel
xlUp = -4162  # Numeric value for xlUp

# Function to filter and save data for each unique 'SF' value
def filter_and_save_workbooks(main_workbook, output_folder):
    # Create Excel application object
    excel = win32.Dispatch('Excel.Application')
    excel.Visible = False  # Hide Excel window

    # Open the main workbook
    workbook = excel.Workbooks.Open(main_workbook)

    # Get sheet names
    sheet_names = [sheet.Name for sheet in workbook.Sheets]

    # Create a list of unique 'SF' values
    unique_sf_values = set()
    for sheet_name in sheet_names:
        sheet = workbook.Sheets(sheet_name)
        last_row = sheet.Cells(sheet.Rows.Count, 1).End(xlUp).Row
        sf_values = sheet.Range(f"A2:A{last_row}").Value
        unique_sf_values.update(sf_values)

    # Iterate over each unique 'SF' value
    for sf_value in unique_sf_values:
        new_workbook_name = os.path.join(output_folder, f"wb1_{sf_value}.xlsx")
        new_workbook = excel.Workbooks.Add()  # Create a new workbook

        # Iterate over each sheet in the main workbook
        for sheet_name in sheet_names:
            sheet = workbook.Sheets(sheet_name)
            sheet.Copy(Before=new_workbook.Sheets(1))  # Copy sheet to the new workbook

            # Delete rows where 'SF' value does not match the current 'SF' value
            if sheet_name == 'Actuals($)' or sheet_name == 'Actuals(#)':
                new_sheet = new_workbook.Sheets(sheet.Name)
                last_row = new_sheet.Cells(new_sheet.Rows.Count, 1).End(xlUp).Row
                for i in range(last_row, 1, -1):
                    if new_sheet.Cells(i, 1).Value != sf_value:
                        new_sheet.Rows(i).Delete()

        # Save the new workbook
        new_workbook.SaveAs(new_workbook_name)
        new_workbook.Close()

    # Close the main workbook
    workbook.Close()
    excel.Quit()

# Main workbook containing multiple sheets
main_workbook = 'wb1.xlsx'

# Output folder for saving filtered workbooks
output_folder = 'filtered_workbooks/'

# Create output folder if it doesn't exist
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Filter and save data for each unique 'SF' value
filter_and_save_workbooks(main_workbook, output_folder)


4
4


In [None]:
from openpyxl import load_workbook
from openpyxl.styles import PatternFill, Font

# Function to apply formatting to a worksheet
def apply_formatting(ws):
    # Make the first row bold
    for cell in ws[1]:
        cell.font = Font(bold=True)

    # Set background color 'blue' with font color 'white' for columns M to T in the first row
    for col in range(13, 21):  # Columns M to T
        cell = ws.cell(row=1, column=col)
        cell.fill = PatternFill(start_color="000080", end_color="000080", fill_type="solid")
        cell.font = Font(color="FFFFFF")

    # Set light blue background color for all rows in columns M to T
    for row in range(2, ws.max_row + 1):
        for col in range(`13, 21):  # Columns M to T
            cell = ws.cell(row=row, column=col)
            cell.fill = PatternFill(start_color="ADD8E6", end_color="ADD8E6", fill_type="solid")

# Load the combined workbook
combined_workbook = load_workbook('combined_workbook.xlsx')

# Apply formatting to each sheet in the combined workbook
for sheet in combined_workbook.sheetnames:
    ws = combined_workbook[sheet]
    apply_formatting(ws)

# Save the modified combined workbook
combined_workbook.save('combined_workbook_formatted.xlsx')
`

In [None]:
import pandas as pd
from openpyxl import load_workbook

# Function to split sheets based on unique values of 'SF' column
def split_sheets(input_file, output_folder, sheets):
    for sheet_name in sheets:
        # Load sheet data
        df = pd.read_excel(input_file, sheet_name=sheet_name)
        
        # Convert 'SF' column to text type
        df['SF'] = df['SF'].astype(str)
        
        # Fetch unique values of 'SF'
        unique_sf_values = df['SF'].unique()
        
        # Create a new workbook for each unique 'SF' value
        for sf_value in unique_sf_values:
            # Filter sheet data based on 'SF' value
            filtered_df = df[df['SF'] == sf_value]
            
            # Write filtered data to a new workbook
            output_file = f"{output_folder}/{sheet_name}_{sf_value}.xlsx"
            with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
                filtered_df.to_excel(writer, index=False, sheet_name=sheet_name)

# Example usage
input_file = 'input_workbook.xlsx'
output_folder = 'output_workbooks'
sheets_to_process = ['Sheet1', 'Sheet2']  # List of sheet names to process
split_sheets(input_file, output_folder, sheets_to_process)


In [110]:
### ABSOLUTELY CORRECT

from openpyxl import load_workbook

# Load the workbook
workbook = load_workbook("sam.xlsx")

# Iterate through each sheet in the workbook
for sheet in workbook:
    # Find the index of the 'RT' column
    rt_column_index = None
    for col_idx in range(1, sheet.max_column + 1):
        if sheet.cell(row=1, column=col_idx).value == 'RT':
            rt_column_index = col_idx
            break

    # If 'RT' column found
    if rt_column_index:
        # Collect unique values from the 'RT' column
        unique_values = set(sheet.cell(row=row_idx, column=rt_column_index).value
                             for row_idx in range(2, sheet.max_row + 1))

        # Create a list to store sub-tables
        sub_tables = []

        # Create sub-tables for each unique 'RT' value
        for rt_value in unique_values:
            sub_table = []  # Initialize sub-table

            # Add heading for the sub-table
            sub_table.append([f"Sub-table for '{rt_value}'"])

            # Copy headers from the main table
            sub_table.append([sheet.cell(row=1, column=col_idx).value for col_idx in range(1, sheet.max_column + 1)])

            # Copy rows matching 'RT' value to sub-table
            for row_idx in range(2, sheet.max_row + 1):
                if sheet.cell(row=row_idx, column=rt_column_index).value == rt_value:
                    sub_table.append([sheet.cell(row=row_idx, column=col_idx).value for col_idx in range(1, sheet.max_column + 1)])

            # Add sub-table to the list
            sub_tables.append(sub_table)

        # Clear the original sheet
        sheet.delete_rows(1, sheet.max_row)

        # Add sub-tables to the sheet
        for sub_table in sub_tables:
            for sub_row in sub_table:
                sheet.append(sub_row)  # Append each row of the sub-table
            sheet.append([])  # Add a gap of one row after each sub-table

# Save the modified workbook
workbook.save("modified_workbook.xlsx")


In [80]:
from openpyxl import load_workbook
from openpyxl.styles import Font, Border, Side
from openpyxl.worksheet.table import Table, TableStyleInfo

# Load the workbook
workbook = load_workbook("sam.xlsx")

# Define font and border styles
header_font = Font(bold=True)
header_border = Border(bottom=Side(style='medium'))
table_border = Border(outline=True, right=Side(style='thin'), bottom=Side(style='thin'))

# Define table style
table_style = TableStyleInfo(name="TableStyleMedium9", showFirstColumn=False,
                              showLastColumn=False, showRowStripes=True, showColumnStripes=True)

# Iterate through each sheet in the workbook
for sheet in workbook:
    # Find the index of the 'RT' column
    rt_column_index = None
    for col_idx in range(1, sheet.max_column + 1):
        if sheet.cell(row=1, column=col_idx).value == 'RT':
            rt_column_index = col_idx
            break

    # If 'RT' column found
    if rt_column_index:
        # Collect unique values from the 'RT' column
        unique_values = set(sheet.cell(row=row_idx, column=rt_column_index).value
                             for row_idx in range(2, sheet.max_row + 1))

        # Create a list to store sub-tables
        sub_tables = []

        # Create sub-tables for each unique 'RT' value
        for rt_value in unique_values:
            sub_table = []  # Initialize sub-table

            # Copy headers from the main table
            header_row = []
            for col_idx in range(1, sheet.max_column + 1):
                header = sheet.cell(row=1, column=col_idx).value
                header_row.append(header)
            sub_table.append(header_row)

            # Add heading for the sub-table
            heading = [f"Sub-table for '{rt_value}'"]
            sub_table.append(heading)

            # Copy rows matching 'RT' value to sub-table
            for row_idx in range(2, sheet.max_row + 1):
                if sheet.cell(row=row_idx, column=rt_column_index).value == rt_value:
                    sub_row = [sheet.cell(row=row_idx, column=col_idx).value for col_idx in range(1, sheet.max_column + 1)]
                    sub_table.append(sub_row)

            # Add sub-table to the list
            sub_tables.append(sub_table)

        # Clear the original sheet
        sheet.delete_rows(1, sheet.max_row)

        # Add sub-tables to the sheet
        for sub_table_idx, sub_table in enumerate(sub_tables):
            # Append rows of the sub-table
            for sub_row_idx, sub_row in enumerate(sub_table):
                sheet.append(sub_row)

            # Apply formatting to the header row of the first sub-table
            if sub_table_idx == 0:
                for cell in sheet.iter_rows(min_row=1, max_row=1, max_col=sheet.max_column):
                    for header_cell in cell:
                        header_cell.font = header_font
                        header_cell.border = header_border

            # Apply borders to all cells within the sub-table
            for row_idx in range(1, len(sub_table) + 1):
                for col_idx in range(1, sheet.max_column + 1):
                    cell = sheet.cell(row=row_idx, column=col_idx)
                    cell.border = table_border

            # Insert space of 2 rows between sub-tables
            if sub_table_idx < len(sub_tables) - 1:
                sheet.insert_rows(sheet.max_row + 1, amount=2)

            # Create a table for the sub-table
            table_name = f"Table_{sheet.title}_{sub_table_idx}"  # Unique table name
            table = Table(displayName=table_name, ref=sheet.dimensions)
            table.tableStyleInfo = table_style
            print(f"{table_name} in sheet {sheet.title}")
            sheet.add_table(table)

# Save the modified workbook
workbook.save("modified_workbook.xlsx")


Table_Sheet1_0 in sheet Sheet1
Table_Sheet1_1 in sheet Sheet1
Table_Sheet1_2 in sheet Sheet1
Table_Sheet2_0 in sheet Sheet2
Table_Sheet2_1 in sheet Sheet2
Table_Sheet2_2 in sheet Sheet2


In [118]:
## good for borders
from openpyxl import load_workbook
from openpyxl.styles import Border, Side

# Load the workbook
workbook = load_workbook("modified_workbook.xlsx")

# Access a specific sheet
sheet = workbook["Sheet1"]

# Define border styles
border_style = Border(left=Side(style='thin'),
                      right=Side(style='thin'),
                      top=Side(style='thin'),
                      bottom=Side(style='thin'))

# Iterate through each row in the sheet
for row in sheet.iter_rows():
    # Check if the row has more than one cell with a value
    if sum(1 for cell in row if cell.value) > 1:
        # Iterate through each cell in the row
        for cell in row:
            # Apply the border style to the cell
            cell.border = border_style

# Save the modified workbook
workbook.save("modified_workbook1.xlsx")


In [90]:
no formatting is applied


SyntaxError: invalid syntax (<ipython-input-90-2796c81f9b12>, line 1)

In [5]:
import pandas as pd

# Sample DataFrame
data = {
    'sub function': ['sf1', 'sf2', 'sf3', 'sf1', 'sf2', 'sf3','sf1','sf1'],
    'col1': [1, 2, 3, 4, 5, 6,44,66],
    'col2': ['A', 'B', 'C', 'D', 'E', 'F','g','h']
}
df = pd.DataFrame(data)

# Group DataFrame by 'sub function'
grouped = df.groupby('sub function')

# Iterate over groups and create separate Excel files
for sub_function, group_df in grouped:
    # Create a new Excel writer for the current sub function
    writer = pd.ExcelWriter(f"{sub_function}_output.xlsx", engine='xlsxwriter')
    
    # Write each sheet to the Excel file
    for idx, (sheet_name, sheet_df) in enumerate(group_df.groupby(level=0)):
        # Generate a sheet name based on the index
        sheet_name = f"Sheet{idx + 1}"
        
        # Truncate sheet name if it exceeds 31 characters
        if len(sheet_name) > 31:
            sheet_name = sheet_name[:31]
        sheet_df.to_excel(writer, sheet_name=sheet_name, index=False)
    
    # Save the Excel file
    writer.save()


In [None]:
import pandas as pd

# Load the main workbook
main_wb = pd.ExcelFile("main_workbook.xlsx")

# Get the list of sheet names in the main workbook
sheet_names = main_wb.sheet_names

# Iterate over each unique sub function value
for sub_func in df['sub function'].unique():
    # Create a new workbook
    new_wb = pd.ExcelWriter(f"{sub_func}_workbook.xlsx", engine='xlsxwriter')
    
    # Iterate over each sheet in the main workbook
    for sheet_name in sheet_names:
        # Read the data from the current sheet
        df = pd.read_excel(main_wb, sheet_name=sheet_name)
        
        # Filter the data based on the current sub function value
        filtered_df = df[df['sub function'] == sub_func]
        
        # Write the filtered data to the corresponding sheet in the new workbook
        filtered_df.to_excel(new_wb, sheet_name=sheet_name, index=False)
    
    # Save the new workbook
    new_wb.save()


In [None]:
import pandas as pd
from openpyxl import load_workbook

# Function to read data from another Excel file and return as DataFrame
def read_data(file_path, sheet_name):
    workbook = load_workbook(file_path)
    df = pd.read_excel(workbook[sheet_name], engine='openpyxl')
    return df

# Load the main workbook with subtables
main_workbook = load_workbook("main_workbook.xlsx")

# Load the additional workbook
additional_workbook = load_workbook("additional_workbook.xlsx")

# Get the sheet names from the main workbook
sheet_names = main_workbook.sheetnames

# Iterate through each sheet in the main workbook
for sheet_name in sheet_names:
    # Read data from the additional workbook
    additional_df = read_data("additional_workbook.xlsx", sheet_name)
    
    # Read data from the main workbook
    main_df = pd.read_excel(main_workbook[sheet_name], engine='openpyxl')
    
    # Determine the row index where the subtables start
    subtable_start_row = len(additional_df) + 3  # Assuming 3 rows gap between additional data and subtables
    
    # Write additional data to the main workbook
    with pd.ExcelWriter("main_workbook.xlsx", engine='openpyxl', mode='a') as writer:
        additional_df.to_excel(writer, sheet_name=sheet_name, startrow=0, index=False)
    
    # Write subtables below the additional data
    with pd.ExcelWriter("main_workbook.xlsx", engine='openpyxl', mode='a') as writer:
        main_df.to_excel(writer, sheet_name=sheet_name, startrow=subtable_start_row, index=False)
