In [1]:

!pip install python-docx openpyxl pandas

import pandas as pd
import openpyxl
import os
from docx import Document
from docx.shared import Inches
from google.colab import files
import io
from openpyxl.drawing.image import Image as OpenpyxlImage


# Ask the user for the source workbook through Colab's upload widget.
print("Please upload your Excel file now.")
uploaded = files.upload()

# The result is keyed by file name. Stop with a clear message when it is empty
# (for example after a cancelled dialog) instead of failing with an IndexError.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose an Excel file.")

# Only the first uploaded file is processed.
file_path = next(iter(uploaded))
print(f"✅ Uploaded file: {file_path}")


# Open the uploaded workbook and work on whichever sheet is active.
wb = openpyxl.load_workbook(file_path)
ws = wb.active

os.makedirs("excel_images", exist_ok=True)

# Export every embedded picture to disk and record the cell it is anchored to,
# so later steps can match pictures to DataFrame rows.
images = []
for idx, image in enumerate(ws._images, start=1):
    # Cell-based anchors expose their top-left marker as `_from`. An anchor
    # without one cannot be tied to a row, so that picture is skipped rather
    # than aborting the whole extraction with an AttributeError.
    marker = getattr(image.anchor, "_from", None)
    if marker is None:
        print(f"⚠️ Skipping image {idx}: it is not anchored to a cell.")
        continue

    # The raw bytes are written unchanged under a .png name.
    img_path = f"excel_images/image_{idx}.png"
    with open(img_path, "wb") as f:
        f.write(image._data())

    # Marker rows are 0-based; +1 gives the row number as shown in Excel.
    excel_row = marker.row + 1
    # Assumes the header sits on sheet row 1, so sheet row 2 is DataFrame row 0.
    df_row = excel_row - 2
    images.append({
        "col": marker.col,
        "excel_row": excel_row,
        "df_row": df_row,
        "path": img_path
    })

print("✅ Extracted images:", [(img["col"], img["excel_row"], img["path"]) for img in images])


# Column whose merged cells are expanded so every row carries the value.
style_col_letter = "D"
style_col_number = openpyxl.utils.column_index_from_string(style_col_letter)

# Snapshot the ranges first: unmerging mutates ws.merged_cells.
merged_ranges = list(ws.merged_cells.ranges)

for merged_range in merged_ranges:
    min_col, min_row, max_col, max_row = merged_range.bounds

    # Compare column numbers rather than searching the range text for the
    # letter: a substring test would also match columns such as "AD" or "DA".
    if min_col != style_col_number:
        continue

    # The top-left cell holds the value of a merged range.
    merged_value = ws.cell(min_row, min_col).value

    ws.unmerge_cells(str(merged_range))

    # Repeat the value on every row the merged range used to cover.
    for r in range(min_row, max_row + 1):
        ws.cell(r, min_col).value = merged_value

# Drop the pictures from the sheet before saving the temporary copy.
ws._images = []

temp_fixed_path = "fixed.xlsx"
wb.save(temp_fixed_path)


# Read the cleaned temporary workbook into a DataFrame and preview it.
df = pd.read_excel(temp_fixed_path)
print("✅ Data loaded (Column D fixed):")
print(df.head(15))

# Collect every string cell once, column by column and top to bottom, then
# split the hits by marker. A cell containing both '*' and '?' lands in both lists.
string_cells = [
    {"row_idx": position, "col_name": column, "cell_value": value}
    for column in df.columns
    for position, value in enumerate(df[column])
    if isinstance(value, str)
]
asterisk_cells = [dict(entry) for entry in string_cells if '*' in entry["cell_value"]]
question_cells = [dict(entry) for entry in string_cells if '?' in entry["cell_value"]]

print(f"✅ Found {len(asterisk_cells)} asterisk-marked cells.")
print(f"✅ Found {len(question_cells)} question-marked cells.")


# Position of the first column whose header mentions both "Style" and "Photo";
# None when no header matches.
style_photo_col_idx = next(
    (
        position
        for position, header in enumerate(df.columns)
        if "Style" in str(header) and "Photo" in str(header)
    ),
    None,
)


# Build a Word document with one mail template per asterisk-marked cell.
doc = Document()
doc.add_heading("Generated Mail Templates (Only Asterisk Marked Cells)", 0)

# Pictures anchored in the style-photo column. This does not depend on the
# cell being processed, so it is computed once instead of once per cell.
photo_candidates = (
    [img for img in images if img["col"] == style_photo_col_idx]
    if style_photo_col_idx is not None
    else []
)

for cell_info in asterisk_cells:
    row_idx = cell_info["row_idx"]
    col_name = cell_info["col_name"]
    cell_value = cell_info["cell_value"]

    # Check for a missing value BEFORE converting to text: str(NaN) is "nan",
    # which pd.isna() does not recognise, so the fallback could never trigger.
    # NOTE(review): position 3 is hard-coded (fourth DataFrame column) — confirm
    # it is the style-number column of the uploaded sheet.
    raw_style_no = df.iloc[row_idx, 3]
    if pd.isna(raw_style_no) or str(raw_style_no).strip() == "":
        style_no = "Unknown"
    else:
        style_no = str(raw_style_no)

    # Use the picture whose anchor row is closest to this cell's row.
    # NOTE(review): closest-by-distance may pick a neighbouring style's photo
    # when a row sits nearer to the next picture — confirm against the layout.
    style_photo_path = None
    if photo_candidates:
        nearest_img = min(photo_candidates, key=lambda img: abs(img["df_row"] - row_idx))
        style_photo_path = nearest_img["path"]

    # The marker characters are stripped from the text shown to the recipient.
    deadline = cell_value.replace('*', '').replace('?', '')

    subject = f"Regarding Style No: {style_no}, {col_name}, Date: {deadline}"
    body = f"""
Hi,

This is to let you know regarding style no.: {style_no} has a deadline in: {deadline} regarding this: {col_name}.

Please, update us as soon as possible.

Thank You
"""
    doc.add_heading(subject, level=1)
    doc.add_paragraph(body)
    if style_photo_path:
        doc.add_picture(style_photo_path, width=Inches(2))
    else:
        doc.add_paragraph("[⚠️ No image found for this row]")
    # Visual separator between templates.
    doc.add_paragraph("\n" + "-"*50 + "\n")


output_doc_path = "mail_templates_starred.docx"
doc.save(output_doc_path)
print(f"✅ Mail templates saved to {output_doc_path}")
files.download(output_doc_path)


# Build an Excel report with one row per question-marked cell:
# column A = style photo, column B = style number, column C = source header.
if question_cells:

    new_wb = openpyxl.Workbook()
    new_ws = new_wb.active

    headers = ["Style Photo", "Style No.", "Corresponding Header Name"]
    new_ws.append(headers)

    # Pictures anchored in the style-photo column. This does not depend on the
    # cell being processed, so it is computed once instead of once per cell.
    photo_candidates = (
        [img for img in images if img["col"] == style_photo_col_idx]
        if style_photo_col_idx is not None
        else []
    )

    # Report rows start at sheet row 2, directly below the header row.
    for sheet_row, cell_info in enumerate(question_cells, start=2):
        row_idx = cell_info["row_idx"]
        col_name = cell_info["col_name"]

        # Check for a missing value BEFORE converting to text: str(NaN) is
        # "nan", which pd.isna() does not recognise, so the fallback could
        # never trigger.
        # NOTE(review): position 3 is hard-coded (fourth DataFrame column) —
        # confirm it is the style-number column of the uploaded sheet.
        raw_style_no = df.iloc[row_idx, 3]
        if pd.isna(raw_style_no) or str(raw_style_no).strip() == "":
            style_no = "Unknown"
        else:
            style_no = str(raw_style_no)

        # Use the picture whose anchor row is closest to this cell's row.
        style_photo_path = None
        if photo_candidates:
            nearest_img = min(photo_candidates, key=lambda img: abs(img["df_row"] - row_idx))
            style_photo_path = nearest_img["path"]

        new_ws.cell(row=sheet_row, column=2, value=style_no)
        new_ws.cell(row=sheet_row, column=3, value=col_name)

        if style_photo_path and os.path.exists(style_photo_path):
            img = OpenpyxlImage(style_photo_path)

            # Fixed thumbnail size; the row and column are enlarged to fit it.
            img.width = 100
            img.height = 100

            new_ws.row_dimensions[sheet_row].height = 75
            new_ws.column_dimensions['A'].width = 15

            new_ws.add_image(img, f'A{sheet_row}')

    output_excel_path = "marked_cells_report.xlsx"
    new_wb.save(output_excel_path)

    print(f"✅ Excel report for '?' cells with embedded images saved to {output_excel_path}")
    files.download(output_excel_path)
else:
    print("⚠️ No '?' marked cells found. Skipping Excel file generation.")


Collecting python-docx
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.2.0-py3-none-any.whl (252 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m945.0 kB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.2.0
Please upload your Excel file now.


Saving Formatted_Order Fynch-Hatton.xlsx to Formatted_Order Fynch-Hatton.xlsx
✅ Uploaded file: Formatted_Order Fynch-Hatton.xlsx
✅ Extracted images: [(0, 3, 'excel_images/image_1.png'), (0, 13, 'excel_images/image_2.png'), (0, 20, 'excel_images/image_3.png'), (0, 26, 'excel_images/image_4.png'), (0, 30, 'excel_images/image_5.png'), (0, 34, 'excel_images/image_6.png')]
✅ Data loaded (Column D fixed):
    Style \nPhoto                                          Style No. Fectory  \
0             NaN  1502 314   \nO-Neck, Air Cotton   \n\nSOKLPO01...   Sonia   
1             NaN                                                NaN     NaN   
2             NaN                                                NaN     NaN   
3             NaN                                                NaN     NaN   
4             NaN                                                NaN     NaN   
5             NaN                                                NaN     NaN   
6             NaN                    

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Excel report for '?' cells with embedded images saved to marked_cells_report.xlsx


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>