In [6]:
# =======================
# Install Required Libraries
# =======================
!pip install python-docx openpyxl pandas

import pandas as pd
import openpyxl
import os
from docx import Document
from docx.shared import Inches
from google.colab import files

# =======================
# Step 1: Upload Excel File
# =======================
# files.upload() opens Colab's browser file picker and returns a dict keyed by
# the uploaded file names. Only the first uploaded file is processed.
uploaded = files.upload()
if not uploaded:
    # A cancelled/empty upload would otherwise surface as a bare IndexError
    # with no hint about what went wrong.
    raise RuntimeError(
        "No file was uploaded; re-run this cell and choose an Excel workbook."
    )
file_path = next(iter(uploaded))
print("✅ Uploaded file:", file_path)

# =======================
# Step 2: Extract Images with Correct Row Mapping
# =======================
# Write every picture embedded in the active worksheet to ./excel_images and
# record the sheet column / row of its top-left anchor cell, so later steps
# can pair a picture with a DataFrame row.
wb = openpyxl.load_workbook(file_path)
ws = wb.active

os.makedirs("excel_images", exist_ok=True)

images = []
# NOTE(review): `ws._images`, `image._data()` and `anchor._from` are private
# openpyxl members — confirm they still exist when upgrading openpyxl.
for idx, image in enumerate(ws._images, start=1):
    # Only cell-based anchors expose a `_from` marker. A picture without one
    # cannot be mapped to a row, so skip it instead of failing with
    # AttributeError and losing every remaining picture.
    marker = getattr(image.anchor, "_from", None)
    if marker is None:
        print(f"⚠️ Skipping image {idx}: it is not anchored to a cell")
        continue

    # NOTE(review): the bytes are stored under a .png name whatever the
    # embedded format is — confirm downstream consumers tolerate that.
    img_path = f"excel_images/image_{idx}.png"
    with open(img_path, "wb") as f:
        f.write(image._data())

    # Anchor markers are 0-based while Excel rows are 1-based. Excel row 1 is
    # assumed to be the header, so DataFrame index 0 is Excel row 2.
    excel_row = marker.row + 1   # convert to Excel row (1-based)
    df_row = excel_row - 2       # convert Excel row to DataFrame index
    images.append({
        "col": marker.col,       # 0-based sheet column of the anchor cell
        "excel_row": excel_row,
        "df_row": df_row,
        "path": img_path,
    })

print("✅ Extracted images:", [(img["col"], img["excel_row"], img["path"]) for img in images])

# =======================
# Step 3: Load Excel into DataFrame
# =======================
# Read the same worksheet the images were extracted from. pd.read_excel
# defaults to the first sheet, whereas `ws` is the workbook's *active* sheet;
# naming the sheet explicitly keeps image rows and data rows aligned when the
# two differ.
# The first spreadsheet row becomes the header, so DataFrame row 0 corresponds
# to Excel row 2.
df = pd.read_excel(file_path, sheet_name=ws.title)
print("✅ Data loaded:")
print(df.head())

# =======================
# Step 4: Identify cells with '*'
# =======================
# Scan column by column, top to bottom, and keep every text cell containing
# an asterisk. Each hit stores its positional row, its column label and the
# raw cell text; non-string cells (numbers, NaN, dates) are ignored.
asterisk_cells = [
    {
        "row_idx": position,
        "col_name": column,
        "cell_value": value,
    }
    for column in df.columns
    for position, value in enumerate(df[column])
    if isinstance(value, str) and '*' in value
]

print(f"✅ Found {len(asterisk_cells)} asterisk-marked cells")

# =======================
# Step 5: Locate Style Photo Column
# =======================
# Position of the first column whose header contains both "Style" and
# "Photo" (the header may hold a line break between the words), or None when
# no such column exists.
# Headers read from Excel are not always text (numeric or date headers are
# possible); the isinstance guard keeps the substring test from raising
# TypeError on those.
style_photo_col_idx = next(
    (
        position
        for position, label in enumerate(df.columns)
        if isinstance(label, str) and "Style" in label and "Photo" in label
    ),
    None,
)

# =======================
# Step 6: Generate Mail Templates
# =======================
# Emit one templated reminder per asterisk-marked cell into a Word document:
# a heading (used as the mail subject), the body text, and the style photo
# nearest to the cell's row (or a placeholder line when none is available).
doc = Document()
doc.add_heading("Generated Mail Templates (Only Asterisk Marked Cells)", 0)

has_style_no = "Style No." in df.columns

# The pool of style photos does not depend on the cell being processed, so
# filter it once rather than on every iteration.
photo_candidates = (
    [img for img in images if img["col"] == style_photo_col_idx]
    if style_photo_col_idx is not None
    else []
)

for cell_info in asterisk_cells:
    row_idx = cell_info["row_idx"]
    col_name = cell_info["col_name"]
    cell_value = cell_info["cell_value"]

    # Test for a missing value BEFORE converting to text: str(NaN) is the
    # literal "nan", which pd.isna() does not treat as missing, so the
    # "Unknown" fallback would never trigger for empty cells.
    raw_style_no = df.loc[row_idx, "Style No."] if has_style_no else None
    style_no = "Unknown" if pd.isna(raw_style_no) else str(raw_style_no)
    if style_no.strip() == "":
        style_no = "Unknown"

    # Pick the photo whose anchor row is closest to this cell's row.
    # NOTE(review): "closest" can select the photo of a neighbouring style
    # when a style spans many rows — confirm against the sheet layout.
    style_photo_path = None
    if photo_candidates:
        nearest_img = min(
            photo_candidates,
            key=lambda img: abs(img["df_row"] - row_idx),
        )
        style_photo_path = nearest_img["path"]

    # The asterisk is only a marker; strip it from the text shown in the mail.
    deadline = cell_value.replace('*', '')

    # Construct mail text
    subject = f"Regarding Style No: {style_no}, {col_name}, Date: {deadline}"
    body = f"""
Hi,

This is to let you know regarding style no.:{style_no} has a deadline in: {deadline} regarding this: {col_name}.

Please, update us as soon as possible.

Thank You
"""
    doc.add_heading(subject, level=1)
    doc.add_paragraph(body)
    if style_photo_path:
        doc.add_picture(style_photo_path, width=Inches(2))
    else:
        doc.add_paragraph("[⚠️ No image found for this row]")
    # Visual separator between consecutive templates.
    doc.add_paragraph("\n" + "-"*50 + "\n")

# =======================
# Step 7: Save DOCX
# =======================
# Persist the assembled document in the working directory, report where it
# went, then hand it to the browser as a download.
output_path = "mail_templates_starred.docx"

doc.save(output_path)
print(f"✅ Mail templates saved to {output_path}")

files.download(output_path)




Saving Formatted_Order Fynch-Hatton (3) (3).xlsx to Formatted_Order Fynch-Hatton (3) (3) (3).xlsx
✅ Uploaded file: Formatted_Order Fynch-Hatton (3) (3) (3).xlsx
✅ Extracted images: [(0, 3, 'excel_images/image_1.png'), (0, 13, 'excel_images/image_2.png'), (0, 20, 'excel_images/image_3.png'), (0, 26, 'excel_images/image_4.png'), (0, 30, 'excel_images/image_5.png'), (0, 34, 'excel_images/image_6.png')]
✅ Data loaded:
   Style \nPhoto                                          Style No. Fectory  \
0            NaN  1502 314   \nO-Neck, Air Cotton   \n\nSOKLPO01...   Sonia   
1            NaN                                                NaN     NaN   
2            NaN                                                NaN     NaN   
3            NaN                                                NaN     NaN   
4            NaN                                                NaN     NaN   

                                         Quality    Yarn COO  \
0  2/40s 100 Combed Cotton Heather \n\n$3.6

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>