# Setup

In [6]:
import pandas as pd
import PIL
from openpyxl import load_workbook
from openpyxl.drawing.image import Image as XLImage 
from PIL import Image as PILImage
import shutil
import os
import zipfile

## Define inputs

In [15]:
# Imput request date YYY-MM-DD
request_date = '2024-10-21'

# Imput email column
input_file_email_column = 'email'

# imput file path definition
input_file_path = request_date + '/' + request_date + '_request.xlsx'

# Users data file path definition
data_path = request_date + '/' + request_date + '_users_data.xlsx'

# Output directory definition
output_folder = request_date + '/' + request_date + '_output'

# Base files
logo_path = "mc_logo.png"
template_path = "irm template.xlsx"


In [8]:
# Create new directory if needed
os.makedirs(request_date, exist_ok=True)
os.makedirs(output_folder, exist_ok=True)

## Define functions

In [9]:
def generate_query(input_file_path, email_column='email'):
    # Base query
    base_query = """--query used for the requests of user single data
SELECT
profileid AS`ProfileId`,
first_name AS`First Name`,
last_name AS`Last Name`,
normalized_name AS`Normalized Name`,
date(FROM_UNIXTIME(birth_date / 1000)) AS`Birthdate`,
email AS`Primary Email Address`,
email_addresses_all AS`Email Addresses (all)`,
REPLACE(primary_phone, '+', '') AS`Primary Phone`,
REPLACE(phone_number, '+', '') AS`Phone Numbers`,
REPLACE(phone_numbers_all, '+', '') AS`Phone Numbers (all)`,
REPLACE(shopify_phone, '+', '') AS`Shopify Phone`,
email_campaign_id_all AS`Email CampaignId (all)`,
email_id_all AS`EmailId (all)`,
city AS`City (all)`,
mr_geo_city_name AS`City (mostrecent)`,
geo_city_name AS`Cityname`,
geo_subdivision_1_name AS`State (all)`,
mr_geo_subdivision_1_name AS`State (mostrecent)`,
geo_subdivision_1_iso_code AS`Statecode (all)`,
mr_geo_subdivision_1_iso_code AS`Statecode (mostrecent)`,
geo_subdivision_2_name AS`County (all)`,
mr_geo_subdivision_2_name AS`County (mostrecent)`,
geo_subdivision_2_iso_code AS`Countycode (all)`,
mr_geo_subdivision_2_iso_code AS`Countycode (mostrecent)`,
geo_continent_code AS`Continentcode (all)`,
geo_continent_name AS`Continent (all)`,
geo_country_iso_code AS`Countrycode`,
country AS`Country (all)`,
geo_metro_code AS`DesignatedMarketAreacode (all)`,
mr_geo_metro_code AS`Designated Market Area code (mostrecent)`,
geo_geoname_id AS`GeonameID (all)`, 
mr_geo_geoname_id AS`GeonameID (mostrecent)`,
geo_latlong AS`Coordinates (all)`, 
geo_time_zone AS`Timezone (all)`,
mr_geo_time_zone AS`Timezone (mostrecent)`,
geo_zipcode AS`Zipcode`,
mr_geo_zipcode AS`Zipcode (mostrecent)`,
postal_code AS`Postalcode`,
language AS`Language`,
experian_ethnic_religion AS`Experian Ethnic Religion`,
experian_gender AS`Experian Gender`
from prod_products.cdp_reporting.bcexport_allconsumers
WHERE email in (
{email_list}
)"""

    # Read Excel file
    df = pd.read_excel(input_file_path)
    
    # Extract emails, convert to lowercase, and format them
    emails = df[email_column].str.lower().tolist()
    formatted_emails = ',\n'.join(f'"{email}"' for email in emails)
    
    # Insert formatted emails into query
    final_query = base_query.format(email_list=formatted_emails)
    
    return final_query


def generate_individual_files(request_date,template_path, data_path, output_folder, logo_path):
    # Load the file with user data
    df = pd.read_excel(data_path)
    
    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)
    
    # List to store created file names
    created_files = []
    
    # Iterate through each row of the DataFrame
    for _, row in df.iterrows():
        # Create a copy of the template for each user
        email = row['Primary Email Address']  # Adjust according to the exact column name
        output_path = f"{output_folder}/{email}.xlsx"
        shutil.copyfile(template_path, output_path)
        
        # Load the template copy for modification
        wb = load_workbook(output_path)
        ws = wb.active
        
        # Insert user data in row 4
        for col_idx, value in enumerate(row, start=1):
            ws.cell(row=4, column=col_idx, value=value)
        
        # Add logo in cell A1 (adjust position as needed)
        img = XLImage(logo_path)
        ws.add_image(img, "A1")  # Adjust the cell where you want the logo to appear
        
        # Save the modified file
        wb.save(output_path)
        print(f"File saved: {output_path}")
        
        # Add the file name to the list
        created_files.append(output_path)
    
    # Create ZIP file with all generated files
    zip_path = f"{output_folder}/{request_date}.zip"
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for file in created_files:
            zipf.write(file, os.path.basename(file))
    
    print(f"ZIP file created: {zip_path}")


# Create databricks query

In [12]:
query = generate_query(input_file_path, email_column=input_file_email_column)
print(query)

--query used for the requests of user single data
SELECT
profileid AS`ProfileId`,
first_name AS`First Name`,
last_name AS`Last Name`,
normalized_name AS`Normalized Name`,
date(FROM_UNIXTIME(birth_date / 1000)) AS`Birthdate`,
email AS`Primary Email Address`,
email_addresses_all AS`Email Addresses (all)`,
primary_phone AS`Primary Phone`,
phone_number AS`Phone Numbers`,
phone_numbers_all AS`Phone Numbers (all)`,
shopify_phone AS`Shopify Phone`,
email_campaign_id_all AS`Email CampaignId (all)`,
email_id_all AS`EmailId (all)`,
city AS`City (all)`,
mr_geo_city_name AS`City (mostrecent)`,
geo_city_name AS`Cityname`,
geo_subdivision_1_name AS`State (all)`,
mr_geo_subdivision_1_name AS`State (mostrecent)`,
geo_subdivision_1_iso_code AS`Statecode (all)`,
mr_geo_subdivision_1_iso_code AS`Statecode (mostrecent)`,
geo_subdivision_2_name AS`County (all)`,
mr_geo_subdivision_2_name AS`County (mostrecent)`,
geo_subdivision_2_iso_code AS`Countycode (all)`,
mr_geo_subdivision_2_iso_code AS`Countycode (m

# Create users data files

In [16]:
generate_individual_files(request_date,template_path, data_path, output_folder, logo_path)

File saved: 2024-10-21/2024-10-21_output/rheafrawley84@gmail.com.xlsx
File saved: 2024-10-21/2024-10-21_output/tonyburton23@aol.com.xlsx
File saved: 2024-10-21/2024-10-21_output/mr.magic34@yahoo.com.xlsx
File saved: 2024-10-21/2024-10-21_output/debbyg65@icloud.co.xlsx
File saved: 2024-10-21/2024-10-21_output/91peterbuilt377@gmail.com.xlsx
File saved: 2024-10-21/2024-10-21_output/alfie66332001@yahoo.com.xlsx
File saved: 2024-10-21/2024-10-21_output/alwandamattox@gmail.com.xlsx
File saved: 2024-10-21/2024-10-21_output/anita.laslo@yahoo.com.xlsx
File saved: 2024-10-21/2024-10-21_output/bcostellos1278@gmail.com.xlsx
File saved: 2024-10-21/2024-10-21_output/biggramps@gmail.com.xlsx
File saved: 2024-10-21/2024-10-21_output/brigdob@hotmail.com.xlsx
File saved: 2024-10-21/2024-10-21_output/cindylarry@comcast.net.xlsx
File saved: 2024-10-21/2024-10-21_output/clintdegeyter@gmail.com.xlsx
File saved: 2024-10-21/2024-10-21_output/cohotruman@gmail.com.xlsx
File saved: 2024-10-21/2024-10-21_output/d

  return self._open_to_write(zinfo, force_zip64=force_zip64)
