In [172]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import math
import os
import sys

sys.path.append('../')

In [176]:
variation_template = pd.read_csv('../data/variation_template_v2.csv')
data_df = pd.read_csv('../data/courses-data-sample-btech-v2.csv')
output_dir = '../data/product_output/'

VARIATION_NAME = "Name"
VARIATION_SKU = "SKU"
VARIATION_IMAGE = "Images"
VARIATION_PARENT = "Parent"
VARIATION_CATEGORY = "Categories"
VARIATION_DESCRIPTION = "Description"

FLOAT_TO_INT = ['Meta: _et_pb_gutter_width']
PRODUCT_CODE_COL = 'aff_course_code'
AFFILIATION_BRANCH_NAME = 'aff_branch_names'
SKU_COLS = [PRODUCT_CODE_COL, 'aff_name', 'aff_course_code']
NAME_COLS = [PRODUCT_CODE_COL, '2l_course_name']
PRODUCT_VIDEO_COLS = ['almvid', 'psmvid', 'gpsmvid']
PRODUCT_IMAGE_COL = 'course_image'
SEARCH_FORM = "{{%term}}"
FACULTY_COLS = ['Name_Fac{0}', 'Pic_Fac{0}', 'Desc_Fac{0}']
FAC_REPLACE = ["professor", "faculty_image", "prof description"]

# Key div changes. This will need to be replaced every time a change is made to divi product-template
VIDEO_SLIDER_DIV = '[et_pb_video_slider_item src="https://www.youtube.com/watch?v=FkQuawiGWUw" _builder_version="4.19.0" _module_preset="default" global_colors_info="{}" show_image_overlay="off"][/et_pb_video_slider_item]'
VIDEO_SLIDER_REPLACE = '[et_pb_video_slider_item src="{{video}}" _builder_version="4.19.0" _module_preset="default" global_colors_info="{}" show_image_overlay="off"][/et_pb_video_slider_item]'
FACULTY_DIV =  '[et_pb_column type="1_3" _builder_version="4.16" custom_padding="|||" global_colors_info="{}" custom_padding__hover="|||"][et_pb_team_member name="{{professor}}" image_url="{{faculty_image}}" facebook_url="#" twitter_url="#" google_url="#" linkedin_url="#" icon_color="RGBA(255,255,255,0)" use_icon_font_size="on" icon_font_size="1px" _builder_version="4.19.0" header_font="Archivo|700|||||||" header_text_color="#544FFF" header_font_size="18px" header_line_height="1.7em" body_font="||||||||" body_line_height="1.8em" background_color="#ffffff" text_orientation="center" custom_margin="||0px|" custom_padding="30px|3.7vw|0px|3.7vw|false|true" custom_padding_tablet="|17vw||17vw|false|true" custom_padding_phone="|6vw||6vw|false|true" custom_padding_last_edited="on|phone" animation_style="zoom" animation_intensity_zoom="8%" hover_enabled="0" border_radii="on|10px|10px|10px|10px" border_radii_image="on|4px|4px|4px|4px" box_shadow_style="preset1" box_shadow_color="rgba(110,130,208,0.18)" box_shadow_style_image="preset1" icon_hover_color="#f73145" global_colors_info="{%22gcid-e5065624-c60f-4aa9-bd26-2cf3c4270913%22:%91%22header_text_color%22%93}" icon_color__hover="RGBA(255,255,255,0)" icon_color__hover_enabled="on|hover" sticky_enabled="0"]<p>{{prof description}}</p>[/et_pb_team_member]'

VIDEO_SLIDER_DIV2 = '[et_pb_code _builder_version="4.19.0" _module_preset="default" border_radii="on|20px|20px|20px|20px" border_width_all="2px" border_color_all="#FFFFFF" border_style_all="ridge" box_shadow_style="preset1" global_colors_info="{}"][h5p id="30"][/et_pb_code]'
VIDEO_SLIDER_REPLACE2 = '[et_pb_code _builder_version="4.19.0" _module_preset="default" border_radii="on|20px|20px|20px|20px" border_width_all="2px" border_color_all="#FFFFFF" border_style_all="ridge" box_shadow_style="preset1" global_colors_info="{}"][h5p id="{{video}}"][/et_pb_code]'
FACULTY_DIV2 = '[et_pb_column type="1_3" _builder_version="4.16" custom_padding="|||" global_colors_info="{}" custom_padding__hover="|||"][et_pb_team_member name="{{professor}}" image_url="{{faculty_image}}" facebook_url="#" twitter_url="#" google_url="#" linkedin_url="#" icon_color="RGBA(255,255,255,0)" use_icon_font_size="on" icon_font_size="1px" _builder_version="4.19.0" header_font="Archivo|700|||||||" header_text_color="#544FFF" header_font_size="18px" header_line_height="1.7em" body_font="||||||||" body_line_height="1.8em" background_color="#ffffff" text_orientation="center" custom_margin="||0px|" custom_padding="30px|3.7vw|0px|3.7vw|false|true" custom_padding_tablet="|17vw||17vw|false|true" custom_padding_phone="|6vw||6vw|false|true" custom_padding_last_edited="on|phone" child_filter_saturate="110%" child_filter_brightness="105%" child_filter_contrast="120%" animation_style="zoom" animation_intensity_zoom="8%" border_radii="on|28px|28px|28px|28px" border_width_all="2px" border_radii_image="on|15px|15px|15px|15px" border_color_all_image="RGBA(255,255,255,0)" box_shadow_style="preset1" box_shadow_color="rgba(110,130,208,0.18)" box_shadow_style_image="preset1" icon_hover_color="#f73145" global_colors_info="{%22gcid-e5065624-c60f-4aa9-bd26-2cf3c4270913%22:%91%22header_text_color%22%93}" icon_color__hover="RGBA(255,255,255,0)" icon_color__hover_enabled="on|hover"]<p>{{prof description}}</p>[/et_pb_team_member][/et_pb_column]'

In [207]:
def get_search_term(term):
    return SEARCH_FORM.replace('%term', term)

def generate_sku(data_row):
    sku_list = ["-".join(data_row[col].lower().split(" ")) for col in SKU_COLS]
    return "-".join(sku_list)

def generate_product_name(data_row):
    return "-".join([data_row[col] for col in NAME_COLS])

def replace_product_videos(data_row, description):
    
    for i in PRODUCT_VIDEO_COLS:
        if not is_nan(data_row[i]):
            new_vid = VIDEO_SLIDER_REPLACE2.replace("{{video}}", str(int(data_row[i])))
            description = description.replace(VIDEO_SLIDER_DIV2, new_vid, 1)
            
    # If there are sample videos still left, remove them before generating description
    return description.replace(VIDEO_SLIDER_DIV2, "")

def replace_faculty_info(data_row, description):
    for i in range(3):
        if isinstance(data_row[FACULTY_COLS[0].format(i+1)], str):
            # Faculty Exists
            b = FACULTY_DIV2
            for j in range(3):
                d_col = data_row[FACULTY_COLS[j].format(i+1)]
                b = b.replace(get_search_term(FAC_REPLACE[j]), d_col)                
                
            description = description.replace(FACULTY_DIV2, b, 1)
      
    # Remove unused faculty divs
    return description.replace(FACULTY_DIV2, "")
    

def generate_branches(data_df, product_code):
    aff_df = data_df[data_df[PRODUCT_CODE_COL] == product_code].copy(deep=True)
    aff_df.fillna("", inplace=True)
    
    affiliations = aff_df[AFFILIATION_BRANCH_NAME].tolist()
    aff_join = ", ".join(affiliations)
    aff_join = ",and ".join(aff_join.rsplit(",", 1))
    
    if len(affiliations) > 1:
        aff_join += " branches"
    else:
        aff_join += " branch"
    
    return aff_join

def generate_product_description(description, data_row, data_df):
    product_code = None
    for col_name, col_value in data_row.items():
        if not isinstance(col_value, str):
            if not math.isnan(col_value):
                col_value = int(col_value)
            else:
                continue
                
        if col_name == PRODUCT_CODE_COL:
            product_code = col_value
        if col_name == AFFILIATION_BRANCH_NAME:
            col_value  = generate_branches(data_df, product_code)
            
        description = description.replace(get_search_term(col_name), str(col_value))
        
    description = replace_product_videos(data_row, description)
    description = replace_faculty_info(data_row, description)
            
    return description

def is_nan(val):
    if isinstance(val, str):
        return False
    
    return math.isnan(val)

def generate_variation_data(variation, data_row, data_df):
    
    variation[VARIATION_NAME] = variation[VARIATION_NAME].replace(get_search_term("name"), generate_product_name(data_row))
     
    if not is_nan(variation[VARIATION_SKU]):
        variation[VARIATION_SKU] = variation[VARIATION_SKU].replace(get_search_term("sku"), generate_sku(data_row))
        
    if not is_nan(variation[VARIATION_IMAGE]):
        variation[VARIATION_IMAGE] = variation[VARIATION_IMAGE].replace(get_search_term(PRODUCT_IMAGE_COL), data_row[PRODUCT_IMAGE_COL])
    
    if not is_nan(variation[VARIATION_PARENT]):
        variation[VARIATION_PARENT] = variation[VARIATION_PARENT].replace(get_search_term("sku"), generate_sku(data_row))
        
    if not is_nan(variation[VARIATION_CATEGORY]):
        variation[VARIATION_CATEGORY] = variation[VARIATION_CATEGORY].replace(get_search_term("category"), data_row["domain"])
    
    if not is_nan(variation[VARIATION_DESCRIPTION]):
        variation[VARIATION_DESCRIPTION] = generate_product_description(variation[VARIATION_DESCRIPTION], data_row, data_df)
        
    return variation

In [209]:
variation_template.iloc[0]['Description']
#replace_product_videos(data_df.iloc[0], variation_template.iloc[0]['Description'])

'[et_pb_section fb_built="1" _builder_version="4.19.0" _module_preset="default" background_color="gcid-6119d853-c90e-46a8-9c49-16afd63cb690" global_colors_info="{%22gcid-6119d853-c90e-46a8-9c49-16afd63cb690%22:%91%22background_color_gradient_stops%22,%22background_color%22%93,%22gcid-9e2de924-2a97-4fc3-aeaf-dd33006f3c4d%22:%91%22background_color_gradient_stops%22%93,%22gcid-9dfb48d1-038c-4d95-9042-c8cb80e2d5c6%22:%91%22background_color_gradient_stops%22%93,%22gcid-aa0f989a-f66e-4f42-9fe2-f96930f37ba6%22:%91%22background_color_gradient_stops%22%93}"][et_pb_row _builder_version="4.19.0" _module_preset="default" global_colors_info="{}"][et_pb_column type="4_4" _builder_version="4.19.0" _module_preset="default" global_colors_info="{}"][et_pb_wc_title _builder_version="4.19.0" _module_preset="default" header_font="Archivo|700||on|||||" header_text_color="#FFFFFF" header_font_size="3vw" background_enable_color="off" header_font_size_tablet="4vw" header_font_size_phone="6vw" header_font_size_

In [210]:
len(variation_template.iloc[0]['Description'].split(FACULTY_DIV2))

4

In [211]:
len(variation_template.iloc[0]['Description'].split(VIDEO_SLIDER_DIV2))

4

In [212]:
def generate_products(variation_df, data_df):
    generated_codes = set()
    res = []
    for idx, data_row in tqdm(data_df.head(8).iterrows(), total=len(data_df)):
        if data_row[PRODUCT_CODE_COL] not in generated_codes:    
            generated_codes.add(data_row[PRODUCT_CODE_COL])
            for ix, variation_row in variation_df.iterrows():
                res.append(generate_variation_data(variation_row, data_row, data_df))
    
    return res

def massage_types(df):
    df = df.fillna("", inplace=False)

    for tt in FLOAT_TO_INT:
        k = [int(j) if j else "" for j in df[tt] ]
        df[tt] = k
    return df

In [213]:
res = pd.DataFrame(generate_products(variation_template, data_df))
res = massage_types(res)
res.to_csv(f"{output_dir}/test_export2.csv", index=False)

  0%|▏                                        | 8/2128 [00:00<00:02, 794.43it/s]


In [214]:
generate_variation_data(variation_template.iloc[1].copy(deep=True), data_df.iloc[0].copy(deep=True), data_df)

Type                                                                       variation, virtual
SKU                                                                                       NaN
Name                                                       KCS301-Data Structures - Essential
Published                                                                                   1
Is featured?                                                                                0
                                                                          ...                
Meta: _global_colors_info                                                                 NaN
Meta: _et_builder_version                                                                 NaN
Meta: _et_pb_show_page_creation                                                           NaN
Meta: _et_pb_woo_page_content_status                                                      NaN
Meta: _et_builder_dynamic_assets_loading_attr_threshold     

In [93]:
jj = pd.read_csv("../data/product_output/test_export2.csv")
jj[GUTTER_COL].astype(int)

IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer

In [22]:
kc = generate_product_description(variation_template.iloc[0]['Description'], data_df.iloc[0], data_df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aff_df.fillna("", inplace=True)


In [23]:
kc

'[et_pb_section fb_built="1" _builder_version="4.19.0" _module_preset="default" background_color="gcid-6119d853-c90e-46a8-9c49-16afd63cb690" global_colors_info="{%22gcid-6119d853-c90e-46a8-9c49-16afd63cb690%22:%91%22background_color_gradient_stops%22,%22background_color%22%93,%22gcid-9e2de924-2a97-4fc3-aeaf-dd33006f3c4d%22:%91%22background_color_gradient_stops%22%93,%22gcid-9dfb48d1-038c-4d95-9042-c8cb80e2d5c6%22:%91%22background_color_gradient_stops%22%93,%22gcid-aa0f989a-f66e-4f42-9fe2-f96930f37ba6%22:%91%22background_color_gradient_stops%22%93}"][et_pb_row _builder_version="4.19.0" _module_preset="default" global_colors_info="{}"][et_pb_column type="4_4" _builder_version="4.19.0" _module_preset="default" global_colors_info="{}"][et_pb_wc_title _builder_version="4.19.0" _module_preset="default" header_font="Archivo|700||on|||||" header_text_color="#FFFFFF" header_font_size="3vw" background_enable_color="off" header_font_size_tablet="4vw" header_font_size_phone="6vw" header_font_size_

General Strategy - First put all columns to be replaced in variation markdown and change those for the main and secondary rows. Then for all columns in data row, check if tag exists in divi blurb and change. 
For videos and images, create a special mapping since they will not exist directly in divi blurb

In [59]:
for idx, row in data_df.iterrows():
    jj = generate_sku(row)
    pp = generate_branches(data_df, row[PRODUCT_CODE_COL])
    jc = generate_product_vid_list(row)
    
jj, pp, jc

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aff_df.fillna("", inplace=True)


('bect-304-utu-bect-304',
 'Electronics & Communication Engineering, Electrical Engineering, Electrical & Electronics Engineering,and  ',
 '')

In [24]:
affiliations = data_df[data_df[PRODUCT_CODE_COL] == 'ES119'][AFFILIATION_BRANCH_NAME].tolist()
    
", ".join(affiliations[:-1]) + " and " + affiliations[-1]

' and First Year Common Course'

In [21]:
get_search_term(description_columns[0])

'{{essential_course_duration}}'

In [24]:
variation_template.columns

Index(['Type', 'SKU', 'Name', 'Published', 'Is featured?',
       'Visibility in catalog', 'Short description', 'Description',
       'Date sale price starts', 'Date sale price ends', 'Tax status',
       'Tax class', 'In stock?', 'Stock', 'Low stock amount',
       'Backorders allowed?', 'Sold individually?', 'Weight (kg)',
       'Length (cm)', 'Width (cm)', 'Height (cm)', 'Allow customer reviews?',
       'Purchase note', 'Sale price', 'Regular price', 'Categories', 'Tags',
       'Shipping class', 'Images', 'Download limit', 'Download expiry days',
       'Parent', 'Grouped products', 'Upsells', 'Cross-sells', 'External URL',
       'Button text', 'Position', 'Attribute 1 name', 'Attribute 1 value(s)',
       'Attribute 1 visible', 'Attribute 1 global',
       'Meta: _et_pb_post_hide_nav', 'Meta: _et_pb_page_layout',
       'Meta: _et_pb_side_nav', 'Meta: _et_pb_use_builder',
       'Meta: _et_pb_first_image', 'Meta: _et_pb_truncate_post',
       'Meta: _et_pb_truncate_post_date', 

In [25]:
variation_template

Unnamed: 0,Type,SKU,Name,Published,Is featured?,Visibility in catalog,Short description,Description,Date sale price starts,Date sale price ends,...,Meta: _et_pb_ab_subjects,Meta: _et_pb_enable_shortcode_tracking,Meta: _et_pb_ab_current_shortcode,Meta: _et_pb_custom_css,Meta: _et_pb_gutter_width,Meta: _global_colors_info,Meta: _et_builder_version,Meta: _et_pb_show_page_creation,Meta: _et_pb_woo_page_content_status,Meta: _et_builder_dynamic_assets_loading_attr_threshold
0,variable,{{sku}},{{title}},1,0,visible,,"[et_pb_section fb_built=""1"" _builder_version=""...",,,...,,,,,3.0,{},VB|Divi|4.19.0,off,modified,
1,"variation, virtual",,{{title}} - Essential,1,0,visible,,,,,...,,,,,,,,,,
2,"variation, virtual",,{[title}} - Advanced,1,0,visible,,,,,...,,,,,,,,,,
3,"variation, virtual",,{{title}} - GATE,1,0,visible,,,,,...,,,,,,,,,,
