In [None]:
import pandas as pd
import numpy as np
import re
from collections import defaultdict
import os
import glob
import json
import ast
from datetime import datetime, timedelta
import random

# Custom Function

In [None]:
def get_type(product_category):
    """Return the most specific (last) level of a '>'-separated category path.

    Parameters
    ----------
    product_category : str or NaN/None
        Category path such as ``'Toys > Vehicles > RC Cars'``.

    Returns
    -------
    str
        The text after the final ``'>'`` with surrounding whitespace removed,
        or ``'Misc'`` when the category is missing.
    """
    if pd.isna(product_category):
        return 'Misc'
    # When the path is written with spaces around '>', the raw last segment
    # would carry a leading space (' RC Cars'); strip it.
    return product_category.rsplit('>', 1)[-1].strip()

In [None]:
def convert_weight(weight):
    """Divide ``weight`` by 2205 and round the result to two decimals.

    Missing weights (NaN/None) are returned as an empty string.
    """
    # NOTE(review): the original local was named ``grams``, but dividing by
    # 2205 does not correspond to an obvious unit-to-grams conversion --
    # confirm the intended units before relying on this helper.
    if not pd.isna(weight):
        scaled = weight / 2205
        return float(f"{scaled:.2f}")
    return ''

In [None]:
def get_option_name(input_string):
    """Return the option name: the first line of the first option section.

    Double quotes are removed, sections are separated by a blank line, and
    only the first section is inspected. Missing input is returned unchanged.
    """
    if not pd.isna(input_string):
        unquoted = input_string.replace('"', '')
        first_section, _, _ = unquoted.strip().partition('\n\n')
        return first_section.strip().split('\n')[0]
    return input_string

In [None]:
def get_option_value(input_string):
    """Return the option values of the first option section as a list.

    Double quotes are removed and only the first blank-line-separated section
    is used. Its first line (the option name) and its last line are excluded.
    Missing input is returned unchanged.
    """
    # NOTE(review): the last line of the section is always dropped, so a
    # section holding a name plus one value yields [] -- confirm that the
    # source format really ends each section with a non-value line.
    if not pd.isna(input_string):
        unquoted = input_string.replace('"', '')
        first_section, _, _ = unquoted.strip().partition('\n\n')
        lines = first_section.strip().split('\n')
        return lines[1:-1]
    return input_string

In [None]:
def get_variant_sku(variant_sku, opt_value):
    """Append ``-<opt_value>`` to the SKU.

    When either the SKU or the option value is missing, the SKU is returned
    as-is (which may itself be NaN).
    """
    anything_missing = pd.isna(variant_sku) | pd.isna(opt_value)
    if not anything_missing:
        return variant_sku + '-' + opt_value
    return variant_sku

In [None]:
def get_image_alt_text(image_src):
    """Use the last path segment of the image URL as alt text.

    Missing input is returned unchanged.
    """
    if not pd.isna(image_src):
        return image_src.rsplit('/', 1)[-1]
    return image_src

In [None]:
def convert_upc(barcode):
    """Normalise a UPC/barcode cell to a plain digit string.

    Parameters
    ----------
    barcode : str, int, float or NaN/None
        Raw barcode value as read from the CSV.

    Returns
    -------
    str
        ``''`` for missing values, blank strings and the placeholder
        ``'DOES NOT APPLY'``; otherwise the barcode digits without any
        trailing ``.0``.

    Raises
    ------
    ValueError
        If a string value is neither all digits nor parseable as a number.
    """
    if pd.isna(barcode):
        return ''
    if isinstance(barcode, str):
        text = barcode.strip()
        if text == '' or text == 'DOES NOT APPLY':
            return ''
        # A string that is already all digits is returned untouched: going
        # through int() would drop leading zeros, which are part of the code.
        if text.isascii() and text.isdigit():
            return text
        # Decimal-formatted strings such as '12345.0' (int() alone rejects them).
        return str(int(float(text)))
    # Numeric cells: drop the fractional '.0' that float columns carry.
    return str(int(barcode))

In [None]:
def read_all(folder_path, encoding='utf-8'):
    """Load every ``*.csv`` file of a folder into a single DataFrame.

    Parameters
    ----------
    folder_path : str
        Folder to scan, relative to the current working directory (an
        absolute path is used as-is).
    encoding : str, default 'utf-8'
        Text encoding passed to ``pd.read_csv`` for every file.

    Returns
    -------
    pandas.DataFrame
        The files concatenated row-wise, in file-name order, with a fresh
        0..n-1 index.

    Raises
    ------
    ValueError
        If the folder contains no ``*.csv`` file.
    """
    path = os.path.join(os.getcwd(), folder_path)
    # glob returns matches in arbitrary order; sorting keeps the row order
    # (and everything that depends on it) reproducible between runs.
    all_files = sorted(glob.glob(os.path.join(path, "*.csv")))
    if not all_files:
        # pd.concat([]) would fail with an unrelated-looking message.
        raise ValueError(f"No CSV files found in {path!r}")

    frames = [
        pd.read_csv(filename, index_col=None, header=0, encoding=encoding)
        for filename in all_files
    ]
    return pd.concat(frames, axis=0, ignore_index=True)

In [None]:
def get_tag(breadcrumb):
    """Turn a ':'-separated breadcrumb into a ','-separated tag string.

    Commas already present in the breadcrumb are replaced by ';' first so
    they cannot be mistaken for tag separators. Missing input is returned
    unchanged.
    """
    if not pd.isna(breadcrumb):
        return breadcrumb.replace(',', ';').replace(':', ',')
    return breadcrumb

In [None]:
def get_collection(tags):
    """Return the last entry of a ','-separated tag string.

    Missing input is returned unchanged.
    """
    if not pd.isna(tags):
        return tags.rsplit(',', 1)[-1]
    return tags

In [None]:
def gen_datetime(x, min_year=2020, max_year=datetime.now().year):
    """Return a random timestamp formatted as ``YYYY-MM-DD HH:MM:SS``.

    The timestamp is drawn uniformly from a window that starts on 1 January
    of ``min_year`` and spans ``365 * number_of_years - 166`` days.

    ``x`` is ignored; it only exists so the function can be used with
    ``apply``. The default ``max_year`` is evaluated once, when the function
    is defined.
    """
    # NOTE(review): the 166-day reduction is unexplained in the code -- it
    # presumably cuts the window off part-way through ``max_year``; confirm.
    window_start = datetime(min_year, 1, 1, 0, 0, 0)
    n_years = max_year - min_year + 1
    window = timedelta(days=(365 * n_years) - 166)
    moment = window_start + window * random.random()
    return moment.strftime('%Y-%m-%d %H:%M:%S')

In [None]:
def extract_data(dict_string, field):
    """Collect ``field`` from every entry under the ``'data'`` key.

    ``dict_string`` is the Python-literal text of a dict whose ``'data'``
    value is a list of dicts; it is parsed with ``ast.literal_eval``.
    Returns the list of ``entry[field]`` values in order.
    """
    parsed = ast.literal_eval(dict_string)
    return [entry[field] for entry in parsed['data']]

In [None]:
def generate_tt_product_url(handle):
    """Build ``https://www.trendtimes.com/<handle>.html``; ``None`` if the handle is missing."""
    return None if pd.isna(handle) else f'https://www.trendtimes.com/{handle}.html'

In [None]:
def generate_sf_product_url(handle):
    """Build ``https://www.trendtimes.com/products/<handle>``; ``None`` if the handle is missing."""
    return None if pd.isna(handle) else f'https://www.trendtimes.com/products/{handle}'

In [None]:
def generate_sf_collection_url(handle):
    """Build ``https://www.trendtimes.com/collections/<handle>``; ``None`` if the handle is missing."""
    return None if pd.isna(handle) else f'https://www.trendtimes.com/collections/{handle}'

In [None]:
def generate_key(product_id):
    """Normalise a product id into a handle-style key.

    Leading and trailing dashes are removed and every run of dashes is
    collapsed to a single ``'-'``.

    Parameters
    ----------
    product_id : str or NaN/None

    Returns
    -------
    str or None
        The normalised key, or ``None`` when the id is missing.
    """
    if pd.isna(product_id):
        return None
    # strip('-') already removes every leading dash and collapsing runs cannot
    # create a new one, so the former `startswith('-')` branch was unreachable.
    return re.sub(r'-+', '-', product_id.strip('-'))

In [None]:
def format_price(price):
    """Return the numeric part of a price cell.

    Parameters
    ----------
    price : str, number or NaN/None
        Either a number or text such as ``'19.99 USD'``.

    Returns
    -------
    None
        When the price is missing.
    number
        Non-string values are returned unchanged.
    str
        For text, everything before the first space.
    """
    if pd.isna(price):
        return None
    # The former exact `type(price) == float` check let ints (and numpy
    # numeric scalars) fall through to `.split`, raising AttributeError.
    if not isinstance(price, str):
        return price
    return price.split(' ')[0]

In [None]:
def format_weight(weight):
    """Return a weight cell as a number.

    Parameters
    ----------
    weight : str, number or NaN/None
        Either a number or text in which a space stands for the decimal
        point (``'2 5'`` -> ``2.5``).

    Returns
    -------
    None
        When the weight is missing.
    number
        Non-string values are returned unchanged; text is converted to float.

    Raises
    ------
    ValueError
        If the text is not a number after replacing spaces with '.'.
    """
    # The original body referred to an undefined name `price` instead of the
    # `weight` parameter, and combined its two type checks with `&`, which can
    # never both hold.
    if pd.isna(weight):
        return None
    if not isinstance(weight, str):
        return weight
    return float(weight.replace(' ', '.'))

In [None]:
def generate_collection_url(handle):
    """Build ``https://www.trendtimes.com/<handle>.html``; ``None`` if the handle is missing."""
    return None if pd.isna(handle) else f'https://www.trendtimes.com/{handle}.html'

In [None]:
def extract_alphanumeric(text):
    """Return, in order, every ASCII alphanumeric word and every apostrophe in ``text``."""
    return re.findall(r"\b[a-zA-Z0-9]+\b|\'", text)

In [None]:
def title_to_id(title):
    """Convert a title into a dash-separated, lower-case id.

    The lower-cased title is tokenised with ``extract_alphanumeric``, the
    tokens are joined with '-', and an apostrophe standing between two tokens
    is removed together with its dashes (``kid-'-s`` -> ``kids``).
    Returns ``None`` for a missing title.
    """
    if pd.isna(title):
        return None
    tokens = extract_alphanumeric(title.lower())
    slug = re.sub(r'-+', '-', '-'.join(tokens))
    return slug.replace("-'-", "")

# NOTE(review): leftover call disabled. Neither `parse_string_to_dict` nor
# `input_string` is defined in the visible notebook, so these statements raise
# NameError on a fresh "Restart & Run All". Restore them together with the
# missing definition if they are still needed.
# parsed_dict = parse_string_to_dict(input_string)
# print(parsed_dict)

# Create shopify template

In [None]:
# Existing Shopify product export; its column layout is reused below to build
# the empty `result` frame.
template = pd.read_csv('products_export.csv')

In [None]:
# Show the template's column names.
template.columns

# Transform source product data into the Shopify template

In [None]:
# Load and concatenate every CSV under ./source (decoded as latin-1).
source = read_all('source', encoding='latin-1')

In [None]:
# Rows whose supplier is 'Sections'; used by the collection redirect sections further down.
trendtime_col = source[source['supplier'] == 'Sections']

In [None]:
# Every other row; used by the product redirect section further down.
trendtime_prod = source[source['supplier'] != 'Sections']

# unimported_items

In [None]:
ui_df = pd.read_csv('unimported_items.csv')

In [None]:
# The id is the last URL path segment, cut at its first '.'.
ui_df['id'] = ui_df['product_urls'].apply(lambda x: x.split('/')[-1].split('.')[0])

In [None]:
ui_id_list = ui_df['id'].to_list() 

In [None]:
# Keep only the rows listed in unimported_items.csv. This overwrites `source`;
# trendtime_col / trendtime_prod above were taken from the unfiltered frame.
source = source[source['id'].isin(ui_id_list)].reset_index(drop=True)

In [None]:
# Empty frame with the template's columns; it is filled column by column below.
result = pd.DataFrame(columns=template.columns)

In [None]:
# First assignment into the empty frame.
result['Handle'] = source.loc[:,'id']

In [None]:
# Products without a name get the placeholder title 'No Title'.
result.loc[:, 'Title'] = source['name']
result.loc[:, 'Title'] = result.loc[:, 'Title'].fillna('No Title')

In [None]:
result.loc[:, 'Body (HTML)'] = source['caption']

In [None]:
result.loc[:, 'Vendor'] = source['brand']

In [None]:
result.loc[:, 'Product Category'] = source['google-base-product-type']

In [None]:
# Type is derived from the same category path via get_type ('Misc' when missing).
result.loc[:, 'Type'] = source['google-base-product-type'].apply(get_type)

In [None]:
# bc_df = read_all('breadcrumbs')
# Breadcrumb data for the unimported items.
bc_df = pd.read_csv('breadcrumbs/breadcrumbds_data_unimported.csv')

In [None]:
# Reduce the 'id' value to its last path segment, cut at the first '.'.
bc_df['id'] = bc_df['id'].apply(lambda x: x.split('/')[-1].split('.')[0])

In [None]:
# Drops rows that are identical in every column; an id can still occur more
# than once if its other columns differ.
bc_df.drop_duplicates(inplace=True, ignore_index=True)

In [None]:
# Look breadcrumbs up by id instead of re-using a merge result positionally:
# if an id occurs more than once in bc_df, the left merge returns more rows
# than `result`, and assigning its column back by index would shift every
# later product's tags onto the wrong row. The first breadcrumb per id wins.
breadcrumbs_by_id = bc_df.drop_duplicates(subset='id').set_index('id')['breadcrumbs']
result.loc[:, 'Tags'] = result['Handle'].map(breadcrumbs_by_id)
# Convert the ':'-separated breadcrumb into a ','-separated tag string.
result.loc[:, 'Tags'] = result.loc[:, 'Tags'].apply(get_tag)

In [None]:
result.loc[:, 'Published'] = True

In [None]:
# Option name: first line of the first section of `options` (see get_option_name).
result.loc[:, 'Option1 Name'] = source['options'].apply(get_option_name)

In [None]:
# Each cell becomes a *list* of values (see get_option_value); the list is
# exploded into one row per value further down.
result.loc[:, 'Option1 Value'] = source['options'].apply(get_option_value)

In [None]:
result.loc[:, 'Variant SKU'] = source['code']

In [None]:
# NOTE(review): 'ship-weight' is copied without conversion while
# 'Variant Weight Unit' is set to 'lb' below -- confirm the source unit is
# what a column named "Variant Grams" expects.
result.loc[:, 'Variant Grams'] = source['ship-weight']

In [None]:
result.loc[:, 'Variant Inventory Tracker'] = 'shopify'

In [None]:
# Orderable items get a fixed stock of 10, non-orderable ones 0.
result.loc[:, 'Variant Inventory Qty'] = source['orderable'].replace({'Yes': 10, 'No': 0})

In [None]:
result.loc[:, 'Variant Inventory Policy'] = 'deny'

In [None]:
result.loc[:, 'Variant Fulfillment Service'] = 'manual'

In [None]:
# The sale price becomes the selling price ...
result.loc[:, 'Variant Price'] = source['sale-price'].apply(format_price)

In [None]:
# ... and the regular price becomes the compare-at price.
result.loc[:, 'Variant Compare At Price'] = source['price'].apply(format_price)

In [None]:
result.loc[:, 'Variant Requires Shipping'] = source['need-ship'].replace({'Yes': True, 'No': False})

In [None]:
result.loc[:, 'Variant Taxable'] = True

In [None]:
result.loc[:, 'Variant Barcode'] = source['upc']

In [None]:
# Normalise the barcode to a string ('' for missing / 'DOES NOT APPLY').
result.loc[:, 'Variant Barcode'] = result.loc[:, 'Variant Barcode'].apply(convert_upc)

In [None]:
# Image data for the unimported items.
image_df = pd.read_csv('images_src_rev2/images_data_unimported.csv')

In [None]:
# Reduce the 'id' value to its last path segment, cut at the first '.'.
image_df['id'] = image_df['id'].apply(lambda x: x.split('/')[-1].split('.')[0])

In [None]:
# Inspect fully duplicated rows before dropping them.
image_df[image_df.duplicated()]

In [None]:
# Drops rows identical in every column; the index is not reset here.
image_df.drop_duplicates(inplace=True)

In [None]:
# Look images up by id instead of re-using a merge result positionally: if an
# id occurs more than once in image_df, the left merge returns more rows than
# `result`, and assigning its column back by index would attach images to the
# wrong products. The first 'images' entry per id wins.
images_by_id = image_df.drop_duplicates(subset='id').set_index('id')['images']
result.loc[:, 'Image Src'] = result['Handle'].map(images_by_id)

In [None]:
result.loc[:, 'Gift Card'] = source['gift-certificate'].replace({'Yes': True, 'No': False})

In [None]:
# The SEO title reuses the product name (without the 'No Title' fallback used for Title).
result.loc[:, 'SEO Title'] = source['name']

In [None]:
result.loc[:, 'SEO Description'] = source['abstract']

In [None]:
result.loc[:, 'Google Shopping / Google Product Category'] = source['google-base-product-type']

In [None]:
# The MPN column is filled from the same 'upc' source column as the barcode.
result.loc[:, 'Google Shopping / MPN'] = source['upc']

In [None]:
result.loc[:,'Google Shopping / MPN'] = result.loc[:,'Google Shopping / MPN'].apply(convert_upc)

In [None]:
result.loc[:, 'Google Shopping / Condition'] = 'New'

In [None]:
result.loc[:, 'Variant Weight Unit'] = 'lb'

In [None]:
result.loc[:, 'Included / United States'] = True

In [None]:
result.loc[:, 'Included / Canada'] = True

In [None]:
result.loc[:, 'Status'] = 'active'

In [None]:
# result.loc[:,'Collection'] = result.loc[:,'Tags'].apply(get_collection)

# Input After Explode Option

In [None]:
# 'Option1 Value' holds lists (see get_option_value), so explode() yields one
# row per option value. The extra 'Option1Value' column added by assign() is
# not what gets exploded; it is dropped again in the image section below.
exploded_opt_result = result.assign(Option1Value=result['Option1 Value'].str.split(";")).explode("Option1 Value")

In [None]:
# Inspect the products that have an option.
exploded_opt_result[~pd.isna(exploded_opt_result['Option1 Name'])][['Handle', 'Option1 Name', 'Option1 Value', 'Variant SKU', 'Image Src']]

In [None]:
# An option with a name but no value gets the placeholder value 'Standard'.
exploded_opt_result.loc[(~pd.isna(exploded_opt_result['Option1 Name'])) & (pd.isna(exploded_opt_result['Option1 Value'])), 'Option1 Value'] = 'Standard'

In [None]:
# Make each variant's SKU distinct by appending '-<option value>'.
exploded_opt_result.loc[:, 'Variant SKU'] = exploded_opt_result.apply(lambda x: get_variant_sku(x['Variant SKU'], x['Option1 Value']), axis=1)

In [None]:
# Columns that are blanked on every row of a product except its first one.
uneeded_opt_column = [
    'Title', 'Body (HTML)', 'Vendor', 'Product Category', 'Type', 'Tags',
    'Published', 'Option1 Name', 'Option2 Name',
    'Option2 Value', 'Option3 Name', 'Option3 Value', 'SEO Title','SEO Description', 'Image Src',
    'Google Shopping / Google Product Category', 'Google Shopping / Gender',
    'Google Shopping / Age Group', 'Google Shopping / MPN',
    'Google Shopping / Condition', 'Google Shopping / Custom Product',
    'Google Shopping / Custom Label 0', 'Google Shopping / Custom Label 1',
    'Google Shopping / Custom Label 2', 'Google Shopping / Custom Label 3',
    'Google Shopping / Custom Label 4', 'Cost per item', 'Included / United States', 'Price / United States', 
    'Compare At Price / United States', 'Included / Canada', 'Price / Canada',
    'Compare At Price / Canada', 'Status'
]

In [None]:
# True when a row has the same Handle as the row directly above it.
exploded_opt_result['Flag'] = exploded_opt_result['Handle'].eq(exploded_opt_result['Handle'].shift(1))

In [None]:
exploded_opt_result.loc[exploded_opt_result['Flag'], uneeded_opt_column] = ''

In [None]:
# The helper column is no longer needed.
exploded_opt_result.drop('Flag', axis=1, inplace=True)

# Input After Explode Images Src

In [None]:
final_df = exploded_opt_result.copy()

In [None]:
# 'Image Src' holds ';'-separated URLs; turn each cell into a list.
final_df['Image Src'] = final_df['Image Src'].str.split(';')

In [None]:
# One row per image. The 'ImageSrc' helper column added by assign() is
# dropped again in the next cell.
final_df = final_df.assign(ImageSrc=final_df['Image Src']).explode("Image Src")

In [None]:
# Remove both helper columns ('Option1Value' comes from the option explode above).
final_df.drop(['ImageSrc', 'Option1Value'], inplace=True, axis=1)

In [None]:
# Alt text is the file name part of the image URL.
final_df['Image Alt Text'] = final_df['Image Src'].apply(get_image_alt_text)

In [None]:
# From here on missing values are empty strings.
final_df.fillna('', inplace=True)

In [None]:
# Columns that are blanked on the extra rows created by the image explode
# (everything except Handle, Image Src and Image Alt Text).
uneeded_image_column = [
    'Title', 'Body (HTML)', 'Vendor', 'Product Category', 'Type',
    'Tags', 'Published', 'Option1 Name', 'Option1 Value', 'Option2 Name',
    'Option2 Value', 'Option3 Name', 'Option3 Value', 'Variant SKU',
    'Variant Grams', 'Variant Inventory Tracker', 'Variant Inventory Qty',
    'Variant Inventory Policy', 'Variant Fulfillment Service',
    'Variant Price', 'Variant Compare At Price',
    'Variant Requires Shipping', 'Variant Taxable', 'Variant Barcode',
    'Image Position', 'Gift Card',
    'SEO Title', 'SEO Description',
    'Google Shopping / Google Product Category', 'Google Shopping / Gender',
    'Google Shopping / Age Group', 'Google Shopping / MPN',
    'Google Shopping / Condition', 'Google Shopping / Custom Product',
    'Google Shopping / Custom Label 0', 'Google Shopping / Custom Label 1',
    'Google Shopping / Custom Label 2', 'Google Shopping / Custom Label 3',
    'Google Shopping / Custom Label 4', 'Variant Image',
    'Variant Weight Unit', 'Variant Tax Code', 'Cost per item',
    'Included / United States', 'Price / United States',
    'Compare At Price / United States', 'Included / Canada',
    'Price / Canada', 'Compare At Price / Canada', 'Status'
]

In [None]:
# True when both Handle and Option1 Value equal those of the row directly above.
final_df['Flag'] = (final_df['Handle'].eq(final_df['Handle'].shift(1))) & (final_df['Option1 Value'].eq(final_df['Option1 Value'].shift(1)))

In [None]:
final_df.loc[final_df['Flag'], uneeded_image_column] = ''

In [None]:
final_df.drop('Flag', axis=1, inplace=True)

In [None]:
final_df

# Explode Collection

In [None]:
very_final_df = final_df.copy()

In [None]:
# Split the ','-separated tag string into a list (helper column).
very_final_df['Tags_2'] = very_final_df['Tags'].str.split(',')

In [None]:
# One row per tag, exposed as the new 'Collection' column.
very_final_df = very_final_df.assign(Collection=very_final_df['Tags_2']).explode("Collection")

In [None]:
uneeded_image_column = [
    'Title', 'Body (HTML)', 'Vendor', 'Product Category', 'Type',
    'Tags', 'Published', 'Option1 Name', 'Option1 Value', 'Option2 Name',
    'Option2 Value', 'Option3 Name', 'Option3 Value', 'Variant SKU',
    'Variant Grams', 'Variant Inventory Tracker', 'Variant Inventory Qty',
    'Variant Inventory Policy', 'Variant Fulfillment Service',
    'Variant Price', 'Variant Compare At Price', 'Image Src',
    'Variant Requires Shipping', 'Variant Taxable', 'Variant Barcode',
    'Image Position', 'Gift Card', 'Image Alt Text',
    'SEO Title', 'SEO Description',
    'Google Shopping / Google Product Category', 'Google Shopping / Gender',
    'Google Shopping / Age Group', 'Google Shopping / MPN',
    'Google Shopping / Condition', 'Google Shopping / Custom Product',
    'Google Shopping / Custom Label 0', 'Google Shopping / Custom Label 1',
    'Google Shopping / Custom Label 2', 'Google Shopping / Custom Label 3',
    'Google Shopping / Custom Label 4', 'Variant Image',
    'Variant Weight Unit', 'Variant Tax Code', 'Cost per item',
    'Included / United States', 'Price / United States',
    'Compare At Price / United States', 'Included / Canada',
    'Price / Canada', 'Compare At Price / Canada', 'Status'
]

In [None]:
very_final_df.drop('Tags_2', axis=1, inplace=True)

In [None]:
very_final_df.fillna('', inplace=True)

In [None]:
very_final_df['Flag'] = (very_final_df['Handle'].eq(very_final_df['Handle'].shift(1))) & (very_final_df['Collection'] != '')

In [None]:
very_final_df.loc[very_final_df['Flag'], uneeded_image_column] = ''

In [None]:
very_final_df.drop('Flag', axis=1, inplace=True)

In [None]:
# NOTE(review): the same path is written again from `final_df` a few cells
# below, which overwrites this file -- confirm which frame is the intended export.
very_final_df.to_csv('result/trendtime_shopify_products_unimported.csv', index=False)

# Data to CSV

In [None]:
# NOTE(review): `collection_df` is only assigned near the end of the notebook
# (read from 'data/collections.csv'), so this cell raises NameError on a fresh
# top-to-bottom run.
collection_df.to_csv('Collection_list_unimported.csv')

In [None]:
# final_df.iloc[10002:15001].to_csv('result/trendtime_shopify_products_03.csv', index=False)
final_df.to_csv('result/trendtime_shopify_products_unimported.csv', index=False)

In [None]:
# Spot check of a single product.
final_df[final_df['Handle'] == 'aa-batteries-4-pack']

In [None]:
default_df = pd.read_csv('default-table (copy 1).csv', encoding='latin-1')

In [None]:
default_df.loc[default_df['id'].str.contains("huge-apache-rc-helicopter")]

In [None]:
# NOTE(review): `item_2` is not defined anywhere in the visible notebook; this
# cell raises NameError on a fresh run.
item_2

# Review converter

In [None]:
# Load and concatenate every CSV under ./reviews.
review_df = read_all('reviews')

In [None]:
# Keep only the rows that actually have reviews.
review_df = review_df[~pd.isna(review_df['reviews'])]

In [None]:
review_df.reset_index(inplace=True, drop=True)

In [None]:
review_df

In [None]:
# Import template whose column layout the review export follows.
review_template = pd.read_csv('Okendo_Import_Template_v1(1).csv')

In [None]:
review_template

In [None]:
review_result = review_template[0:0]

In [None]:
review_result['handle'] = review_df['id']

In [None]:
review_result

In [None]:
# Temporary column holding the raw review payload; it is dropped again below.
review_result.loc[:,'reviews'] = review_df['reviews']

In [None]:
review_result

In [None]:
# Each of the next four cells parses the payload and stores one list per
# product (see extract_data); the lists are exploded together further down.
review_result['name'] = review_result.apply(lambda x: extract_data(x['reviews'],field='posted_by'), axis=1)

In [None]:
review_result['body'] = review_result.apply(lambda x: extract_data(x['reviews'],field='review_text'), axis=1)

In [None]:
review_result['rating'] = review_result.apply(lambda x: extract_data(x['reviews'],field='review_rating'), axis=1)

In [None]:
review_result['title'] = review_result.apply(lambda x: extract_data(x['reviews'],field='review_title'), axis=1)

In [None]:
review_result.loc[0,'reviews']

In [None]:
review_result

In [None]:
review_result.drop(columns='reviews', inplace=True)

In [None]:
# One row per individual review; the four list columns are exploded in lockstep.
review_result = review_result.explode(['name', 'body', 'rating', 'title'], ignore_index=True)

In [None]:
# Review dates are randomly generated (see gen_datetime). No random seed is
# set in the visible notebook, so the dates differ between runs.
review_result['dateCreated'] = review_result.apply(lambda x: gen_datetime(x['handle']), axis=1)

In [None]:
review_result['isApproved'] = True

In [None]:
review_result

In [None]:
review_result.to_csv('reviews.csv', index=False)

# Redirect URLs

## Product

In [None]:
trendtime_prod[pd.isna(trendtime_prod['code'])]

In [None]:
# shopify_redirect_df = pd.read_csv('product_urls_rev1.csv')

In [None]:
# shopify_redirect_df[shopify_redirect_df['handle'] == 'canadair-rc-plane']

In [None]:
trendtime_prod['Old Page Url'] = trendtime_prod['id'].apply(generate_tt_product_url)

In [None]:
trendtime_prod = trendtime_prod[['id', 'Old Page Url']]

In [None]:
trendtime_prod['key'] = trendtime_prod['id'].apply(generate_key)

In [None]:
trendtime_prod['New Page Url'] = trendtime_prod['key'].apply(generate_sf_product_url)

In [None]:
trendtime_prod[['Old Page Url', 'New Page Url']].to_csv('trendtimes-redirectrules-prod.csv', index=False)

In [None]:
# redirect_urls = source.merge(shopify_redirect_df, how='left', left_on='key', right_on='handle')[['product_urls', 'onlineStorePreviewUrl']]

In [None]:
# redirect_urls.rename({'product_urls': 'Old Page Url', 'onlineStorePreviewUrl': 'New Page Url'}, inplace=True)

In [None]:
# redirect_urls.dropna(inplace=True)

In [None]:
# redirect_urls.to_csv('trendtimes-redirectrules-with-na.csv', index=False)

## Collections

In [None]:
trendtime_col

In [None]:
trendtime_col['Old Page Url'] = trendtime_col['id'].apply(generate_tt_product_url)

In [None]:
trendtime_col = trendtime_col[['id', 'Old Page Url']]

In [None]:
trendtime_col

In [None]:
trendtime_col['key'] = trendtime_col['id'].apply(generate_key)

In [None]:
trendtime_col

In [None]:
trendtime_col['New Page Url'] = trendtime_col['key'].apply(generate_sf_collection_url)

In [None]:
sf_collections = pd.read_csv('collection_list.csv') 

In [None]:
col_checker = trendtime_col.merge(sf_collections, how='left', left_on='key', right_on='handle')

In [None]:
col_checker['is_available'] = col_checker.apply(lambda x: 'No' if pd.isna(x['handle']) else 'Yes', axis=1)

In [None]:
trendtime_col

In [None]:
col_checker.to_csv('trendtimes-redirectrules-col-checker.csv', index=False)

In [None]:
trendtime_col[['Old Page Url', 'New Page Url']].to_csv('trendtimes-redirectrules-col.csv', index=False)

## Collection Rev1

In [None]:
# Load and concatenate every CSV under ./sections.
tt_col = read_all('sections')

In [None]:
tt_col

In [None]:
tt_col = tt_col.drop_duplicates(ignore_index=True)

In [None]:
tt_col

In [None]:
# The 'url' column is the old page URL of the redirect rule.
tt_col.rename({'url': 'Old Page Url'}, axis=1, inplace=True)

In [None]:
# Handle derived from the section's 'tag' text (see title_to_id).
tt_col['handle'] = tt_col['tag'].apply(title_to_id)

In [None]:
# id = last URL path segment without the '.html' extension.
tt_col['id'] = tt_col['Old Page Url'].apply(lambda x: x.split('/')[-1].replace('.html', ''))

In [None]:
tt_col

In [None]:
sf_col = pd.read_csv('sf_collection_list.csv')

In [None]:
sf_col

### Find sections that do not appear in the breadcrumbs

In [None]:
# Both frames have an 'Old Page Url' column, so the merge suffixes them with
# _x (trendtime_col) and _y (tt_col).
section_trendtime_tt = pd.merge(trendtime_col, tt_col, how='left', on='id')

In [None]:
section_trendtime_tt

In [None]:
# Rows of trendtime_col without a match in tt_col.
section_trendtime_tt[pd.isna(section_trendtime_tt['handle'])]

In [None]:
# Same comparison in the other direction.
section_tt_trendtime = pd.merge(tt_col, trendtime_col, how='left', on='id')

In [None]:
section_tt_trendtime

In [None]:
# Rows of tt_col without a match in trendtime_col.
section_tt_trendtime[pd.isna(section_tt_trendtime['Old Page Url_y'])].to_csv('cek_col_tt_trendtime.csv')

In [None]:
trendtime_col[trendtime_col['id'].str.contains('halloween-costumes---movie-theater-prop-toys-for-children-to-adults-accessories---makeup-makeup-airb')]

In [None]:
source[source['id'] == 'halloween-costumes---movie-theater-prop-toys-for-children-to-adults-accessories---makeup-makeup-airb'].to_csv('hallowen.csv', index=False)

In [None]:
sf_col['New Page Url'] = sf_col['handle'].apply(generate_sf_collection_url)

In [None]:
sf_col[sf_col['handle'].str.contains('jags')]

In [None]:
sf_col[sf_col['handle'].str.contains('redcat-volcano-epx-remote-control-truck-parts-epx-pro')]

In [None]:
tt_col[tt_col['handle'].str.contains('2-channel')]

In [None]:
# Match scraped sections to the new collections by handle.
redirectrules = pd.merge(tt_col, sf_col, how='left', on='handle')

In [None]:
# Sections for which no new collection URL was found.
redirectrules[pd.isna(redirectrules['New Page Url'])]

In [None]:
redirectrules = redirectrules[['Old Page Url', 'New Page Url']]

In [None]:
redirectrules.to_csv('trendtimes-redirectrules-col-rev1.csv', index=False)

## Col Checking

In [None]:
trendtime_col

In [None]:
cek_df = pd.merge(trendtime_col, tt_col, how='left', left_on='key', right_on='id')

In [None]:
cek_df

In [None]:
cek_df[~pd.isna(cek_df['Old Page Url'])]

In [None]:
cek_df[cek_df['id_x'].str.contains('2-channel-rc')]

## Col Checking sf

In [None]:
# Scraped sections joined to the source sections by id; 'key' comes from trendtime_col.
cek_df1 = pd.merge(tt_col, trendtime_col , how='left', left_on='id', right_on='id')

In [None]:
# Scraped sections without a match in trendtime_col.
cek_df1[pd.isna(cek_df1['key'])]

In [None]:
trendtime_col[trendtime_col['id'].str.contains('jags')]

In [None]:
# Same view as two cells above.
cek_df1[pd.isna(cek_df1['key'])]

# Trial

In [None]:
# Reload the full source data; this replaces the `source` frame that was
# filtered down to the unimported items earlier in the notebook.
source = read_all('source', encoding='latin-1')

In [None]:
source.shape

In [None]:
# Number of rows whose id contains 'airsoft'.
source[source['id'].str.contains('airsoft')].shape

## Collection Trial

In [None]:
trendtime_col = source[((pd.isna(source['code'])) & (source['supplier'] == 'Sections'))]

In [None]:
trendtime_col.shape

In [None]:
trendtime_col['Old Page Url'] = trendtime_col['id'].apply(generate_tt_product_url)

In [None]:
trendtime_col = trendtime_col[['id', 'Old Page Url']]

In [None]:
trendtime_col

In [None]:
# Load and concatenate every CSV under ./sections.
scraped_url = read_all('sections')

In [None]:
scraped_url = scraped_url.drop_duplicates(ignore_index=True)

In [None]:
# id = last URL path segment without the '.html' extension.
scraped_url['id'] = scraped_url['url'].apply(lambda x: x.split('/')[-1].replace('.html', ''))

In [None]:
# Handle derived from the section's 'tag' text (see title_to_id).
scraped_url['handle'] = scraped_url['tag'].apply(title_to_id)

In [None]:
scraped_url

In [None]:
# In this section `trendtime_col` only has the columns ['id', 'Old Page Url'],
# so the original lookup of trendtime_col['key'] raised KeyError. Build the
# key here; the col_checker merge below needs it.
# 'New Page Url' is deliberately not added to trendtime_col: the later merge
# with sf_col supplies its own 'New Page Url', and a second column of that
# name would be suffixed (_x/_y) and break the matched/unmatched filters.
trendtime_col = trendtime_col.assign(key=trendtime_col['id'].apply(generate_key))

In [None]:
# Attach the scraped section data (url, tag, handle) to each source section by id.
tt_x_scraped = pd.merge(trendtime_col, scraped_url, how='left', on='id')

In [None]:
# NOTE(review): relies on a 'key' column in trendtime_col and on
# `sf_collections` loaded in the earlier "Collections" section -- confirm both
# exist when this section is run on its own.
col_checker = trendtime_col.merge(sf_collections, how='left', left_on='key', right_on='handle')

In [None]:
col_checker['is_available'] = col_checker.apply(lambda x: 'No' if pd.isna(x['handle']) else 'Yes', axis=1)

In [None]:
# Source sections that were found among the scraped sections.
tt_x_scraped[~pd.isna(tt_x_scraped['handle'])]

In [None]:
sf_col = pd.read_csv('sf_collection_list.csv')

In [None]:
sf_col['New Page Url'] = sf_col['handle'].apply(generate_sf_collection_url)

In [None]:
sf_col

In [None]:
# Match by handle; 'New Page Url' stays NaN where no collection has that handle.
result_col = pd.merge(tt_x_scraped, sf_col, how='left', on='handle').reset_index(drop=True)

In [None]:
result_col

In [None]:
# Redirect rules with a known target ...
col_matched = result_col[~pd.isna(result_col['New Page Url'])]

In [None]:
col_matched[['Old Page Url', 'New Page Url']].to_csv('trendtimes-redirectrules-col-rev2(matched).csv', index=False)

In [None]:
# ... and old URLs that still need a target.
col_unmatched = result_col[pd.isna(result_col['New Page Url'])]

In [None]:
col_unmatched[['Old Page Url', 'New Page Url']].to_csv('trendtimes-redirectrules-col-rev2(unmatched).csv', index=False)

## Product Trial

In [None]:
trendtime_prod = source[~((pd.isna(source['code'])) & (source['supplier'] == 'Sections'))]

In [None]:
trendtime_prod

In [None]:
trendtime_prod['Old Page Url'] = trendtime_prod['id'].apply(generate_tt_product_url)

In [None]:
trendtime_prod['handle'] = trendtime_prod['id'].apply(generate_key)

In [None]:
trendtime_prod

In [None]:
trendtime_prod = trendtime_prod[['id', 'handle', 'Old Page Url']]

In [None]:
trendtime_prod

In [None]:
sf_prod = pd.read_csv('product_urls_rev2.csv')

In [None]:
sf_prod[sf_prod['handle'].str.contains('monster-truck-m')]

In [None]:
sf_prod['New Page Url'] = sf_prod['handle'].apply(generate_sf_product_url)

In [None]:
sf_prod[['handle','New Page Url']]

In [None]:
# Match by handle; 'New Page Url' stays NaN where no product has that handle.
result_prod = pd.merge(trendtime_prod, sf_prod, how='left', on='handle')

In [None]:
result_prod = result_prod[['Old Page Url', 'New Page Url']]

In [None]:
# Redirect rules with a known target ...
prod_matched = result_prod[~pd.isna(result_prod['New Page Url'])]

In [None]:
prod_matched[['Old Page Url', 'New Page Url']].to_csv('trendtimes-redirectrules-prod-rev2(matched).csv', index=False)

In [None]:
# ... and old URLs that still need a target.
prod_unmatched = result_prod[pd.isna(result_prod['New Page Url'])]

In [None]:
prod_unmatched[['Old Page Url', 'New Page Url']].to_csv('trendtimes-redirectrules-prod-rev2(unmatched).csv', index=False)

# Clean Shopify Product

In [None]:
# Load and concatenate every CSV under ./product_export.
df = read_all('product_export')

In [None]:
df

In [None]:
# Active rows that have no image and a price of 0.
df_price_0 = df.loc[(pd.isna(df['Image Src'])) & (df['Variant Price'] == 0.0) & (df['Status'] == 'active')]

In [None]:
# TODO(review): the original cell was `df_price_0[df['Handle'] == ]`, an
# unfinished filter that is a SyntaxError. Fill in the handle to inspect, e.g.
#     df_price_0[df_price_0['Handle'] == '<handle>']
# Until then the cell just shows the zero-price rows.
df_price_0

# Analytics

In [None]:
# Re-uses the name `df` for a single source file (latin-1).
df = pd.read_csv('source/item-2.csv', encoding='latin-1')

In [None]:
df.fillna('', inplace=True)

In [None]:
# Export the row(s) with code 'DG88672M' for inspection.
df[df['code'] == 'DG88672M'].to_csv('hello_kitty_witch.csv', index=False)

# Check breadcrumbs order

In [None]:
breadcrumbs = pd.read_csv('breadcrumbs/breadcrumbs_data.csv')

In [None]:
breadcrumbs

In [None]:
# Second column of the first row matching this id.
breadcrumbs[breadcrumbs['id'] == 'disney-frozen-anna-coronation-4-6'].iloc[0,1]

# Check dummy products without a redirect URL

In [None]:
# Re-uses the name `df`: every CSV under ./data/product_data.
df = read_all('data/product_data')

In [None]:
df.info()

In [None]:
# "Dummy" products: rows with a title that are active but have price 0,
# weight 0, no barcode and no image.
df_dummy_product = df.loc[(pd.notna(df['Title'])) & (df['Variant Price'] == 0) & (pd.isna(df['Variant Barcode'])) & (df['Status'] == 'active') & (pd.isna(df['Image Src'])) & (df['Variant Grams'] == 0)]

In [None]:
df_dummy_product

In [None]:
df_dummy_product.to_csv('data/dummy_product.csv', index=False)

In [None]:
collection_df = pd.read_csv('data/collections.csv')

In [None]:
collection_df

In [None]:
# Left merge of the dummy products' Handle against the collections' handle.
check_df = df_dummy_product.merge(collection_df, how='left', left_on='Handle', right_on='handle')

In [None]:
check_df

In [None]:
check_df.to_csv('data/check.csv', index=False)