In [60]:
import re
from datetime import datetime

import numpy as np
import pandas as pd
from currency_converter import CurrencyConverter
from slugify import slugify

In [61]:
# Lift pandas' display limits (rows, columns, width) for this notebook.
for _option in ('display.max_rows', 'display.max_columns', 'display.width'):
    pd.set_option(_option, None)

In [62]:
# Capture the run time once, as a datetime and as a "YYYY-MM-DD HH:MM:SS" string.
now = datetime.now()
dt_string = f"{now:%Y-%m-%d %H:%M:%S}"

In [63]:
# Inputs: the semicolon-separated product file, plus the image and price
# tables read from out/.
df = pd.read_csv('csv/products.csv', sep=';')
images = pd.read_csv('out/image.csv')
price = pd.read_csv('out/price.csv')

In [64]:
# Turn the five price columns into floats. Text values use a decimal comma,
# which is swapped for a period before casting. Columns that are already
# numeric (pandas parsed them as numbers, or this cell is being re-run) are
# cast directly, because the .str accessor raises AttributeError on
# non-string columns.
for _col in ['Price 1', 'Price 2', 'Price 3', 'Price 4', 'Price 5']:
    if not pd.api.types.is_numeric_dtype(df[_col]):
        df[_col] = df[_col].str.replace(",", ".", regex=False)
    df[_col] = df[_col].astype(float)

In [65]:
# Cast the five rating columns to float.
for _n in range(1, 6):
    _col = f'Rating {_n}'
    df[_col] = df[_col].astype(float)

In [66]:
# Shared converter instance; convert_currency below calls its convert() method.
c = CurrencyConverter()
def convert_currency(x, y):
    """Convert an amount to USD, returning NaN when conversion is not possible.

    Parameters
    ----------
    x : value accepted by ``float()``
        Amount expressed in the source currency.
    y : object
        Source currency code; it is passed to the converter as ``str(y)``.

    Returns
    -------
    float
        Result of ``c.convert(float(x), str(y), 'USD')``, or ``numpy.nan`` if
        the amount cannot be parsed or the converter raises.
    """
    try:
        return c.convert(float(x), str(y), 'USD')
    except Exception:
        # Best-effort: one bad amount or rejected currency must not abort the
        # whole column. Catching Exception instead of using a bare except lets
        # KeyboardInterrupt/SystemExit propagate, so a long apply() can still
        # be interrupted.
        return np.nan

In [67]:
# Convert every price/currency pair to USD row by row, rounded to two decimals.
for _n in range(1, 6):
    _price_col, _curr_col = f'Price {_n}', f'Curr{_n}'
    df[f'{_price_col} USD'] = (
        df.apply(lambda row: convert_currency(row[_price_col], row[_curr_col]), axis=1)
        .astype(float)
        .round(2)
    )

In [68]:
# Three independent empty frames, filled in the following cells with the
# min / max / average USD price.
df1, df2, df3 = (pd.DataFrame() for _ in range(3))

In [69]:
# Lowest USD price per product. Rows in which all five USD prices are missing
# are dropped first, so df1 only holds products with at least one price.
_usd_prices = df[['Price 1 USD', 'Price 2 USD', 'Price 3 USD', 'Price 4 USD', 'Price 5 USD']]
df1['minPrice'] = _usd_prices.dropna(how='all').min(axis=1).astype(float).round(2)
# Composite key "<ID>_<minPrice>"; the result is aligned to df1's index on assignment.
df1['id'] = df['ID'].astype(str) + '_' + df1['minPrice'].astype(str)

In [70]:
# Highest USD price per product. Rows in which all five USD prices are missing
# are dropped first, so df2 only holds products with at least one price.
_usd_prices = df[['Price 1 USD', 'Price 2 USD', 'Price 3 USD', 'Price 4 USD', 'Price 5 USD']]
df2['maxPrice'] = _usd_prices.dropna(how='all').max(axis=1).astype(float).round(2)
# Composite key "<ID>_<maxPrice>"; the result is aligned to df2's index on assignment.
df2['id'] = df['ID'].astype(str) + '_' + df2['maxPrice'].astype(str)

In [71]:
# Mean USD price per product. Rows in which all five USD prices are missing
# are dropped first, so df3 only holds products with at least one price.
_usd_prices = df[['Price 1 USD', 'Price 2 USD', 'Price 3 USD', 'Price 4 USD', 'Price 5 USD']]
df3['avgPrice'] = _usd_prices.dropna(how='all').mean(axis=1).astype(float).round(2)
# Composite key "<ID>_<avgPrice>"; the result is aligned to df3's index on assignment.
df3['id'] = df['ID'].astype(str) + '_' + df3['avgPrice'].astype(str)

In [72]:
# Give the three price frames a common 'amount' column before they are stacked.
df1 = df1.rename(columns={'minPrice': 'amount'})
df2 = df2.rename(columns={'maxPrice': 'amount'})
df3 = df3.rename(columns={'avgPrice': 'amount'})

In [73]:
# Stack the min, max and average price rows into one frame. The original row
# labels are kept (no ignore_index), so a label can occur up to three times.
newDf1 = pd.concat([df1, df2, df3])

In [74]:
# Keep the composite "<ID>_<amount>" key under 'ID' (get_price_id looks rows
# up by it) and replace 'id' with fresh numeric ids.
newDf1['ID'] = newDf1['id']
# New ids start right after the rows already present in the price table.
# NOTE(review): this assumes the existing price ids are 1..len(price) — confirm.
_first_price_id = price.shape[0] + 1
newDf1['id'] = list(range(_first_price_id, _first_price_id + newDf1.shape[0]))
# The former rename of 'Price' -> 'amount' was removed: newDf1 has no 'Price'
# column (its frames were already renamed to 'amount'), so it did nothing.
newDf1['currencyId'] = 'USD'
newDf1['amountMainCurrency'] = newDf1['amount']
newDf1['mainCurrencyId'] = 'USD'

In [75]:
# Mean of the available ratings per product. Rows with no rating at all are
# dropped before averaging and end up as NaN once aligned back onto df.
_ratings = df[['Rating 1', 'Rating 2', 'Rating 3', 'Rating 4', 'Rating 5']]
df['avgRating'] = _ratings.dropna(how='all').mean(axis=1)

In [76]:
# Number of populated "Number Review N" columns per product.
# count(axis=1) now runs over every row. Previously rows without any review
# data were filtered out first, which left NaN after assignment back onto df
# and turned the column into floats despite the astype(int). Such rows now
# get 0 and the column stays integer.
# NOTE(review): this counts how many review columns are filled in, not the sum
# of the values in them — confirm that is the intended meaning of reviewCount.
_review_cols = ['Number Review 1', 'Number Review 2', 'Number Review 3', 'Number Review 4', 'Number Review 5']
df['reviewCount'] = df[_review_cols].count(axis=1).astype(int)

In [77]:
def get_price_id(value):
    """Return the ``id`` of the first newDf1 row whose ``ID`` equals ``str(value)``.

    Raises IndexError when no row matches.
    """
    wanted = str(value)
    matches = newDf1.loc[newDf1['ID'] == wanted, 'id']
    return matches.values[0]

In [78]:
# Start the output product table from the columns taken over from df.
newDf = df[['Name', 'E-Tickets', 'Description', 'ID', 'avgRating', 'reviewCount']]

In [79]:
# Extra columns to add to the output product table.
columns = [
    'createdDate',
    'updatedDate',
    'internalNote',
    'creatorId',
    'statusId',
    'slug',
    'shortDescription',
    'alternativeName',
    'mainImageId',
]

In [80]:
# Add the extra columns (all empty) by concatenating with a zero-row frame.
# NOTE(review): concatenating with an empty frame may alter the dtypes of the
# existing columns depending on the pandas version — confirm before relying on
# them (the 'id' column is re-cast to int further down).
newDf = pd.concat([newDf, pd.DataFrame(columns = columns)])

In [81]:
# Switch the raw column headers to the names used in the output table.
_output_names = {
    'Name': 'name',
    'E-Tickets': 'onlineTicketUrl',
    'Description': 'description',
    'ID': 'id',
    'avgRating': 'averageRating',
}
newDf = newDf.rename(columns=_output_names)

In [82]:
import random
import time
 
 
def str_time_prop(start, end, time_format, prop):
    """Return the moment lying ``prop`` of the way from ``start`` to ``end``.

    ``start`` and ``end`` are strings in ``time_format``, and the result is
    formatted with the same ``time_format``. ``prop`` is the fraction of the
    interval to advance from ``start``. Parsing and formatting go through
    local time (``time.mktime`` / ``time.localtime``).
    """
    begin_ts, finish_ts = (
        time.mktime(time.strptime(stamp, time_format)) for stamp in (start, end)
    )
    picked_ts = begin_ts + prop * (finish_ts - begin_ts)
    return time.strftime(time_format, time.localtime(picked_ts))


def random_date(start, end, prop):
    """Same as ``str_time_prop`` with the format fixed to ``'%Y-%m-%d'``."""
    return str_time_prop(start, end, time_format='%Y-%m-%d', prop=prop)
 

In [83]:
# Synthetic creation dates between 2021-01-01 and 2022-04-30. A dedicated,
# seeded generator makes the exported dates identical on every run instead of
# changing each time the notebook is executed.
_date_rng = random.Random(42)
newDf['createdDate'] = [
    random_date("2021-1-1", "2022-4-30", _date_rng.random())
    for _ in range(newDf.shape[0])
]
# Every row starts with updatedDate equal to its createdDate.
newDf['updatedDate'] = newDf['createdDate']
newDf['internalNote'] = ''
newDf['statusId'] = 4
newDf['creatorId'] = 900
newDf['id'] = newDf.id.astype(int)

In [84]:
def set_slug(value):
    """Return the slug for ``value``, or ``''`` when none can be built.

    Missing values (None/NaN) give ``''``. Without that check ``str(value)``
    would turn NaN into the text "nan", which would then be slugified.
    Any error raised while slugifying also yields ``''``.
    """
    try:
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ''
        return slugify(str(value))
    except Exception:
        # Best-effort; Exception (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        return ''

# One slug per product, built from its name.
newDf['slug'] = newDf['name'].map(set_slug)

In [85]:
def get_short_description(value):
    """Return at most the first 200 characters of ``value`` as text.

    Missing values (None/NaN) give ``''``; previously ``str(value)`` turned a
    missing description into the literal text "nan".
    """
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ''
    return str(value)[:200]

# Short description = truncated full description.
newDf['shortDescription'] = newDf['description'].map(get_short_description)

In [86]:
def get_alternative_name(value):
    """Turn a space-separated name into a comma-separated list of its words.

    Example: ``'Eiffel Tower'`` -> ``'Eiffel, Tower'``.

    Missing values (None/NaN) give ``''`` instead of raising AttributeError,
    non-string values are converted with ``str()``, and runs of consecutive
    spaces no longer produce empty entries in the result.
    """
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ''
    words = [word for word in str(value).split(' ') if word]
    return ', '.join(words)

# Alternative name = the words of the product name, comma-separated.
newDf['alternativeName'] = newDf['name'].map(get_alternative_name)

In [87]:
def get_main_image_id(value):
    """Return the ``id`` of the first image named ``product_<value>_1``, or ``''``.

    The name is searched inside ``images['name']``. The match must not be
    followed by another digit, so ``product_1_1`` no longer matches
    ``product_1_10``. Rows whose name is missing are treated as non-matching
    (``na=False``); before, a single missing name made the boolean mask
    invalid and every lookup silently returned ``''``.

    Returns ``''`` when no image matches or the lookup fails.
    """
    pattern = re.escape('product_' + str(value) + '_1') + r'(?!\d)'
    try:
        is_main = images['name'].str.contains(pattern, regex=True, na=False)
        found = images.loc[is_main, 'id']
    except Exception:
        # Best-effort, as before; Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        return ''
    return found.values[0] if len(found) else ''
    
# Look up the main image of every product by its id.
newDf['mainImageId'] = newDf['id'].map(get_main_image_id)

In [88]:
# Price table without the composite 'ID' key.
copy = newDf1.drop(columns=['ID'])

In [89]:
# Write the product table and the price table (with its composite 'ID' key).
for _frame, _path in ((newDf, 'out/products.csv'), (newDf1, 'out/price2.csv')):
    _frame.to_csv(_path, index=False, encoding='utf-8')

In [90]:
# (rows, columns) of the product table written above.
newDf.shape

(1943, 15)

In [91]:
# Write the price table without the 'ID' column.
copy.to_csv('out/price3.csv', index=False, encoding='utf-8')

In [92]:
# (rows, columns) of the price table written to out/price3.csv.
copy.shape

(4614, 5)