# Target: In recent years, the cosmetics and makeup market in Vietnam has been growing rapidly, with many new products being introduced and intense competition among brands.
Established in 2016, the Hasaki store system has affirmed its position with a chain of 76 branches spanning 27 provinces and cities of Vietnam. This analysis uses the website's sales data to identify the categories and product segments that are popular among Vietnamese youth.
link: https://vtv.vn/goc-doanh-nghiep/hasaki-chuoi-he-thong-my-pham-chinh-hang-hon-70-chi-nhanh-khien-gioi-tre-do-du-du-20220930102802555.htm

In [129]:
from bs4 import BeautifulSoup
import requests 
import time
import pandas as pd
import plotly.express as ex
import plotly
import plotly.io as pio
from plotly.offline import init_notebook_mode,iplot
import plotly.graph_objects as go
import matplotlib.pyplot as plt

# Open every plotly figure in the system web browser.
pio.renderers.default = "browser"
# Listing URL of the make-up category; `{}` is filled with the page number.
url_0 = 'https://hasaki.vn/danh-muc/trang-diem-c23.html?p={}'
# One row per product: [id, name, category, price, variant, brand, sold].
data = []

# Load the data: walk listing pages 1..39 and collect one row per product card.
for page_number in range(1, 40):
    url = url_0.format(page_number)
    # A timeout keeps a stalled connection from hanging the whole run.
    page = requests.get(url, timeout=30)
    content = BeautifulSoup(page.content, "html.parser")
    product = content.find_all('div', class_='ProductGridItem__itemOuter')
    for card in product:
        # All product attributes live on the same anchor tag, so look it up once.
        info = card.find('a', class_='block_info_item_sp')
        sold_tag = card.find('span', class_='item_count_by')
        if info is None or sold_tag is None:
            # Card without product info or without a sold counter: skip it.
            continue
        try:
            data.append([
                info['id'],
                info['data-name'],
                info['data-category-name'],
                info['data-price'],
                info['data-variant'],
                info['data-brand'],
                sold_tag.text,
            ])
        except KeyError:
            # The anchor lacks one of the expected attributes: skip this card
            # (best-effort scraping) without hiding unrelated errors.
            continue


# Build the DataFrame. Passing the column names to the constructor also works
# when no rows were scraped (assigning df.columns afterwards would raise).
df = pd.DataFrame(
    data,
    columns=['id', 'name', 'category', 'price', 'variant', 'brand', 'sold'],
)

# Clean the data
df['price'] = df['price'].astype(int)
# Keep only the digits of the sold label (raw string: `\D` is a regex class,
# not a Python escape). A label with no digits at all counts as 0 sold.
sold_digits = df['sold'].replace(r'\D', '', regex=True)
df['sold'] = pd.to_numeric(sold_digits, errors='coerce').fillna(0).astype(int)

# Per price segment: a dictionary brand -> average price, and the total
# number of units sold by the brands of that segment.
cheap = {}
q_cheap = 0
nor = {}
q_nor = 0
high_en = {}
q_high_en = 0
# Units sold per brand, over all categories.
brand_solds = {}
# Units sold per category, over all brands.
category_solds = {}

# Total units sold per category.
for i in df['category'].unique():
    category_solds[i] = df[df["category"] == i]["sold"].sum()

# Per-brand figures. This loop is deliberately NOT nested inside the category
# loop: nesting it added every brand's units to its segment total once per
# category, inflating q_cheap / q_nor / q_high_en.
for item in df['brand'].unique():
    brand_rows = df[df['brand'] == item]

    # Average listed price of the brand's products.
    av_price = brand_rows['price'].mean()

    # Total units sold by the brand.
    sold = brand_rows['sold'].sum()
    brand_solds[item] = sold

    # Classify the brand by average price (thresholds presumably in VND).
    if av_price < 200000:
        cheap[item] = int(av_price)
        q_cheap += sold

    elif av_price < 500000:
        nor[item] = int(av_price)
        q_nor += sold

    else:
        high_en[item] = int(av_price)
        q_high_en += sold
            
# Order every aggregate ascending by its value, then split each ordering into
# parallel label / value lists for plotting.

# Units sold per category
sorted_category_solds = sorted(category_solds.items(), key=lambda pair: pair[1])
label_category_solds = [label for label, _ in sorted_category_solds]
values_category_solds = [value for _, value in sorted_category_solds]

# Units sold per brand
sorted_brand_solds = sorted(brand_solds.items(), key=lambda pair: pair[1])
label_brand_solds = [label for label, _ in sorted_brand_solds]
values_brand_solds = [value for _, value in sorted_brand_solds]

# Average price of the brands in the cheapest segment
sorted_cheap = sorted(cheap.items(), key=lambda pair: pair[1])
label_cheap = [label for label, _ in sorted_cheap]
values_cheap = [value for _, value in sorted_cheap]

# Average price of the brands in the middle segment
# (the name `lable_nor` is spelled this way because the plotting code reads it)
sorted_nor = sorted(nor.items(), key=lambda pair: pair[1])
lable_nor = [label for label, _ in sorted_nor]
values_nor = [value for _, value in sorted_nor]

# Average price of the brands in the most expensive segment
sorted_high_en = sorted(high_en.items(), key=lambda pair: pair[1])
label_high_en = [label for label, _ in sorted_high_en]
values_high_en = [value for _, value in sorted_high_en]

# Best-selling brand: the last entry of the ascending brand ordering
best_seller = label_brand_solds[-1]

# Units sold by every brand within each of the 5 best-selling categories.
# One Scatter trace per category is stored as the module-level names
# trace1..trace5, which the chart code looks up through globals().
for i in range(1, 6):
    # sorted_category_solds is ascending by units sold, so index -i is the
    # i-th best-selling category. (The original read an undefined name,
    # `sorted_values`, here and raised NameError.)
    top_category = sorted_category_solds[-i][0]
    brand = []
    solds = []
    for item in list(df['brand'].unique()):

        c = df[(df['brand'] == item) & (df['category'] == top_category)]["sold"].sum()

        brand.append(item)
        solds.append(c)

    # Build the trace once, after all brands have been collected, instead of
    # rebuilding it on every inner-loop iteration.
    globals()[f"trace{i}"] = go.Scatter(x=brand, y=solds, name='{}'.format(top_category))

# Units sold and average price per category for the best-selling brand.
# The three lists are parallel: entry k of each describes the same category.
category_of_bs = []
sold_category_of_bs = []
price_category_of_bs = []

best_seller_rows = df[df['brand'] == best_seller]
for category_name in df['category'].unique():
    rows_in_category = best_seller_rows[best_seller_rows["category"] == category_name]
    units_sold = rows_in_category['sold'].sum()
    mean_price = rows_in_category['price'].mean()

    # Skip categories in which the brand has no purchases yet
    if units_sold > 0:
        category_of_bs.append(category_name)
        sold_category_of_bs.append(units_sold)
        price_category_of_bs.append(int(mean_price))

        
# Draw the charts
# Chart 1: number of products sold by each brand (brands in ascending order of units sold)
fig1 = go.Figure(data = go.Scatter(x = label_brand_solds, y = values_brand_solds))
fig1.update_layout(title = 'Number of products sold by brands',xaxis_title = 'Brands',yaxis_title = 'Number of products sold',barmode='stack')
fig1.show()

# Chart 2: brands grouped by price segment, bar height = average price
fig2 = go.Figure()
fig2.add_trace(go.Bar(x = label_cheap, y = values_cheap, name = 'Drugstore'))
fig2.add_trace(go.Bar(x = lable_nor, y = values_nor, name = 'Mid-range'))
fig2.add_trace(go.Bar(x = label_high_en, y = values_high_en, name = 'High-end'))
fig2.update_layout(title = 'Sort brands by price segment',xaxis_title = 'Brands', yaxis_title = 'Average Price', barmode = 'stack')
fig2.show()

# Chart 3: share of units sold per price segment
fig3 = go.Figure(data = [go.Pie(labels = ["Drugstore", "Mid-range", "High-end"], values = [q_cheap, q_nor, q_high_en])])
fig3.update_layout(title = 'Best-selling product segments')
fig3.show()

# Chart 4: units sold per brand for the 5 best-selling categories.
# trace1..trace5 are module-level names created earlier through globals();
# each trace has brands on x and the category as its legend name, so the
# x axis is titled 'Brands' (it was mislabelled 'Categories').
top_category_traces = [globals()[f"trace{i}"] for i in range(1,6)]
layout4 = go.Layout(title = 'Best-selling product categories', xaxis = dict(title = 'Brands'), yaxis = dict(title = 'Number of products sold'))
fig4 = go.Figure(data = top_category_traces, layout = layout4)
fig4.show()

# Chart 5: units sold per category for the best-selling brand; bar colour and
# bar text both carry the brand's average price in that category.
best_seller_bar = go.Bar(x = category_of_bs, y = sold_category_of_bs, marker = dict(color = price_category_of_bs, colorscale='Viridis', showscale=True), text = price_category_of_bs)
layout5 = go.Layout(title = 'Categories of best seller brand', xaxis = dict(title = 'category'), yaxis = dict(title='Number of products sold'), hovermode='closest')
figure5 = go.Figure(data = best_seller_bar, layout = layout5)
# Shown with .show() like the other figures, so all five use the configured renderer.
figure5.show()






# Conclusion: 
updated: 4/6/2023
Young people often look for makeup products that are convenient, versatile, and suitable for everyday use, such as lip balm, eyeliner, and mascara. They are also price-conscious and tend to look for high-quality makeup products at affordable prices.

The makeup brands that are currently most popular among young people in Vietnam include Maybelline, Vaseline, and Mediheal. These brands are classified as drugstore cosmetics because of their price.
Vietnamese youth may prefer drugstore cosmetics because they offer affordable prices and a wide range of products. In addition, drugstore cosmetics in Vietnam are often conveniently available in urban areas, making them accessible to youth who may not have access to higher-end cosmetics. The availability and affordability of drugstore cosmetics may be why they are a popular choice among Vietnamese youth.

Studying the purchasing behavior of Vietnamese youth regarding cosmetics can bring numerous benefits. Understanding this behavior can provide insights into the preferences and habits of a large consumer group in Vietnam, which can be useful for cosmetic companies looking to better target their products and marketing efforts. By gaining a better understanding of the factors that influence the purchasing decisions of Vietnamese youth, companies can better tailor their products and marketing messaging to meet the needs and wants of this demographic. This can lead to increased sales and brand loyalty. Studying the purchasing behavior of Vietnamese youth can also provide insights into overall cultural and societal trends and dynamics in Vietnam.