# **Necessary Imports**

In [1]:
import os
import math
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# **Mounting Drive**

In [2]:
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


# **Fetching CSV Files**

In [3]:
base_path = "/gdrive/MyDrive/Daraz 11.11 Top Sale Data Analysis/"

In [4]:
csv_files = os.listdir(base_path+'Product Data')
print(f"Aavailable Files: {csv_files}")

Aavailable Files: ['watches_bags_jewellery.csv', 'automotive_&_motorbike.csv', 'electronic_accessories.csv', 'electronic_devices.csv', 'health_&_beauty.csv', 'tv_&_home_appliances.csv', 'men_&_boys_fashion.csv', 'groceries.csv', 'sports_&_fitness.csv', 'home_&_lifestyle.csv', 'mother_&_baby.csv']


# **Preparing the Product Data Frame**

In [5]:
data_frame = pd.DataFrame()

In [6]:
for file in csv_files:
  file_path = f"{base_path}Product Data/{file}"
  df = pd.read_csv(f"{file_path}")
  data_frame = pd.concat([data_frame, df], ignore_index=True)

# **Viewing the DataFrame**

In [7]:
data_frame.head()

Unnamed: 0,Category,SubCategory,Title,Original Price,Discount Price,Discount,Seller Name,Number of Ratings,Positive Seller Ratings,Ship On Time,Chat Response Rate,Delivery Type,Cash On Delivery,Flagship Store
0,"Watches, Bags, Jewellery",https://www.daraz.com.bd/school-bags/,Animal Prints Cotton Canvas Children Bags Back...,550,255,54.0,Twenty Twenty,342,85,0,98,Standard Delivery,Cash on Delivery Available,No
1,"Watches, Bags, Jewellery",https://www.daraz.com.bd/school-bags/,School Bags For Girls Big Capacity Backpack Sh...,1650,725,56.0,AYSHA TRADERS,31,86,0,93,Free Delivery,Cash on Delivery Available,No
2,"Watches, Bags, Jewellery",https://www.daraz.com.bd/school-bags/,Nylon Animal Children Backpacks Kids Preschool...,350,195,44.0,Shah Wholesale Fashion,13,77,0,57,Free Delivery,Cash on Delivery Available,No
3,"Watches, Bags, Jewellery",https://www.daraz.com.bd/school-bags/,Backpacks for kids in kindergarten Cute baby b...,1200,749,38.0,Ghoredokanbd,23,93,0,100,Standard Delivery,Cash on Delivery Available,No
4,"Watches, Bags, Jewellery",https://www.daraz.com.bd/school-bags/,kids bag toddler backpack with leash messenger...,350,286,18.0,Darun Online Shop,3,90,0,100,Free Delivery,Cash on Delivery Available,No


# **Viewing DataFrame Shape**

In [8]:
print(f"Number of Columns: {data_frame.shape[0]}")
print(f"Number of Rows: {data_frame.shape[1]}")

Number of Columns: 12907
Number of Rows: 14


# **Hello**

## **Checking the Columns**

In [9]:
columns = list(data_frame.columns)
print(f"Available Columns: {columns}")

Available Columns: ['Category', 'SubCategory', 'Title', 'Original Price', 'Discount Price', 'Discount', 'Seller Name', 'Number of Ratings', 'Positive Seller Ratings', 'Ship On Time', 'Chat Response Rate', 'Delivery Type', 'Cash On Delivery', 'Flagship Store']


## **Changing column value from Link to Name**

### **A DataFrame containing the Sub-Category Link and related Name**

In [10]:
sub_categories = pd.read_csv(f"{base_path}subcategories.csv")

### **Viewing Sub-Category**

In [11]:
sub_categories.head()

Unnamed: 0,Category,SubCategory Name,SubCategory Link
0,Women's & Girls' Fashion,Traditional Clothing,https://www.daraz.com.bd/womens-traditional-cl...
1,Women's & Girls' Fashion,Muslim Wear,https://www.daraz.com.bd/womens-abayas-and-hij...
2,Women's & Girls' Fashion,Clothing,https://www.daraz.com.bd/womens-clothing/?serv...
3,Women's & Girls' Fashion,"Lingerie, Sleep & Lounge",https://www.daraz.com.bd/womens-lingerie-sleep...
4,Women's & Girls' Fashion,Shoes,https://www.daraz.com.bd/womens-shoes/?price=5...


### **Implementing Change**

In [12]:
for i in range(len(data_frame)):
  changed = 0
  j = 0
  while j < len(sub_categories) and changed != 1:
    if data_frame.iloc[i]["SubCategory"] == sub_categories.iloc[j]["SubCategory Link"]:
      data_frame.at[i,'SubCategory'] = sub_categories.iloc[j]["SubCategory Name"]
      changed = 1
    j += 1

### **Viewing Change**

In [13]:
data_frame["SubCategory"]

0                Kids Bags
1                Kids Bags
2                Kids Bags
3                Kids Bags
4                Kids Bags
               ...        
12902    Traditional Games
12903    Traditional Games
12904    Traditional Games
12905    Traditional Games
12906    Traditional Games
Name: SubCategory, Length: 12907, dtype: object

## **Checking the datatype of the columns**

In [14]:
data_frame.dtypes

Category                    object
SubCategory                 object
Title                       object
Original Price               int64
Discount Price               int64
Discount                   float64
Seller Name                 object
Number of Ratings            int64
Positive Seller Ratings      int64
Ship On Time                 int64
Chat Response Rate           int64
Delivery Type               object
Cash On Delivery            object
Flagship Store              object
dtype: object

## **Checking NaN values**

In [15]:
data_frame.isna().sum()

Category                   0
SubCategory                0
Title                      0
Original Price             0
Discount Price             0
Discount                   0
Seller Name                0
Number of Ratings          0
Positive Seller Ratings    0
Ship On Time               0
Chat Response Rate         0
Delivery Type              0
Cash On Delivery           0
Flagship Store             0
dtype: int64

### **We have not any NaN value in any of the column.**

## **Hello**

## **Handling redundant symbols from Object type columns**

In [16]:
str_columns = []
for column in columns:
  if str(data_frame[column].dtype) == 'object':
    str_columns.append(column)
    for i in range(len(data_frame)):
      data_frame.at[i, column] = data_frame.iloc[i][column].strip().replace('"',"")

## **Viewing Change**

In [17]:
data_frame[str_columns]

Unnamed: 0,Category,SubCategory,Title,Seller Name,Delivery Type,Cash On Delivery,Flagship Store
0,"Watches, Bags, Jewellery",Kids Bags,Animal Prints Cotton Canvas Children Bags Back...,Twenty Twenty,Standard Delivery,Cash on Delivery Available,No
1,"Watches, Bags, Jewellery",Kids Bags,School Bags For Girls Big Capacity Backpack Sh...,AYSHA TRADERS,Free Delivery,Cash on Delivery Available,No
2,"Watches, Bags, Jewellery",Kids Bags,Nylon Animal Children Backpacks Kids Preschool...,Shah Wholesale Fashion,Free Delivery,Cash on Delivery Available,No
3,"Watches, Bags, Jewellery",Kids Bags,Backpacks for kids in kindergarten Cute baby b...,Ghoredokanbd,Standard Delivery,Cash on Delivery Available,No
4,"Watches, Bags, Jewellery",Kids Bags,kids bag toddler backpack with leash messenger...,Darun Online Shop,Free Delivery,Cash on Delivery Available,No
...,...,...,...,...,...,...,...
12902,Mother & Baby,Traditional Games,Plastic Carom Board (Double-Sided Play) - Enjo...,WHITEFIELD,Standard Delivery,Cash on Delivery Available,No
12903,Mother & Baby,Traditional Games,Ludo & Chess Game Board Toy - Chess Board,Kids Toys Collection,Standard Delivery,Cash on Delivery Available,No
12904,Mother & Baby,Traditional Games,board game Ludu Board with chokka and bead,Panhatta,Standard Delivery,Cash on Delivery Available,No
12905,Mother & Baby,Traditional Games,Children's Toy Water Ring Funny Toy With Class...,Smile Touch,Standard Delivery,Cash on Delivery Available,No


In [18]:
data_frame['Category'].nunique()

10

In [19]:
data_frame['Cash On Delivery'].nunique()

7

In [20]:
data_frame['Delivery Type'].nunique()

2

In [21]:
data_frame['Delivery Type'].value_counts()

Free Delivery        8517
Standard Delivery    4390
Name: Delivery Type, dtype: int64

In [22]:
data_frame['Flagship Store'].value_counts()

No     10294
Yes     2613
Name: Flagship Store, dtype: int64

In [23]:
data_frame['Flagship Store'].value_counts()

No     10294
Yes     2613
Name: Flagship Store, dtype: int64

In [24]:
data_frame['Cash On Delivery'].value_counts()

Cash on Delivery Available           12646
100% Authentic from Trusted Brand      178
This item is non-returnable             45
14 days free & easy return              25
Warranty not available                   5
7 Days Returns                           5
Daraz Verified                           3
Name: Cash On Delivery, dtype: int64

In [25]:
data_frame.describe()

Unnamed: 0,Original Price,Discount Price,Discount,Number of Ratings,Positive Seller Ratings,Ship On Time,Chat Response Rate
count,12907.0,12907.0,12907.0,12907.0,12907.0,12907.0,12907.0
mean,5982.283257,4885.541877,34.849423,90.984814,79.507632,32.30712,88.999923
std,20711.321892,18196.608534,21.273665,310.67254,22.745849,46.765793,22.066898
min,5.0,5.0,0.0,0.0,0.0,0.0,0.0
25%,265.0,153.0,17.0,2.0,81.0,0.0,91.0
50%,631.0,356.0,35.0,13.0,86.0,0.0,97.0
75%,1679.5,1063.0,51.0,61.0,89.0,100.0,100.0
max,479900.0,479900.0,98.0,11914.0,100.0,100.0,100.0


In [26]:
data_frame.drop(['Cash On Delivery'], inplace=True, axis=1)

In [27]:
data_frame.columns

Index(['Category', 'SubCategory', 'Title', 'Original Price', 'Discount Price',
       'Discount', 'Seller Name', 'Number of Ratings',
       'Positive Seller Ratings', 'Ship On Time', 'Chat Response Rate',
       'Delivery Type', 'Flagship Store'],
      dtype='object')

## **If a seller sells 50 products on original price, then how many products with the discount price have to be sold to reach the amount equal to the break-even point?**
### **Original Price X 50 products = Discount Price X No. of Products?**

In [28]:
data_frame['No. of products to be sold'] = (data_frame['Original Price'] * 50.0) / data_frame['Discount Price']
data_frame['No. of products to be sold'] = data_frame['No. of products to be sold'].apply(lambda x: round(x,2))

In [29]:
data_frame['No. of products to be sold']

0        107.84
1        113.79
2         89.74
3         80.11
4         61.19
          ...  
12902     64.81
12903     70.59
12904     50.00
12905    102.74
12906    109.33
Name: No. of products to be sold, Length: 12907, dtype: float64

## **How much the seller have to increase the sell percentage to reach the break even point?**   

In [30]:
data_frame['Sell percentage to increase'] = ((data_frame['No. of products to be sold'] - 50) / 50)*100
data_frame['Sell percentage to increase'] = data_frame['Sell percentage to increase'].apply(lambda x: math.ceil(x))

In [31]:
data_frame['Sell percentage to increase']

0        116
1        128
2         80
3         61
4         23
        ... 
12902     30
12903     42
12904      0
12905    106
12906    119
Name: Sell percentage to increase, Length: 12907, dtype: int64

In [32]:
data_frame['Number of Ratings'].max()

11914

In [34]:
data_frame.to_csv(f'{base_path}Dataset/Top_Selling_Product_Data.csv', index=False)