<img src="https://images.financialexpress.com/2019/12/ecommerce-5.jpg">
<img src = "https://media.giphy.com/media/z2D26GunfUK1W/giphy.gif">

<h3 class="list-group-item list-group-item-action active">In Depth Exploration of Summer Products Sales</h3>
<h3 class="list-group-item list-group-item-action active"></h3>

In [None]:
!pip install Autoviz -q
!pip install dexplot -q
!pip install pycaret -q

In [None]:
import warnings
warnings.filterwarnings('ignore')

# for some basic operations
import numpy as np 
import pandas as pd 

# for visualizations
import matplotlib.pyplot as plt
import seaborn as sns
from pandas import plotting
from pandas_profiling import ProfileReport
from pandas.plotting import parallel_coordinates

# for interactive visualizations
import plotly
import plotly.offline as py
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
from plotly import tools
init_notebook_mode(connected = True)
import plotly.figure_factory as ff
import dexplot as dxp
# for animated visualizations
import plotly_express as px

# for providing path
import os
print(os.listdir("../input"))

# for modelling
import sklearn
import imblearn

# for model explanation
import shap 
import eli5
RANDOM_SEED = 42


In [None]:
# Read the two Kaggle input files: the summer-products listing used for the
# analysis below and a general e-commerce table that is only previewed.
summer_csv_path = '../input/summer-products-and-sales-in-ecommerce-wish/summer-products-with-rating-and-performance_2020-08.csv'
all_csv_path = '../input/ecommerce-data/ecommerce.csv'

df_summer = pd.read_csv(filepath_or_buffer=summer_csv_path)
df_all = pd.read_csv(filepath_or_buffer=all_csv_path)

In [None]:
df_summer.head()

In [None]:
df_all.head()

<h3 class="list-group-item list-group-item-action active">In Depth Exploration with Pandas Profiling and AutoViz</h3>
<h3 class="list-group-item list-group-item-action active"></h3>

In [None]:
# Build a pandas-profiling report over the summer-products frame; it is
# rendered inline by the next cell.
profile = ProfileReport(df_summer,title = 'Pandas Profile Report')

In [None]:
profile.to_notebook_iframe()

In [None]:
# AutoViz is installed by the pip cell at the top of the notebook.
from autoviz.AutoViz_Class import AutoViz_Class

# Single AutoViz instance, used by the AutoViz call in the next cell.
AV = AutoViz_Class()

In [None]:
# Run AutoViz on the summer-products data.
# The first four arguments are positional: CSV path, separator, 'price'
# (presumably the dependent/target variable — confirm against the AutoViz
# docs) and the already-loaded frame.
# NOTE(review): both a file path and a DataFrame are supplied; check which
# one AutoViz actually reads when both are given.
filename = "../input/summer-products-and-sales-in-ecommerce-wish/summer-products-with-rating-and-performance_2020-08.csv"
sep = ","
dft = AV.AutoViz(
    filename,
    sep,
    'price',
    df_summer,
    header=0,
    verbose=0,
    lowess=False,
    chart_format="svg",
    max_rows_analyzed=150000,
    max_cols_analyzed=30,
)

<h3 class="list-group-item list-group-item-action active">EDA for Summer Products Sales</h3>
<h3 class="list-group-item list-group-item-action active"></h3>

In [None]:
df_summer.describe(include=['O']).transpose()

In [None]:
df_summer.describe().transpose()

In [None]:
# First ten products whose origin country is 'US': units sold, merchant name
# and merchant rating, shaded with the 'magma' colormap.
us_products = df_summer.loc[
    df_summer['origin_country'] == 'US',
    ['units_sold', 'merchant_name', 'merchant_rating'],
]
us_products.head(10).style.background_gradient('magma')

In [None]:
# First ten products whose origin country is 'CN': units sold, merchant name
# and merchant rating, shaded with the 'Pastel1' colormap.
cn_products = df_summer.loc[
    df_summer['origin_country'] == 'CN',
    ['units_sold', 'merchant_name', 'merchant_rating'],
]
cn_products.head(10).style.background_gradient('Pastel1')

In [None]:
# First ten products whose colour is 'White': units sold, merchant name and
# merchant rating, shaded with the 'Pastel1' colormap.
white_products = df_summer.loc[
    df_summer['product_color'] == 'White',
    ['units_sold', 'merchant_name', 'merchant_rating'],
]
white_products.head(10).style.background_gradient('Pastel1')

In [None]:
# Independent copy of the frame, taken before the dropna in the next cell;
# the predictive-analysis section samples from this copy.
data = df_summer.copy()

In [None]:
# Drop every row that has a missing value in any column (dropna's default).
# Rebinding instead of `inplace=True` leaves the object that was already
# handed to ProfileReport / AutoViz in earlier cells unmodified.
df_summer = df_summer.dropna()

# Sanity check: every column should now report False.
df_summer.isna().any()

In [None]:
df_summer.info()

In [None]:
# Number of products per shipping option.
plt.rcParams['figure.figsize'] = (20, 9)

# Pass the column through `x=`: from seaborn 0.12 on only `data` may be given
# positionally, so a positional Series is no longer treated as the x variable.
sns.countplot(x = df_summer['shipping_option_name'], palette = 'gist_earth')

plt.title('Shipping Options in Summer Sales', fontweight = 30, fontsize = 20)
plt.xticks(rotation = 90)
plt.show()

<a style="color:blue;font-size:15px">Interestingly, most orders are shipped via Livraison Standard.</a>

In [None]:
# Selling price against units sold, coloured by shipping option, with a rug
# along x and a histogram along y as marginal plots.
price_scatter_options = dict(
    x='price',
    y='units_sold',
    color='shipping_option_name',
    marginal_x='rug',
    marginal_y='histogram',
    title='Scatter for Selling Price and Units sold',
)
fig = px.scatter(df_summer, **price_scatter_options)
fig.show()

In [None]:
# Retail price against rating, coloured by the `uses_ad_boosts` column, with a
# rug along x and a histogram along y as marginal plots.
rating_scatter_options = dict(
    x='retail_price',
    y='rating',
    color='uses_ad_boosts',
    marginal_x='rug',
    marginal_y='histogram',
    title='Scatter for Retail Price and Ratings',
)
fig = px.scatter(df_summer, **rating_scatter_options)
fig.show()

In [None]:
# Pairwise scatter matrix over the price-related columns, coloured by
# shipping option.
matrix_dimensions = ["price", "retail_price", "currency_buyer"]
fig = px.scatter_matrix(
    df_summer,
    dimensions=matrix_dimensions,
    color="shipping_option_name",
    title='Scatter Matrix for Price Distribution',
)
fig.show()

In [None]:
# dexplot bar chart with `price` on x and `units_sold` on y; no aggregation
# function is passed, so dexplot's default applies.
dxp.bar(x = 'price',y = 'units_sold',data = df_summer)

In [None]:
# Pie chart of the 15 most frequent product colours.
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and
# later releases dropped the old names, so pick whichever this install has.
pie_style = 'seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8'
plt.style.use(pie_style)


df_summer['product_color'].value_counts().head(15).plot.pie(figsize = (15, 8))

plt.title('Categorization of Product Color',fontsize = 20)

plt.xticks(rotation = 90)
plt.show()

<a style="color:blue;font-size:15px">Black, white, and green clothing is in demand for summer sales.</a>

In [None]:
# Notched box plot of units sold per `badge_product_quality` value, split by
# the `uses_ad_boosts` column (drawn in red / green).
ad_boost_palette = ["red", "green"]
fig = px.box(
    df_summer,
    x="badge_product_quality",
    y="units_sold",
    color="uses_ad_boosts",
    notched=True,
    color_discrete_sequence=ad_boost_palette,
)
fig.show()

In [None]:
# Donut chart of how many listed products each merchant has.
merchant_counts = df_summer['merchant_name'].value_counts()

merchant_pie = go.Pie(
    labels=merchant_counts.index,
    values=merchant_counts.values,
    name='Merchants',
    hole=0.1,
)

fig = go.Figure(
    data=[merchant_pie],
    layout=go.Layout(title='Distribution of Merchants'),
)

py.iplot(fig)

<a style="color:blue;font-size:15px"></a>

<a style="color:blue;font-size:15px">The merchant greatexpectationstechnology has a higher market share compared to the others.</a>

In [None]:
# Box plot of selling price per shipping option.
trace = go.Box(
          x = df_summer['shipping_option_name'],
          y = df_summer['price'],
          opacity = 0.7,
          marker = dict(
                 # Four components (R, G, B, alpha) need the `rgba(...)` form;
                 # `rgb(...)` is defined with three components only.
                 color = 'rgba(215, 195, 5, 0.5)'
          )
)
# List of traces passed to the figure.
df = [trace]

layout = go.Layout(
    title = 'Shipping Options vs Price')

fig = go.Figure(data = df, layout = layout)
py.iplot(fig)

In [None]:
# Count plot of how many products share each `rating_count` value.
plt.figure(figsize = (18, 8))
plt.style.use('fivethirtyeight')
# Name the column with `x=`: on seaborn 0.12+ the first positional argument is
# `data`, so a positional column name combined with `data=` raises TypeError.
ax = sns.countplot(x = 'rating_count', data = df_summer, palette = 'terrain')
ax.set_xlabel(xlabel = 'Ratings Count', fontsize = 16)
ax.set_ylabel(ylabel = 'Count of Rating', fontsize = 16)
ax.set_title(label = 'Rating Count for Different Products', fontsize = 20)
plt.show()

In [None]:
# Distribution of the selling price (histogram with a KDE overlay).
plt.rcParams['figure.figsize'] = (15, 5)
sns.distplot(df_summer['price'], color = 'r')
plt.xlabel('Price', fontsize = 16)
# distplot normalises the histogram whenever the KDE is drawn (its default),
# so the y-axis shows a density rather than a count.
plt.ylabel('Density', fontsize = 16)
plt.title('Distribution of Price', fontsize = 20)
plt.xticks(rotation = 90)
plt.show()

In [None]:
# Distribution of the retail price (histogram with a KDE overlay).
plt.rcParams['figure.figsize'] = (15, 5)
sns.distplot(df_summer['retail_price'], color = 'b')
plt.xlabel('Retail Price', fontsize = 16)
# distplot normalises the histogram whenever the KDE is drawn (its default),
# so the y-axis shows a density rather than a count.
plt.ylabel('Density', fontsize = 16)
plt.title('Distribution of Retail Price', fontsize = 20)
plt.xticks(rotation = 90)
plt.show()

In [None]:
# Distribution of product ratings (histogram with a KDE overlay).
plt.figure(figsize = (20, 5))
sns.distplot(df_summer['rating'], color = 'pink')
plt.title('Distribution for Rating Across Different Products', fontsize = 20)
plt.xlabel('Ratings', fontsize = 16)
# distplot normalises the histogram whenever the KDE is drawn (its default),
# so the y-axis shows a density rather than a count.
plt.ylabel('Density', fontsize = 16)
plt.show()

In [None]:
# Grouped bars of price per `countries_shipped_to` value, coloured by
# shipping option, on the dark plotly template.
shipping_bar_options = dict(
    x="countries_shipped_to",
    y="price",
    barmode="group",
    color="shipping_option_name",
    template="plotly_dark",
    color_discrete_sequence=px.colors.sequential.Plasma_r,
    title="Price Variations for Different Shipping options",
)
fig = px.bar(df_summer, **shipping_bar_options)
fig.show()

In [None]:
# Number of products per `countries_shipped_to` value.
shipment_fig, ax = plt.subplots(figsize = (13, 8))
sns.countplot(x = 'countries_shipped_to', data = df_summer, palette = 'gist_stern_r', ax = ax)
ax.set_title('Count for Order Shipment Countries', fontsize = 20)
ax.set_xlabel('Shipping Countries code', fontsize = 16)
ax.set_ylabel('Count', fontsize = 16)
plt.show()

In [None]:
# dexplot bar chart: median `units_sold` for each `product_variation_size_id`,
# with bars sorted in ascending order.
dxp.bar(x='product_variation_size_id', y='units_sold', data=df_summer, aggfunc='median',figsize=(15,5),sort_values = 'asc',cmap = 'dark12',title='Units Sold by Clothing Size')

<a style="color:blue;font-size:15px">Size L and clothing for 4-5 year olds top the summer sales.</a>

In [None]:
# Number of products per `product_variation_size_id` value.
size_fig, ax = plt.subplots(figsize = (15, 5))
sns.countplot(x = 'product_variation_size_id', data = df_summer, palette = 'hot', ax = ax)
ax.set_title('Count of Product Sizes ', fontsize = 20)
ax.set_xlabel('Product Size', fontsize = 16)
ax.set_ylabel('Count', fontsize = 16)
plt.show()

In [None]:
# Mean units sold per merchant, keeping the 40 merchants with the highest
# mean (ascending sort, so the largest value ends up last).
club = (
    df_summer.groupby('merchant_name')['units_sold']
    .mean()
    .reset_index()
    .sort_values('units_sold', ascending=True)
    .tail(40)
)

# Build the figure first and show it afterwards: `.show()` returns None, so
# chaining it onto `px.bar(...)` would leave `fig` bound to None.
fig = px.bar(
    club,
    x="units_sold",
    y="merchant_name",
    orientation='h',title='Top Merchants based on Units Sold'
)
fig.show()

<a style="color:blue;font-size:15px">xiamoi has sold the most products compared to the other merchants.</a>

In [None]:
# Ten merchants with the highest mean units sold, in ascending order of that
# mean (the largest value is the last row).
merchant_mean_units = df_summer.groupby('merchant_name', as_index=False)['units_sold'].mean()
club_5 = merchant_mean_units.sort_values('units_sold').tail(10)


In [None]:
import plotly.graph_objects as go

# Ten bar colours: every bar is dark magenta except the second one (index 1),
# which is highlighted in crimson.
colors = ['crimson' if position == 1 else 'darkmagenta' for position in range(10)]

top_merchant_bars = go.Bar(
    x=club_5['merchant_name'],
    y=club_5['units_sold'],
    marker_color=colors,
)
fig = go.Figure(data=[top_merchant_bars])
# Last expression of the cell: update_layout returns the figure, which the
# notebook then displays.
fig.update_layout(title_text='Top 10 Merchants')

In [None]:
# Sunburst with merchants on the inner ring and their shipping options on the
# outer ring; segment size is the units sold.
sunburst_options = dict(
    path=['merchant_name', 'shipping_option_name'],
    values='units_sold',
    color='shipping_option_name',
    hover_data=['units_sold'],
    title='Sunburst for Merchant Info',
)
fig = px.sunburst(df_summer, **sunburst_options)
fig.show()

In [None]:
# dexplot count chart over the `units_sold` values, split by `uses_ad_boosts`.
dxp.count(val='units_sold', data=df_summer,split='uses_ad_boosts',figsize=(15,5),cmap='portland',title='Units Sold based on Ad Boost')

<a style="color:blue;font-size:15px">Ad Boosting Plays a vital role for Summer Sales</a>

In [None]:

# dexplot bar chart: median `rating_one_count` for each `units_sold` value,
# split by product colour, with the x categories in descending order.
dxp.bar(x='units_sold', y='rating_one_count', data=df_summer, aggfunc='median', 
        x_order='desc',split='product_color',figsize=(15,5),cmap='sunsetdark',title='Lowest Rating for Units Sold')


In [None]:
# Rating (red) and badge count (yellow) plotted against price on shared axes.
x = df_summer['price']
y = df_summer['rating']
z = df_summer['badges_count']

# x / y must be keywords: from seaborn 0.12 on, lineplot accepts only `data`
# positionally, so two positional Series raise TypeError.
sns.lineplot(x = x, y = y, color = 'r')
sns.lineplot(x = x, y = z, color = 'y')
plt.title('Price vs Rating vs Badge Counts', fontsize = 20)
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlations between numeric columns.
plt.rcParams['figure.figsize'] = (15, 8)
# Select the numeric (and boolean) columns explicitly: since pandas 2.0
# DataFrame.corr() no longer drops non-numeric columns silently and raises on
# string columns. select_dtypes works on old and new pandas alike.
numeric_columns = df_summer.select_dtypes(include = ['number', 'bool'])
sns.heatmap(numeric_columns.corr(), cmap = 'Pastel1', annot = True)
plt.title('Heatmap for the Data', fontsize = 20)
plt.show()

<h3 class="list-group-item list-group-item-action active">Predictive Analysis for Summer Product Sales</h3>
<h3 class="list-group-item list-group-item-action active"></h3>

In [None]:
def data_sampling(dataset, frac: float, random_seed: int):
    """Split ``dataset`` into a random sample and the remaining rows.

    Args:
        dataset: pandas object to split; rows are identified by index label.
        frac: Fraction of rows drawn into the first part.
        random_seed: Seed passed to ``sample`` so the split is reproducible.

    Returns:
        A tuple ``(sampled, remainder)``. ``sampled`` holds the randomly drawn
        rows, ``remainder`` holds every row whose index label was not drawn.
        Both are returned with a fresh 0..n-1 index.
    """
    picked = dataset.sample(frac=frac, random_state=random_seed)
    # Remove the drawn rows by label before either part is re-indexed.
    remainder = dataset.drop(index=picked.index).reset_index(drop=True)
    return picked.reset_index(drop=True), remainder

In [None]:
# Keep 80% of the rows for modelling and set the remaining 20% aside as unseen
# data. The shared RANDOM_SEED constant replaces a hard-coded 42 so the split
# and the PyCaret session use the same seed.
# NOTE: `data` is rebound here, so re-running this cell samples again from the
# already reduced frame; re-run the cell that creates `data` first.
data, data_unseen = data_sampling(data, 0.8, RANDOM_SEED)
print(f"There are {data_unseen.shape[0]} samples for Unseen Data.")

In [None]:
data.columns

In [None]:
# Distribution of the target column (units sold) in the modelling sample,
# drawn with 10 histogram bins.
plt.style.use('fivethirtyeight')
units_fig, ax = plt.subplots(figsize=(10, 5))
sns.distplot(data['units_sold'], bins=10, ax=ax)
ax.set_xlabel('Units Sold')
ax.set_title('Distribuition of units sold');

<a style="color:blue;font-size:20px">Setting up Pycaret Regression Session</a> <br>
<a style="color:Red;font-size:15px">Normalizing Features</a> <br>
<a style="color:Red;font-size:15px">Removing Outliers</a> <br>
<a style="color:Red;font-size:15px">Removing Multicollinearity</a> <br>
<a style="color:Red;font-size:15px">Removing heteroscedasticity </a> <br>

In [None]:
# Import only the PyCaret regression functions this notebook calls, instead of
# `import *`, so it stays clear where each name comes from.
from pycaret.regression import (
    blend_models,
    compare_models,
    create_model,
    finalize_model,
    interpret_model,
    models,
    plot_model,
    predict_model,
    setup,
    tune_model,
)

# Initialise the regression experiment with `units_sold` as the target.
# The columns in `ignore_features` are left out of the feature set. The other
# options switch on robust normalisation, outlier removal, multicollinearity
# removal and a Yeo-Johnson transformation, as listed in the markdown above.
# NOTE(review): `silent=True` belongs to the PyCaret 2.x API — confirm the
# installed version still accepts it.
setup_1 = setup(
    data=data,
    target="units_sold",
    session_id=RANDOM_SEED,
    ignore_features=["title","title_orig","merchant_name","merchant_id","merchant_title","urgency_text","merchant_profile_picture","product_url","product_picture","product_id","tags",'crawl_month'],
    normalize=True,
    normalize_method='robust',
    remove_outliers=True,
    remove_multicollinearity=True,
    transformation=True,
    transformation_method = 'yeo-johnson',
    experiment_name="ecom-sales-prediction",
    silent=True,
)

In [None]:
# Compare the available regressors with 5-fold cross-validation, leaving out
# LightGBM.
compare_models(fold=5,exclude=['lightgbm'])

In [None]:
# Table of the models PyCaret offers; only the 'Name' column is displayed.
model_metadata = models()
model_metadata['Name']

In [None]:
# Train the 'rf' (random forest) model with 5-fold CV; metrics rounded to 2 decimals.
model_rf = create_model('rf', fold=5, round=2)


In [None]:
# Train the 'et' (extra trees) model with 5-fold CV; metrics rounded to 2 decimals.
model_ext = create_model('et', fold=5, round=2)


In [None]:
# Train the 'catboost' model with 5-fold CV; metrics rounded to 2 decimals.
model_cat = create_model('catboost', fold=5, round=2)


In [None]:
# Hyper-parameter tuning of the random forest model, evaluated with 5-fold CV.
tunedmodel_rf = tune_model(model_rf, fold=5)

In [None]:
# Hyper-parameter tuning of the extra trees model, evaluated with 5-fold CV.
tunedmodel_ext = tune_model(model_ext, fold=5)

In [None]:
# Hyper-parameter tuning of the CatBoost model, evaluated with 5-fold CV.
tunedmodel_cat = tune_model(model_cat, fold=5)

In [None]:
# Blend the three tuned models into a single ensemble.
# NOTE(review): no `fold` is passed here, unlike the fold=5 used in the cells
# above, so PyCaret's default fold count applies — confirm this is intended.
blender = blend_models(estimator_list = [tunedmodel_rf, tunedmodel_ext, tunedmodel_cat])

In [None]:
plot_model(blender)

In [None]:
plot_model(blender,plot='error')

In [None]:
# Finalize the blended model, then score the held-out `data_unseen` rows
# produced by the data_sampling split.
final_model = finalize_model(blender)
predictions = predict_model(final_model, data = data_unseen)

In [None]:
# Interpret the tuned extra trees model using interpret_model's default plot.
# NOTE(review): the default plot is presumably 'summary', which would make
# this cell duplicate the next one — confirm.
interpret_model(tunedmodel_ext)

In [None]:
# 'summary' interpretation plot for the tuned extra trees model.
interpret_model(tunedmodel_ext, plot='summary')

In [None]:
# 'correlation' interpretation plot for the tuned extra trees model.
interpret_model(tunedmodel_ext, plot='correlation')

In [None]:
# 'reason' interpretation plot for the tuned extra trees model.
interpret_model(tunedmodel_ext, plot = 'reason')

## Made with ❤ 