In [2]:
import pandas as pd

# Load the Superstore CSV into a DataFrame; the path is relative to the
# notebook's working directory.
# NOTE(review): the name `csv` would shadow the standard-library `csv` module
# if that module were ever imported in this notebook.
csv = pd.read_csv('./resources/superstore.csv')

In [48]:
# Normalise the column labels in one chained pass: lower-case every label,
# then turn spaces and hyphens into underscores.
csv.columns = (
    csv.columns
    .str.lower()
    .str.replace(' ', '_')
    .str.replace('-', '_')
)


In [49]:
# Display the current column labels of the loaded frame.
csv.columns

Index(['order_id', 'order_date', 'ship_date', 'ship_mode', 'customer_id',
       'customer_name', 'segment', 'city', 'state', 'country', 'region',
       'market', 'product_id', 'category', 'sub_category', 'product_name',
       'sales', 'quantity', 'discount', 'profit', 'shipping_cost',
       'order_priority', 'delivery_days', 'order_year', 'order_month'],
      dtype='object')

In [17]:
import plotly.express as px

# Keep only the 2007 rows of plotly's bundled gapminder sample dataset.
df = px.data.gapminder().query("year==2007")

# Look up the rows whose ISO alpha code is 'RUS'.
# The recorded output of this cell shows only the header row (no matches).
df[df['iso_alpha'].eq('RUS')]

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num


In [50]:
# Left-join the gapminder columns onto the Superstore rows by country name.
# Every row of `csv` is kept; rows whose country has no match in `df` get NaN
# in the gapminder columns (including iso_alpha).
database = csv.merge(df, how="left", on="country")

In [None]:
# Print the merged frame's summary (columns, non-null counts, dtypes);
# the non-null counts show how many rows received gapminder values.
database.info()

In [None]:
import plotly.express as px

# Number of rows per (country, iso_alpha) pair, flattened into plain columns.
# NOTE: despite its name, the 'sales' column is a row count, not a monetary
# amount. Rows with a NaN iso_alpha are left out because groupby drops NaN
# keys by default.
# The original cell also ran this same groupby once more and discarded the
# result; that dead statement has been removed.
sales_per_country = (
    database
    .groupby(by=['country', 'iso_alpha'])
    .size()
    .reset_index(name='sales')
)

fig = px.choropleth(
    sales_per_country,
    locations="iso_alpha",  # ISO-3 codes, matching plotly's default locationmode
    color="sales",  # shade each country by its row count
    hover_name="country",  # country name shown as the hover title
    color_continuous_scale=px.colors.sequential.deep,
)
fig.show()

In [35]:
import plotly.graph_objects as go

# World choropleth of the per-country row counts held in sales_per_country,
# built with the lower-level graph_objects API.
# Two copy-paste leftovers from the plotly example were fixed:
#   * the '$' colour-bar tick prefix was dropped, because 'sales' is a count
#     of rows rather than a currency amount;
#   * the source annotation no longer credits the CIA World Factbook, since
#     the plotted values come from the Superstore CSV loaded in this notebook.
fig = go.Figure(data=go.Choropleth(
    locations=sales_per_country['iso_alpha'],
    z=sales_per_country['sales'],
    text=sales_per_country['country'],
    colorscale='Blues',
    autocolorscale=False,
    reversescale=True,  # reverse the 'Blues' scale
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_title='Vendas por país',
))

fig.update_layout(
    title_text='Vendas por país',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    annotations=[dict(
        # Position is in paper coordinates (fractions of the figure area).
        x=0.55,
        y=0.1,
        xref='paper',
        yref='paper',
        text='Source: superstore.csv',
        showarrow=False
    )]
)

fig.show()

In [39]:
# Total profit for each (country, iso_alpha) pair, with the group keys turned
# back into ordinary columns.
profit_per_country = (
    database[['country', 'iso_alpha', 'profit']]
    .groupby(by=['country', 'iso_alpha'])['profit']
    .sum()
    .reset_index()
)

profit_per_country

Unnamed: 0,country,iso_alpha,profit
0,Afghanistan,AFG,5430
1,Albania,ALB,701
2,Algeria,DZA,9007
3,Angola,AGO,6435
4,Argentina,ARG,-18564
...,...,...,...
121,Uruguay,URY,1682
122,Venezuela,VEN,-11165
123,Vietnam,VNM,-1730
124,Zambia,ZMB,6952


In [42]:
import plotly.graph_objects as go

# World choropleth of total profit per country (profit_per_country).
# The source annotation was corrected: the original text credited the CIA
# World Factbook (a leftover from the plotly example), but the plotted values
# come from the Superstore CSV loaded in this notebook.
fig = go.Figure(data=go.Choropleth(
    locations=profit_per_country['iso_alpha'],
    z=profit_per_country['profit'],
    text=profit_per_country['country'],
    colorscale='Blues',
    autocolorscale=False,
    reversescale=True,  # reverse the 'Blues' scale
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_tickprefix='$',
    colorbar_title='LUCRO',
))

fig.update_layout(
    title_text='Lucro por país',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    annotations=[dict(
        # Position is in paper coordinates (fractions of the figure area).
        x=0.55,
        y=0.1,
        xref='paper',
        yref='paper',
        text='Source: superstore.csv',
        showarrow=False
    )]
)

fig.show()

In [55]:

# Average delivery_days for each ship_mode, sorted ascending so the quickest
# shipping mode comes first.
ship_mode_mean = (
    database
    .groupby(by=['ship_mode'])['delivery_days']
    .mean()
    .reset_index(name='mean')
    .sort_values(by=['mean'])
)
ship_mode_mean

Unnamed: 0,ship_mode,mean
1,Same Day,0.037394
0,First Class,2.181746
2,Second Class,3.230187
3,Standard Class,4.998018


In [64]:
import plotly.express as px

# Bar chart of the mean delivery time per shipping mode; each mode gets its
# own colour and fill pattern.
# The unused `long_df = px.data.medals_long()` assignment (an unrelated sample
# dataset that was never read) has been removed.
fig = px.bar(
    ship_mode_mean,
    x="ship_mode",
    y="mean",
    color="ship_mode",
    title="Média do tempo de entrega por tipo de entrega em dias",
    pattern_shape="ship_mode",
)
fig.show()

In [73]:
# Row count for each order_year, sorted by that count in ascending order.
# NOTE(review): this counts rows of `database`, not distinct order_id values;
# confirm which of the two "orders per year" is meant to express.
order_per_year = (
    database
    .groupby(by=['order_year'])
    .size()
    .reset_index(name='count')
    .sort_values(by=['count'])
)
order_per_year

Unnamed: 0,order_year,count
0,2012,8998
1,2013,10962
2,2014,13799
3,2015,17531


In [None]:
# Row count for each (order_month, order_year) pair, listed chronologically
# (by year first, then by month).
order_per_month = (
    database
    .groupby(by=['order_month', 'order_year'])
    .size()
    .reset_index(name='count')
    .sort_values(by=['order_year', 'order_month'])
)
order_per_month

In [83]:
# Keep only the measure columns that feed the correlation heatmap; every
# other column of `csv` is left out, and the original column order is kept.
res = csv.loc[:, csv.columns.isin(
    ['sales', 'quantity', 'discount', 'profit', 'shipping_cost', 'delivery_days']
)]

In [104]:
import plotly.graph_objects as go

# Annotated heatmap of the pairwise correlations between the columns of `res`
# (DataFrame.corr with its default method, Pearson).
# The matrix is computed once and reused for both the colours and the cell
# labels; the original called res.corr() twice. Axis labels are taken from the
# matrix itself so they always line up with its rows and columns.
corr = res.corr()

fig = go.Figure(data=go.Heatmap(
    x=corr.columns,
    y=corr.index,
    z=corr,
    text=corr,
    texttemplate="%{text:.4f}",  # print each coefficient with 4 decimals
    textfont={"size": 16},
))

fig.show()