## Setup

### Imports

In [72]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

### Importação do CSV

In [73]:
# Location of the raw Superstore export, relative to the working directory.
SUPERSTORE_CSV_PATH = './resources/superstore.csv'

# Load the raw order rows; the cells below normalise and trim this frame.
csv = pd.read_csv(SUPERSTORE_CSV_PATH)

### Definição de funções auxiliares

In [74]:
def filter_columns(df, columns):
    """Return a new frame holding only the columns of ``df`` named in ``columns``.

    The surviving columns keep the order they have in ``df``. Names listed in
    ``columns`` that do not exist in ``df`` are ignored rather than raising.
    """
    keep_mask = df.columns.isin(columns)
    return df.loc[:, keep_mask]

## Tratamento do Dataframe

### Padronizar nome das colunas

In [75]:
# Normalise the headers in one pass: lower-case them, then replace spaces and
# hyphens with underscores.
csv.columns = (
    csv.columns
    .str.lower()
    .str.replace(' ', '_')
    .str.replace('-', '_')
)

### Remover colunas que não serão utilizadas

In [76]:
# Columns that are not used by any analysis below. 'unnamed:_0' is the
# normalised form of the 'Unnamed: 0' header — presumably a row index that was
# written into the CSV on export (TODO confirm).
columns_to_drop = [
    'unnamed:_0'
]

# errors='ignore' keeps this cell re-runnable: once the column has been
# removed, executing the cell again is a no-op instead of raising KeyError.
csv.drop(columns=columns_to_drop, inplace=True, errors='ignore')

In [77]:
# Display the column labels that remain after renaming and dropping.
csv.columns

Index(['order_id', 'order_date', 'ship_date', 'ship_mode', 'customer_id',
       'customer_name', 'segment', 'city', 'state', 'country', 'region',
       'market', 'product_id', 'category', 'sub_category', 'product_name',
       'sales', 'quantity', 'discount', 'profit', 'shipping_cost',
       'order_priority', 'delivery_days', 'order_year', 'order_month'],
      dtype='object')

### Obter dataframe dos países

In [78]:
# Plotly's bundled Gapminder sample is used as a country -> continent / ISO
# code lookup. Only the 2007 rows are kept, so each country should appear at
# most once. `px` is already imported in the imports cell at the top of the
# notebook, so it is not re-imported here.
gapminder_df = px.data.gapminder().query("year==2007")
gapminder_df = filter_columns(gapminder_df, ['country', 'continent', 'iso_alpha', 'iso_num'])

### Adicionar países que não estão inclusos

In [79]:
# Extra lookup rows for country names that are not present in the Gapminder
# sample, each with its continent and ISO alpha-3 / numeric code.
# NOTE(review): Kazakhstan is tagged 'Europe' here; the UN geoscheme places it
# in Asia — confirm which grouping the analysis expects.
new_countries = pd.DataFrame({
    'country': ['Russia', 'Ukraine', 'Kazakhstan', 'South Korea', 'Suriname'],
    'continent': ['Europe', 'Europe', 'Europe', 'Asia', 'Americas'],
    'iso_alpha': ['RUS', 'UKR', 'KAZ', 'KOR', 'SUR'],
    'iso_num': [643, 804, 398, 410, 740]
})

# Append the extra rows, then keep a single row per country name. Without the
# de-duplication, re-running this cell would stack the same countries again,
# and repeated keys would multiply order rows in the later merge on 'country'.
# keep='last' lets the explicit rows above win over any earlier entry.
gapminder_df = (
    pd.concat([gapminder_df, new_countries], ignore_index=True)
    .drop_duplicates(subset='country', keep='last')
    .reset_index(drop=True)
)

### Atualizar valores

In [80]:
# (ISO alpha-3 code, replacement name) pairs. Every lookup row carrying the
# code gets its 'country' label overwritten — presumably so the name matches
# the spelling used in the orders data (TODO confirm).
countries = [
    ('YEM', 'Yemen')
]

for iso_code, corrected_name in countries:
    rows_to_fix = gapminder_df['iso_alpha'].eq(iso_code)
    gapminder_df.loc[rows_to_fix, 'country'] = corrected_name

### Realizando o merge

In [81]:
# Left join: every order row is kept and gains continent / ISO columns where
# its country name matches the lookup. validate="many_to_one" makes pandas
# raise MergeError if the lookup ever holds a repeated country name, which
# would otherwise silently duplicate orders and inflate the counts and sums
# computed below.
database = pd.merge(csv, gapminder_df, on="country", how="left", validate="many_to_one")

## Análises

### Quantidade de vendas por país

In [82]:
# Number of order rows per country. Grouping on both keys carries the ISO code
# through to the map; rows whose iso_alpha is NaN (country not found in the
# lookup) are excluded by groupby's default handling of missing keys.
# The grouped count is computed once — the previous version also ran the same
# groupby a second time and discarded the result.
sales_per_country = (
    filter_columns(database, ['country', 'iso_alpha'])
    .groupby(by=['country', 'iso_alpha'])
    .size()
    .reset_index(name='sales')
)

# World map: one region per ISO code, shaded by the number of orders.
fig = go.Figure(data=go.Choropleth(
    locations=sales_per_country['iso_alpha'],
    z=sales_per_country['sales'],
    text=sales_per_country['country'],
    colorscale='Blues',
    autocolorscale=False,
    reversescale=True,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_title='Número de vendas',
))

fig.update_layout(
    margin=dict(l=50, r=50, t=50, b=50),
    title_text='Quantidade de Vendas por País',
    geo=dict(
        showframe=False,
        showcoastlines=True,
        projection_type='equirectangular'
    )
)

fig.show()

### Lucro por país

In [83]:
profit_columns = filter_columns(database, ['country', 'iso_alpha', 'profit'])

# Total profit for each (country, ISO code) pair, flattened back into columns.
profit_per_country = (
    profit_columns
    .groupby(by=['country', 'iso_alpha'])['profit']
    .sum()
    .reset_index()
)

In [None]:
# World map trace: one region per ISO code, shaded by summed profit.
profit_trace = go.Choropleth(
    locations=profit_per_country['iso_alpha'],
    z=profit_per_country['profit'],
    text=profit_per_country['country'],
    colorscale='Blues',
    autocolorscale=False,
    reversescale=True,
    marker=dict(line=dict(color='darkgray', width=0.5)),
    colorbar=dict(tickprefix='$', title='LUCRO'),
)

fig = go.Figure(data=profit_trace)

fig.update_layout(
    title=dict(text='Lucro por país'),
    margin={'l': 50, 'r': 50, 't': 50, 'b': 50},
    geo={
        'showframe': False,
        'showcoastlines': False,
        'projection_type': 'equirectangular',
    },
)

fig.show()

### Média do tempo de entrega por tipo de entrega em dias

In [51]:
delivery_columns = filter_columns(database, ['ship_mode', 'delivery_days'])

# Average delivery_days per shipping mode, ordered from the smallest mean to
# the largest.
ship_mode_mean = (
    delivery_columns
    .groupby(by=['ship_mode'])['delivery_days']
    .mean()
    .rename('mean')
    .reset_index()
    .sort_values(by=['mean'])
)

In [None]:
# Bar chart of the mean delivery time per shipping mode; each mode gets its
# own colour and fill pattern. (The unused `long_df = px.data.medals_long()`
# sample-data load that used to precede this call has been removed.)
fig = px.bar(
    ship_mode_mean,
    x="ship_mode",
    y="mean",
    color="ship_mode",
    title="Média do tempo de entrega por tipo de entrega em dias",
    pattern_shape="ship_mode"
)
fig.show()

### Pedidos por ano

In [54]:
yearly_orders = filter_columns(database, ['order_year'])

# Number of order rows per year, sorted from the fewest orders to the most.
order_per_year = (
    yearly_orders
    .groupby(by=['order_year'])['order_year']
    .count()
    .rename('count')
    .reset_index()
    .sort_values(by=['count'])
)

Unnamed: 0,order_year,count
0,2012,8998
1,2013,10962
2,2014,13799
3,2015,17531


### Pedidos por mês e ano

In [64]:
monthly_orders = filter_columns(database, ['order_year', 'order_month'])

# Number of order rows for each (month, year) pair, put in chronological order.
order_per_month = (
    monthly_orders
    .groupby(by=['order_month', 'order_year'])['order_month']
    .count()
    .rename('count')
    .reset_index()
    .sort_values(by=['order_year', 'order_month'])
)

### Mapa de calor das correlações

In [65]:
# Columns fed into the correlation heatmap.
correlation_columns = [
    'sales',
    'quantity',
    'discount',
    'profit',
    'shipping_cost',
    'delivery_days',
]

res = filter_columns(csv, correlation_columns)

In [None]:
# Compute the pairwise correlation matrix once and reuse it for both the cell
# colours (z) and the printed labels (text). `go` is already imported in the
# imports cell at the top of the notebook, so it is not re-imported here.
correlations = res.corr()

fig = go.Figure(data=go.Heatmap(
    x=correlations.columns,
    y=correlations.index,
    z=correlations,
    text=correlations,
    # Print each coefficient inside its cell with four decimal places.
    texttemplate="%{text:.4f}",
    textfont={"size": 16},
))

fig.show()