# Visualización interactiva con Plotly: superstore dataset

Complete el siguiente conjunto de ejercicios para solidificar sus conocimientos sobre visualización interactiva utilizando Plotly.

## 0. Lectura de datos.

In [1]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from plotly.offline import init_notebook_mode, iplot, plot
import plotly as py
init_notebook_mode(connected=True)
import plotly.graph_objs as go
from wordcloud import WordCloud #para graficar la nube de puntos
import matplotlib.pyplot as plt
import plotly.figure_factory as ff
import os
from datetime import datetime

In [2]:
# Read the Superstore sample workbook into a DataFrame and print its
# column / dtype / non-null summary.
df = pd.read_excel('./data/Sample - Superstore.xlsx')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9994 entries, 0 to 9993
Data columns (total 20 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Order ID       9994 non-null   object        
 1   Order Date     9994 non-null   datetime64[ns]
 2   Ship Date      9994 non-null   datetime64[ns]
 3   Ship Mode      9994 non-null   object        
 4   Customer ID    9994 non-null   object        
 5   Customer Name  9994 non-null   object        
 6   Segment        9994 non-null   object        
 7   Country        9994 non-null   object        
 8   City           9994 non-null   object        
 9   State          9994 non-null   object        
 10  Postal Code    9994 non-null   int64         
 11  Region         9994 non-null   object        
 12  Product ID     9994 non-null   object        
 13  Category       9994 non-null   object        
 14  Sub-Category   9994 non-null   object        
 15  Product Name   9994 n

In [3]:
# Preview the first three rows of the loaded data.
df.head(n=3)

Unnamed: 0,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,State,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,CA-2016-138688,2016-06-12,2016-06-16,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714


## 1. Cree un gráfico de barras interactivo que muestre las ventas totales por estado para 2014.

In [13]:
# Rows whose ship date falls in 2014.
df_14 = df.loc[df['Ship Date'].dt.year.eq(2014)]

# Total sales per state; 'State' stays a regular column (as_index=False).
ventas_por_estado = (
    df_14
    .groupby('State', as_index=False)['Sales']
    .sum()
)

# One red bar per state, outlined in black.
trace = go.Bar(
    x=ventas_por_estado['State'],
    y=ventas_por_estado['Sales'],
    marker={
        'color': 'red',
        'line': {'color': 'black', 'width': 2},
    },
)

data = [trace]
layout = {
    'title': {'text': 'Ventas totales por estado (2014)', 'x': 0.5},  # x=0.5 centres the title
    'xaxis': {'title': 'Estado'},
    'yaxis': {'title': 'Ventas totales'},
    'height': 600,
    'width': 900,
}

fig = go.Figure(data=data, layout=layout)

iplot(fig)

PRUEBAS:

- .dt.year: Extrae el año de la fecha.
- .dt.month: Extrae el mes de la fecha.
- .dt.day: Extrae el día del mes de la fecha.
- .dt.hour: Extrae la hora de la fecha.
- .dt.minute: Extrae el minuto de la fecha.
- .dt.second: Extrae el segundo de la fecha.
- .dt.dayofweek: Devuelve el día de la semana como un número, donde 0 es lunes y 6 es domingo.
- .dt.day_name(): Devuelve el nombre del día de la semana.

In [16]:
# Rows whose ship date falls in 2014; show the first two.
df_14 = df.loc[df['Ship Date'].dt.year.eq(2014)]
df_14.head(n=2)

Unnamed: 0,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,State,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
5,CA-2014-115812,2014-06-09,2014-06-14,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,FUR-FU-10001487,Furniture,Furnishings,Eldon Expressions Wood and Plastic Desk Access...,48.86,7,0.0,14.1694
6,CA-2014-115812,2014-06-09,2014-06-14,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,OFF-AR-10002833,Office Supplies,Art,Newell 322,7.28,4,0.0,1.9656


Queremos calcular las ventas totales por estado; por lo tanto, agrupamos por estado y sumamos las ventas.

In [21]:
# Sum of 'Sales' for each state in the 2014 subset; preview the first rows.
ventas_por_estado = (
    df_14
    .groupby('State', as_index=False)['Sales']
    .sum()
)
ventas_por_estado.head()

Unnamed: 0,State,Sales
0,Alabama,5617.17
1,Arizona,7743.25
2,Arkansas,6302.69
3,California,88236.043
4,Colorado,6502.294


## 2. Cree un gráfico de líneas interactivo que muestre las ventas y las ganancias en 2014.

In [14]:
# Rows whose ship date falls in 2014; show the first two.
df_14 = df.loc[df['Ship Date'].dt.year.eq(2014)]
df_14.head(n=2)

Unnamed: 0,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,State,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
5,CA-2014-115812,2014-06-09,2014-06-14,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,FUR-FU-10001487,Furniture,Furnishings,Eldon Expressions Wood and Plastic Desk Access...,48.86,7,0.0,14.1694
6,CA-2014-115812,2014-06-09,2014-06-14,Standard Class,BH-11710,Brosina Hoffman,Consumer,United States,Los Angeles,California,90032,West,OFF-AR-10002833,Office Supplies,Art,Newell 322,7.28,4,0.0,1.9656


In [16]:
# Monthly totals of sales and profit for rows shipped in 2014.
df_14 = df.loc[df['Ship Date'].dt.year.eq(2014)]
df_14_meses = (
    df_14
    .groupby(df_14['Ship Date'].dt.month)[['Sales', 'Profit']]
    .sum()
    .reset_index()
    .rename(columns={'Ship Date': '2014 months'})
)

# Replace the month numbers with month names (strftime '%B').
df_14_meses['2014 months'] = (
    pd.to_datetime(df_14_meses['2014 months'], format='%m')
    .dt.strftime('%B')
)
df_14_meses

Unnamed: 0,2014 months,Sales,Profit
0,January,13275.131,2424.1125
1,February,5406.528,866.1925
2,March,50708.349,123.4122
3,April,30388.465,3313.2568
4,May,22478.88,2636.2171
5,June,35991.6556,5316.866
6,July,32990.594,-1047.8993
7,August,30245.242,6161.5346
8,September,73126.6533,6835.8856
9,October,35741.477,4567.7754


In [17]:
# Monthly totals of sales and profit for rows shipped in 2014.
df_14 = df.loc[df['Ship Date'].dt.year.eq(2014)]
df_14_meses = (
    df_14
    .groupby(df_14['Ship Date'].dt.month)[['Sales', 'Profit']]
    .sum()
    .reset_index()
    .rename(columns={'Ship Date': '2014 months'})
)

# Replace the month numbers with month names (strftime '%B').
df_14_meses['2014 months'] = (
    pd.to_datetime(df_14_meses['2014 months'], format='%m')
    .dt.strftime('%B')
)

# One line per metric over the same month axis:
# trace1 -> sales (red), trace2 -> profit (black).
trace1, trace2 = (
    go.Scatter(
        x=df_14_meses['2014 months'],
        y=df_14_meses[columna],
        name=etiqueta,
        mode='lines+markers',
        marker={'color': color},
    )
    for columna, etiqueta, color in (
        ('Sales', 'Ventas', 'red'),
        ('Profit', 'Ganancias', 'black'),
    )
)

data = [trace1, trace2]
layout = {
    'title': {'text': 'Ventas y ganancias por meses (2014)', 'x': 0.5},
    'xaxis': {'title': 'meses'},
}

fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [21]:
# Sales and profit summed per order date, restricted to orders placed in 2014.
# The order date becomes the index of df_2.
df_2 = (
    df.loc[df['Order Date'].dt.year.eq(2014)]
    .groupby('Order Date')[['Sales', 'Profit']]
    .sum()
)

# trace1 -> sales (red), trace2 -> profit (black), both against the date index.
trace1, trace2 = (
    go.Scatter(
        x=df_2.index,
        y=df_2[columna],
        name=etiqueta,
        mode='lines+markers',
        marker={'color': color},
    )
    for columna, etiqueta, color in (
        ('Sales', 'Ventas', 'red'),
        ('Profit', 'Ganancias', 'black'),
    )
)

data = [trace1, trace2]
fig = go.Figure(data=data)

iplot(fig)

## 3. Cree un conjunto de 3 histogramas interactivos que muestren las distribuciones de cantidad por pedido para los siguientes estados: California, Nueva York y Texas.

In [102]:
# Overlaid histograms of the 'Quantity' column for Texas, California and
# New York, drawn on a single pair of axes.

# Rows belonging to each of the three states.
texas = df[df['State'] == 'Texas']
california = df[df['State'] == 'California']
new_york = df[df['State'] == 'New York']

# Texas
trace1 = go.Histogram(x=texas['Quantity'],
                      opacity=0.8,
                      name='Texas',
                      marker=dict(color='black')
                      )

# California (legend label spelled correctly)
trace2 = go.Histogram(x=california['Quantity'],
                      opacity=0.8,
                      name='California',
                      marker=dict(color='rgba(12,50,196,0.6)')
                      )

# New York
trace3 = go.Histogram(x=new_york['Quantity'],
                      opacity=0.5,
                      name='New York',
                      marker=dict(color='red')
                      )


data = [trace1, trace2, trace3]
# barmode='overlay' draws the three histograms on top of each other, which is
# why every trace has an opacity below 1.
layout = dict(barmode='overlay',
              title=dict(text='Cantidades por pedido', x=0.5),
              xaxis=dict(title='Cantidades por pedido'),
              yaxis=dict(title='Veces'))


fig = go.Figure(data=data, layout=layout)

iplot(fig)

In [27]:
# Three histograms of the 'Quantity' column (Texas, California, New York)
# placed side by side: each trace is bound to its own x axis and the three
# x-axis domains split the figure width.

# Rows belonging to each of the three states.
texas = df[df['State'] == 'Texas']
california = df[df['State'] == 'California']
new_york = df[df['State'] == 'New York']

# Texas -> default x axis
trace1 = go.Histogram(x=texas['Quantity'],
                      opacity=0.8,
                      name='Texas',
                      marker=dict(color='black')
                      )

# California -> second x axis (legend label spelled correctly)
trace2 = go.Histogram(x=california['Quantity'],
                      opacity=0.8,
                      name='California',
                      marker=dict(color='rgba(12,50,196,0.6)'),
                      xaxis='x2')

# New York -> third x axis
trace3 = go.Histogram(x=new_york['Quantity'],
                      opacity=0.5,
                      name='New York',
                      marker=dict(color='red'),
                      xaxis='x3')


data = [trace1, trace2, trace3]
# Each `domain` is the horizontal fraction of the figure given to that axis;
# the gaps between them (0.30-0.35 and 0.65-0.70) separate the panels.
layout = dict(barmode='overlay',
              title=dict(text='Cantidades por pedido', x=0.5),
              xaxis=dict(title='Cantidades', domain=[0, 0.3]),
              xaxis2=dict(title='Cantidades', domain=[0.35, 0.65]),
              xaxis3=dict(title='Cantidades', domain=[0.70, 1]),
              yaxis=dict(title='Veces'))


fig = go.Figure(data=data, layout=layout)

iplot(fig)

## 4. Cree un gráfico de barras apiladas interactivo que muestre los ingresos por estado (barras) para cada una de las categorías de productos para California, Nueva York y Texas.

In [28]:
# Profit summed per product category, computed separately for each state.
texas, california, new_york = (
    df.loc[df['State'].eq(estado)]
    .groupby('Category', as_index=False)['Profit']
    .sum()
    for estado in ('Texas', 'California', 'New York')
)
texas

Unnamed: 0,Category,Profit
0,Furniture,-10436.1419
1,Office Supplies,-18584.6434
2,Technology,3291.429


In [32]:
# Stacked bar chart of total profit per product category, with one stacked
# segment per state (Texas, California, New York).

# Profit summed per category, computed separately for each state.
texas = df[df['State'] == 'Texas'].groupby('Category', as_index=False)['Profit'].sum()
california = df[df['State'] == 'California'].groupby('Category', as_index=False)['Profit'].sum()
new_york = df[df['State'] == 'New York'].groupby('Category', as_index=False)['Profit'].sum()

# Texas
trace1 = dict(x=texas['Category'],
              y=texas['Profit'],
              type='bar',
              name='Texas')

# California
trace2 = dict(x=california['Category'],
              y=california['Profit'],
              type='bar',
              name='California')

# New York
trace3 = dict(x=new_york['Category'],
              y=new_york['Profit'],
              type='bar',
              name='New York')


data = [trace1, trace2, trace3]
# The exercise asks for stacked bars. 'relative' stacks the segments while
# drawing negative values below the axis and positive values above it, which
# is needed here because some of the profit totals are negative.
layout = dict(barmode='relative',
              title=dict(text='Ingresos por estado y categorias', x=0.5),
              xaxis=dict(title='Categorias'),
              yaxis=dict(title='Ingresos'))

fig = go.Figure(data=data, layout=layout)

iplot(fig)

In [31]:
import plotly.express as px

# Profit summed for every (state, category) pair within the three states.
df_4 = (
    df.loc[df['State'].isin(['California', 'New York', 'Texas'])]
    .groupby(['State', 'Category'], as_index=False)[['Profit']]
    .sum()
)

# One bar per category, coloured by state.
fig = px.bar(df_4, x='Category', y='Profit', color='State')

fig.show()

## 5. Cree un gráfico de líneas interactivo que muestre las ventas por día en California.

In [33]:
# California rows shipped in 2014, with sales summed per ship date.
df_14 = df.loc[df['Ship Date'].dt.year.eq(2014)]
california_14 = df_14.loc[df_14['State'].eq('California')]
california_dias = (
    california_14
    .groupby('Ship Date', as_index=False)['Sales']
    .sum()
)

# Single black line; the ship date is also passed as the `text` of each point.
trace1 = go.Scatter(
    x=california_dias['Ship Date'],
    y=california_dias['Sales'],
    mode='lines+markers',
    text=california_dias['Ship Date'],
    marker={'color': 'black'},
)

data = [trace1]
layout = {
    'title': {'text': 'Ventas por día en California (2014)', 'x': 0.5},
    'xaxis': {'title': 'Fecha'},
    'yaxis': {'title': 'Ventas totales'},
    'height': 600,
    'width': 1000,
}

fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [38]:
# Same chart as the 2014 version but without the year filter.
# NOTE: the names df_14 / california_14 are kept from the earlier cell, yet
# here they hold rows from every year in the data.
df_14 = df
california_14 = df_14.loc[df_14['State'].eq('California')]
california_dias = (
    california_14
    .groupby('Ship Date', as_index=False)['Sales']
    .sum()
)

# Single blue line; the ship date is also passed as the `text` of each point.
trace1 = go.Scatter(
    x=california_dias['Ship Date'],
    y=california_dias['Sales'],
    mode='lines+markers',
    text=california_dias['Ship Date'],
    marker={'color': 'blue'},
)

data = [trace1]
layout = {
    'title': {'text': 'Ventas por día en California', 'x': 0.5},
    'xaxis': {'title': 'Fecha'},
    'yaxis': {'title': 'Ventas totales'},
    'template': 'plotly_dark',
    'height': 600,
    'width': 1500,
}

fig = go.Figure(data=data, layout=layout)
iplot(fig)