In [3]:
import altair as alt
import pandas as pd
import numpy as np

In [7]:
# Fixed seed so that a fresh kernel + Run All regenerates exactly the same
# synthetic dataset (and therefore the same tables and charts) every time.
SEED = 42
rng = np.random.default_rng(SEED)

n = 200  # number of synthetic sales records

# Synthetic sales data: every column is drawn independently of the others.
df = pd.DataFrame({
    'Region': rng.choice(['North', 'South', 'East', 'West'], size=n),
    'Product': rng.choice(['A', 'B', 'C'], size=n),
    # Normal draws are converted to whole numbers with astype(int).
    'Sales': rng.normal(5000, 1200, size=n).astype(int),
    'Profit': rng.normal(800, 300, size=n).astype(int),
    'Month': rng.choice(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
                         'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], size=n),
    # The upper bound is exclusive, so values fall in 50..199.
    'Units Sold': rng.integers(50, 200, size=n),
})

In [8]:
# Preview the first two rows to check the column names and value formats.
df.head(n=2)

Unnamed: 0,Region,Product,Sales,Profit,Month,Units Sold
0,North,B,7412,949,Feb,146
1,East,C,4832,890,Jun,61


In [10]:
# Show the frame's dimensions as a (rows, columns) tuple.
df.shape


(200, 6)

In [11]:
# Scatter plot of Profit against Sales: point colour encodes Region, point
# size encodes Units Sold, and hovering a point lists the tooltip fields.
# .interactive() makes the axes pannable and zoomable.
chart = (
    alt.Chart(df)
    .mark_point()
    .encode(
        alt.X("Sales"),
        alt.Y("Profit"),
        alt.Color("Region"),
        alt.Size("Units Sold"),
        tooltip=["Region", "Product", "Sales", "Profit", "Units Sold"],
    )
    .interactive()
)

chart

In [12]:
# Re-display the first two rows as a column reference for the aggregation below.
df.head(n=2)

Unnamed: 0,Region,Product,Sales,Profit,Month,Units Sold
0,North,B,7412,949,Feb,146
1,East,C,4832,890,Jun,61


In [14]:
# Mean of Sales for each Month, returned as a flat two-column frame
# (Month, Sales) rather than a Series indexed by Month.
mothly_sales = df.groupby("Month", as_index=False)["Sales"].mean()

In [15]:
# Inspect the per-month mean of Sales computed from df.
mothly_sales

Unnamed: 0,Month,Sales
0,Apr,4730.25
1,Aug,5259.214286
2,Dec,4800.8125
3,Feb,4764.722222
4,Jan,5426.545455
5,Jul,4941.73913
6,Jun,5049.105263
7,Mar,4405.684211
8,May,5077.818182
9,Nov,4361.777778


In [16]:
# Month abbreviations in calendar order, one quarter per row; used to give
# the Month column a meaningful (non-alphabetical) ordering.
mothly_order = [
    'Jan', 'Feb', 'Mar',
    'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep',
    'Oct', 'Nov', 'Dec',
]

In [18]:
# Build the calendar-ordered frame as a NEW object instead of overwriting the
# Month column of `mothly_sales` in place. The aggregated frame shown earlier
# therefore stays exactly as displayed, and this cell can be re-run on its own
# with the same result.
monthly_sales = (
    mothly_sales
    # Ordered categorical: sorting follows `mothly_order`, not the alphabet.
    .assign(
        Month=pd.Categorical(
            mothly_sales["Month"], categories=mothly_order, ordered=True
        )
    )
    .sort_values("Month")
)

In [19]:
# Inspect the monthly means after sorting by the ordered Month categorical.
monthly_sales

Unnamed: 0,Month,Sales
4,Jan,5426.545455
3,Feb,4764.722222
7,Mar,4405.684211
0,Apr,4730.25
8,May,5077.818182
6,Jun,5049.105263
5,Jul,4941.73913
1,Aug,5259.214286
11,Sep,5285.444444
10,Oct,4435.538462


In [21]:
# Line chart (with point markers) of the mean Sales per month.
# The x-axis order is pinned explicitly to calendar order via `sort=` rather
# than relying on Altair inferring it from the pandas categorical dtype, which
# depends on the installed Altair version; without it the months may be drawn
# alphabetically (Apr, Aug, Dec, ...).
alt.Chart(monthly_sales).mark_line(point=True).encode(
    x=alt.X("Month:O", sort=mothly_order),
    # The Sales column holds per-month means here, so label the axis as such.
    y=alt.Y("Sales:Q", title="Average Sales"),
).properties(
    title="Average Sales by Month"
).interactive()

In [22]:
# Re-display the first two rows as a column reference for the regional totals below.
df.head(n=2)

Unnamed: 0,Region,Product,Sales,Profit,Month,Units Sold
0,North,B,7412,949,Feb,146
1,East,C,4832,890,Jun,61


In [25]:
# Total Sales for each Region as a flat two-column frame (Region, Sales).
sales_by_region = df.groupby("Region", as_index=False)["Sales"].sum()

sales_by_region

Unnamed: 0,Region,Sales
0,East,197420
1,North,280330
2,South,245851
3,West,247372


In [29]:
# Bar chart of the summed Sales per Region, drawn in a single light-blue fill.
(
    alt.Chart(sales_by_region)
    .mark_bar(color="lightblue")
    .encode(
        alt.X("Region"),
        alt.Y("Sales"),
    )
    .properties(title="Total Sales by Region")
    .interactive()
)

In [31]:
# Histogram of Profit: bin=True lets Altair choose the bins, and the bar
# height is the number of records that fall into each bin.
(
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X("Profit", bin=True),
        y=alt.Y("count()"),
    )
    .properties(title="Distribution of Profit")
)

In [35]:
# Profit-vs-Sales scatter coloured by Region, split into one panel per
# Product laid out as side-by-side columns.
(
    alt.Chart(df)
    .mark_point()
    .encode(
        alt.X("Sales"),
        alt.Y("Profit"),
        alt.Color("Region"),
    )
    .facet(column="Product")
    .interactive()
)