In [1]:
import pandas as pd
import seaborn as sns

## DataFrame Concatenate
### LeetCode problem
##### => `pd.concat([df1, df2])`

In [2]:
# First student roster (four rows); each keyword below becomes a column label.
df1 = pd.DataFrame(
    dict(
        Student_id=[1, 2, 3, 4],
        Name=["Mason", "Ava", "Taylor", "Georgia"],
        Age=[20, 40, 30, 34],
    )
)

In [3]:
# Display df1 to confirm the four student rows and the default 0..3 row labels.
df1

Unnamed: 0,Student_id,Name,Age
0,1,Mason,20
1,2,Ava,40
2,3,Taylor,30
3,4,Georgia,34


In [4]:
# Second student roster (two rows) with the same three columns as df1,
# so the two frames can be stacked on top of each other.
df2 = pd.DataFrame(
    dict(
        Student_id=[5, 6],
        Name=["Alex", "Travor"],
        Age=[18, 23],
    )
)

In [5]:
# Display df2 to confirm the two student rows (row labels 0 and 1).
df2

Unnamed: 0,Student_id,Name,Age
0,5,Alex,18
1,6,Travor,23


In [6]:
# Stack df2's rows underneath df1's (row-wise concatenation).
# Each frame keeps its own row labels, so labels 0 and 1 appear twice in the result.
new_df = pd.concat(objs=[df1, df2], axis=0)

In [7]:
# Display the stacked frame: six rows, with the row labels restarting at 0
# for the rows that came from df2 (the original labels are preserved).
new_df

Unnamed: 0,Student_id,Name,Age
0,1,Mason,20
1,2,Ava,40
2,3,Taylor,30
3,4,Georgia,34
0,5,Alex,18
1,6,Travor,23


## Pivot table
###### A pivot table is a tool that quickly summarizes a large table of data by grouping rows and aggregating their values. It lets you "pivot" (rotate) the data to view it from different perspectives without changing the original data.

### LeetCode problem
##### => `weather.pivot(index="month", columns="city", values="temperature")`

In [22]:
# Nine sales records stored column-wise: position i in every list
# describes the same sale (day, item sold, who sold it, amount).
data = {
    "Day": [
        "Monday", "Monday", "Tuesday", "Tuesday", "Wednesday",
        "Wednesday", "Thursday", "Thursday", "Wednesday",
    ],
    "Item": [
        "Apples", "Bananas", "Apples", "Oranges", "Bananas",
        "Apples", "Oranges", "Bananas", "Oranges",
    ],
    "Salesperson": [
        "Alice", "Bob", "Alice", "Charlie", "Bob",
        "Alice", "Charlie", "Bob", "Alice",
    ],
    "Amount": [100, 150, 120, 80, 90, 110, 95, 130, 50],
}

In [23]:
# Turn the column-wise sales dict into a DataFrame (one row per sale).
df = pd.DataFrame(data=data)

In [24]:
# Display the full sales table (nine rows) that the pivot examples below summarize.
df

Unnamed: 0,Day,Item,Salesperson,Amount
0,Monday,Apples,Alice,100
1,Monday,Bananas,Bob,150
2,Tuesday,Apples,Alice,120
3,Tuesday,Oranges,Charlie,80
4,Wednesday,Bananas,Bob,90
5,Wednesday,Apples,Alice,110
6,Thursday,Oranges,Charlie,95
7,Thursday,Bananas,Bob,130
8,Wednesday,Oranges,Alice,50


#### Example 1: Total sales for each Salesperson

In [25]:
# One row per Salesperson, holding the sum of that person's Amount values.
pivot_table = df.pivot_table(
    index="Salesperson",
    values="Amount",
    aggfunc="sum",
)
pivot_table

Unnamed: 0_level_0,Amount
Salesperson,Unnamed: 1_level_1
Alice,380
Bob,370
Charlie,175


In [26]:
# Alternative: the same totals via groupby. The result is a Series indexed by
# Salesperson rather than a one-column DataFrame.
df.groupby("Salesperson")["Amount"].sum()

Salesperson
Alice      380
Bob        370
Charlie    175
Name: Amount, dtype: int64

#### Example 2: Sales by Salesperson and Item

In [27]:
# Salesperson rows x Item columns, each cell holding the summed Amount.
# fill_value=0 puts 0 where a salesperson never sold that item.
pivot1 = df.pivot_table(
    index="Salesperson",
    columns="Item",
    values="Amount",
    aggfunc="sum",
    fill_value=0,
)

In [28]:
# Display the Salesperson-by-Item totals; zeros mark combinations with no sales.
pivot1

Item,Apples,Bananas,Oranges
Salesperson,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alice,330,0,50
Bob,0,370,0
Charlie,0,0,175


In [32]:
# Alternative: group on both keys and sum Amount. Unlike the pivot table, the
# result is a Series that only lists (Salesperson, Item) pairs present in the data.
x = df.groupby(["Salesperson", "Item"])["Amount"].sum()

In [33]:
# Display the grouped totals: a Series with a two-level (Salesperson, Item) index.
x

Salesperson  Item   
Alice        Apples     330
             Oranges     50
Bob          Bananas    370
Charlie      Oranges    175
Name: Amount, dtype: int64

In [31]:
# Move the Salesperson and Item index levels back into ordinary columns,
# giving a flat table with a default 0..n-1 row index. x itself is not modified.
x.reset_index()

Unnamed: 0,Salesperson,Item,Amount
0,Alice,Apples,330
1,Alice,Oranges,50
2,Bob,Bananas,370
3,Charlie,Oranges,175


#### Example 3: Average sale amount by Day and Item

In [34]:

# Mean sale Amount for every (Day, Item) pair. Pairs with no sales show 0
# because of fill_value, not because their average was actually zero.
pivot3 = df.pivot_table(
    index="Day",
    columns="Item",
    values="Amount",
    aggfunc="mean",
    fill_value=0,
)

print("\n3. Average sales by Day and Item:")
print(pivot3)


3. Average sales by Day and Item:
Item       Apples  Bananas  Oranges
Day                                
Monday      100.0    150.0      0.0
Thursday      0.0    130.0     95.0
Tuesday     120.0      0.0     80.0
Wednesday   110.0     90.0     50.0


#### Example 4: Multiple aggregations and margin totals

In [62]:
# Two aggregations at once: total Amount ("sum") and number of sales ("count")
# per Salesperson/Item. margins=True appends an overall row and column; both
# are labelled with margins_name, including the totals under "count".
pivot2 = df.pivot_table(
    index="Salesperson",
    columns="Item",
    values="Amount",
    aggfunc=["sum", "count"],
    fill_value=0,
    margins=True,
    margins_name="Total Items cost",
)

In [59]:
# Display pivot2: a "sum" block and a "count" block side by side, each with
# its own "Total Items cost" margin column plus a shared margin row.
pivot2

Unnamed: 0_level_0,sum,sum,sum,sum,count,count,count,count
Item,Apples,Bananas,Oranges,Total Items cost,Apples,Bananas,Oranges,Total Items cost
Salesperson,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Alice,330,0,50,380,3,0,1,4
Bob,0,370,0,370,0,3,0,3
Charlie,0,0,175,175,0,0,2,2
Total Items cost,330,370,225,925,3,3,3,9


In [88]:
# Summed Amount per Salesperson and Item, with margins=True adding a
# "Total Items cost" row and column that hold the overall totals.
pivot4 = df.pivot_table(
    index="Salesperson",
    columns="Item",
    values="Amount",
    aggfunc="sum",
    fill_value=0,
    margins=True,
    margins_name="Total Items cost",
)

In [89]:
# Display pivot4: per-item totals for each salesperson plus the grand totals.
pivot4

Item,Apples,Bananas,Oranges,Total Items cost
Salesperson,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alice,330,0,50,380
Bob,0,370,0,370
Charlie,0,0,175,175
Total Items cost,330,370,225,925


In [90]:
# Number of sales recorded per Salesperson; margins=True appends a
# "Total Count" row with the overall number of sales.
pivot5 = df.pivot_table(
    index="Salesperson",
    values="Amount",
    aggfunc="count",
    margins=True,
    margins_name="Total Count",
)

In [91]:
# Display pivot5: sale counts per salesperson and the "Total Count" row.
pivot5

Unnamed: 0_level_0,Amount
Salesperson,Unnamed: 1_level_1
Alice,4
Bob,3
Charlie,2
Total Count,9
