In [1]:
import pandas as pd
souvenirs = pd.read_csv('souvenir.csv')
categories = pd.read_csv('category.csv')
owners = pd.read_csv('owner.csv')

# Merge data and clean up the table
souvenirs = souvenirs[['ItemID','Price','Weight','OwnerID','CategoryID','Name']]

souvenirs_with_categories = souvenirs.merge(categories,how='inner',on='CategoryID')
souvenirs_with_categories.rename(columns={"Name_x":"SouvenirName","Name_y":"CategoryName"},inplace=True)
souvenirs_with_categories_with_owners = souvenirs_with_categories.merge(owners,how='inner',on='OwnerID')
souvenirs_with_categories_with_owners.rename(columns={"Name":"OwnerName"},inplace=True)
souvenirs_with_categories_with_owners.drop(['OwnerID','CategoryID'],inplace=True,axis=1)

In [2]:
souvenirs_with_categories_with_owners[['Price','Weight']].describe()

Unnamed: 0,Price,Weight
count,147.0,109.0
mean,12268.0,171.754679
std,148459.6,1688.38972
min,0.0,0.75
25%,0.0,4.0
50%,2.99,6.0
75%,16.24,16.0
max,1800000.0,17637.0


In [3]:
#Aggregates

souvenirs_with_categories_with_owners.agg({'Price':['min','max','median','sum'],'Weight':['min','max','median']})

Unnamed: 0,Price,Weight
min,0.0,0.75
max,1800000.0,17637.0
median,2.99,6.0
sum,1803395.65,


In [4]:
souvenirs_with_categories_with_owners.groupby('CategoryName')['Price'].sum()

CategoryName
Art                  155.98
Artifact         1800041.16
Book                 197.08
Clothing             485.94
Food                  22.95
Geological            18.99
Kitchenware          481.61
Miscellaneous       1949.44
Postcard              21.70
Tool                   0.00
Toy                   20.80
Name: Price, dtype: float64

In [5]:
#average

souvenirs_with_categories_with_owners.groupby('CategoryName')['Price'].median()

CategoryName
Art              12.500
Artifact          0.000
Book             16.490
Clothing         17.495
Food             22.950
Geological        0.000
Kitchenware      12.290
Miscellaneous     1.000
Postcard          1.225
Tool              0.000
Toy               0.000
Name: Price, dtype: float64

In [6]:
souvenirs_with_categories_with_owners.groupby('CategoryName')['Price'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
CategoryName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Art,10.0,15.598,14.575871,0.0,2.4975,12.5,23.75,40.0
Artifact,10.0,180004.116,569208.532762,0.0,0.0,0.0,0.0,1800000.0
Book,9.0,21.897778,14.230154,1.34,15.6,16.49,29.99,49.99
Clothing,16.0,30.37125,33.407899,0.0,0.0,17.495,46.2475,120.0
Food,1.0,22.95,,22.95,22.95,22.95,22.95,22.95
Geological,30.0,0.633,3.467084,0.0,0.0,0.0,0.0,18.99
Kitchenware,35.0,13.760286,8.582943,0.0,9.85,12.29,14.995,50.0
Miscellaneous,16.0,121.84,405.925566,0.0,0.0,1.0,46.625,1640.0
Postcard,12.0,1.808333,1.66111,0.0,0.9425,1.225,2.6225,5.99
Tool,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
#groupby multiple categories


souvenirs_with_categories_with_owners.groupby(['CategoryName','OwnerName'])['Price'].median()

CategoryName   OwnerName
Art            Group 1          20.000
               Group 4           0.000
Artifact       Group 1      900020.580
               Group 4           0.000
Book           Group 2          16.490
Clothing       Group 1          19.990
               Group 2          43.500
               Group 4           0.000
Food           Group 1          22.950
Geological     Group 2           0.000
               Group 4           0.000
               Group 5           0.000
Kitchenware    Group 1           8.950
               Group 2          16.490
               Group 3          11.695
               Group 4           0.000
Miscellaneous  Group 1          25.975
               Group 2          22.750
               Group 4           0.000
Postcard       Group 2           1.620
               Group 4           0.000
Tool           Group 4           0.000
Toy            Group 1          20.800
               Group 4           0.000
Name: Price, dtype: float64

In [10]:
#Aggregating string values

souvenirs_with_categories_with_owners.groupby(['OwnerName','CategoryName']).agg({'SouvenirName':list})


Unnamed: 0_level_0,Unnamed: 1_level_0,SouvenirName
OwnerName,CategoryName,Unnamed: 2_level_1
Group 1,Art,"[Crafts Enameled Cockatoo, Hand Painted Rose F..."
Group 1,Artifact,[Vintage Maori Carving Intricate Patu Club & L...
Group 1,Clothing,"[Tenugui Cotton Cloth Rice Grains, Alpaca Ponc..."
Group 1,Food,[Organic Maple Syrup]
Group 1,Kitchenware,[Shot Glass]
Group 1,Miscellaneous,"[Resort Snowglobe, Iron Standing Dog, Cast Iro..."
Group 1,Toy,[12'' Ancient Traditional Korea Hanbok Doll]
Group 2,Book,"[Hand-bound sketchbook, Ink Artbook, Manga, Mo..."
Group 2,Clothing,"[Fancy Scarf, Sari, Blouse, Sunglasses, Headsc..."
Group 2,Geological,"[Smooth Rock, Black Rock, Obsidian]"


In [11]:
#Functions   AND ADD NEW COLUMNS

bills = souvenirs_with_categories_with_owners[['OwnerName','CategoryName','Price']]

def calc_tips_10percent(x):
    return (x * .1)

bills['tip'] = bills.Price.apply(calc_tips_10percent)
bills['final_total'] = bills['Price'] + bills['tip']
bills.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bills['tip'] = bills.Price.apply(calc_tips_10percent)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bills['final_total'] = bills['Price'] + bills['tip']


Unnamed: 0,OwnerName,CategoryName,Price,tip,final_total
0,Group 3,Kitchenware,7.5,0.75,8.25
1,Group 3,Kitchenware,9.99,0.999,10.989
2,Group 3,Kitchenware,12.99,1.299,14.289
3,Group 3,Kitchenware,10.25,1.025,11.275
4,Group 3,Kitchenware,6.91,0.691,7.601


In [14]:
# Function with parameter

bills = souvenirs_with_categories_with_owners[['OwnerName','CategoryName','Price']]
bills.head()

def calc_tips_percent(total,percentage):
    return (total * percentage/100)

bills['tip'] = bills.Price.apply(calc_tips_percent,percentage=10)
bills['final_total'] = bills['Price'] + bills['tip']
bills.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bills['tip'] = bills.Price.apply(calc_tips_percent,percentage=10)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bills['final_total'] = bills['Price'] + bills['tip']


Unnamed: 0,OwnerName,CategoryName,Price,tip,final_total
0,Group 3,Kitchenware,7.5,0.75,8.25
1,Group 3,Kitchenware,9.99,0.999,10.989
2,Group 3,Kitchenware,12.99,1.299,14.289
3,Group 3,Kitchenware,10.25,1.025,11.275
4,Group 3,Kitchenware,6.91,0.691,7.601


In [16]:
bills = souvenirs_with_categories_with_owners[['OwnerName','CategoryName','Price']]
bills.head()

def calc_tips_percent(total,percentage):
    return (total * percentage/100)

bills['tip'] = bills.Price.apply(calc_tips_percent,percentage=25)
bills['final_total'] = bills['Price'] + bills['tip']
bills.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bills['tip'] = bills.Price.apply(calc_tips_percent,percentage=25)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bills['final_total'] = bills['Price'] + bills['tip']


Unnamed: 0,OwnerName,CategoryName,Price,tip,final_total
0,Group 3,Kitchenware,7.5,1.875,9.375
1,Group 3,Kitchenware,9.99,2.4975,12.4875
2,Group 3,Kitchenware,12.99,3.2475,16.2375
3,Group 3,Kitchenware,10.25,2.5625,12.8125
4,Group 3,Kitchenware,6.91,1.7275,8.6375
