# DIFFERENT WAYS OF DISPLAYING DATA

### IMPORTS

In [2]:
import numpy as np
import pandas as pd

%config IPCompleter.greedy = True
# Suppress scientific notation
np.set_printoptions(suppress=True)

In [2]:
orders = pd.read_csv('http://bit.ly/chiporders', sep='\t')
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
movies = pd.read_csv('http://bit.ly/imdbratings')
titanic = pd.read_csv('http://bit.ly/kaggletrain')

In [4]:
orders = pd.read_csv('http://bit.ly/chiporders', sep='\t')
orders['item_price'] = orders.item_price.str.replace('$', '').astype('float')

In [5]:
orders.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98


### USING GROUPBY TO DISPLAY TOTAL SUM OF EACH ORDER

In [73]:
orders['item_price'] = orders['item_price'].astype('float')
orders.groupby('order_id')['item_price'].sum().head()

order_id
1    11.56
2    16.98
3    12.67
4    21.00
5    13.70
Name: item_price, dtype: float64

### USING GROUPBY TO DISPLAY TOTAL SUM AND TOTAL ITEM OF EACH ORDER

In [75]:
orders.groupby('order_id')['item_price'].agg(['sum', 'count']).head()

Unnamed: 0_level_0,sum,count
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,11.56,4
2,16.98,1
3,12.67,2
4,21.0,2
5,13.7,2


# SORT VALUES

In [8]:
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.sort_values('total_litres_of_pure_alcohol', ascending=False).head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
15,Belarus,142,373,42,14.4,Europe
98,Lithuania,343,244,56,12.9,Europe
3,Andorra,245,138,312,12.4,Europe
68,Grenada,199,438,28,11.9,North America
45,Czech Republic,361,170,134,11.8,Europe


# PIVOT TABLE

In [15]:
titanic.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [16]:
titanic.pivot_table(index='Sex', columns='Pclass', values='Survived', aggfunc='mean')

Pclass,1,2,3
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


### DIVIDE STAR_RATINGS INTO 5 CATEGORIES AND COUNT THE NUMBER OF RATINGS ACROSS EACH GENRE

In [14]:
movies['rating_cat'] = pd.cut(movies['star_rating'], bins=5, labels=['Best', 'Good', 'Medium', 'Bad', 'Stay Away'] )
movies.pivot_table(index='rating_cat', columns='genre', values='star_rating', aggfunc='count')

genre,Action,Adventure,Animation,Biography,Comedy,Crime,Drama,Family,Fantasy,Film-Noir,History,Horror,Mystery,Sci-Fi,Thriller,Western
rating_cat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Best,57.0,24.0,20.0,31.0,77.0,52.0,105.0,,1.0,1.0,,16.0,5.0,1.0,3.0,1.0
Good,54.0,33.0,28.0,34.0,55.0,44.0,117.0,2.0,,1.0,1.0,8.0,6.0,3.0,2.0,3.0
Medium,18.0,14.0,13.0,9.0,21.0,20.0,46.0,,,1.0,,4.0,4.0,1.0,,3.0
Bad,6.0,4.0,1.0,3.0,3.0,5.0,10.0,,,,,1.0,1.0,,,2.0
Stay Away,1.0,,,,,3.0,,,,,,,,,,
