## Basics of Pandas

In [34]:
import pandas as pd

### Load the File

In [35]:
# Relative location of the coffee-ratings CSV file
csv_path = "./0_coffee_ratings.csv"

In [36]:
# Read the CSV found at csv_path into a dataframe
df = pd.read_csv(filepath_or_buffer=csv_path)

# An Excel workbook is loaded the same way, using the analogous reader:
# pd.read_excel(xlsx_path)

### Examine the Dataframe

In [37]:
# Preview the top of the dataframe (its first five rows)
df.head(n=5)

Unnamed: 0,total_cup_points,species,owner,country_of_origin,farm_name,mill,company,altitude,region,producer,...,category_one_defects,quakers,color,category_two_defects,expiration,certification_body,unit_of_measurement,altitude_low_meters,altitude_high_meters,altitude_mean_meters
0,90.58,Arabica,metad plc,Ethiopia,metad plc,metad plc,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,...,0,0.0,Green,0,"April 3rd, 2016",METAD Agricultural Development plc,m,1950.0,2200.0,2075.0
1,89.92,Arabica,metad plc,Ethiopia,metad plc,metad plc,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,...,0,0.0,Green,1,"April 3rd, 2016",METAD Agricultural Development plc,m,1950.0,2200.0,2075.0
2,89.75,Arabica,grounds for health admin,Guatemala,"san marcos barrancas ""san cristobal cuch",,,1600 - 1800 m,,,...,0,0.0,,0,"May 31st, 2011",Specialty Coffee Association,m,1600.0,1800.0,1700.0
3,89.0,Arabica,yidnekachew dabessa,Ethiopia,yidnekachew dabessa coffee plantation,wolensu,yidnekachew debessa coffee plantation,1800-2200,oromia,Yidnekachew Dabessa Coffee Plantation,...,0,0.0,Green,2,"March 25th, 2016",METAD Agricultural Development plc,m,1800.0,2200.0,2000.0
4,88.83,Arabica,metad plc,Ethiopia,metad plc,metad plc,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,...,0,0.0,Green,2,"April 3rd, 2016",METAD Agricultural Development plc,m,1950.0,2200.0,2075.0


### Create the Dataframe

1. The keys correspond to the column labels.
2. The values are lists holding the entries of each column, one entry per row.

In [38]:
# Build a dataframe from a dict: every key becomes a column label and every
# list supplies that column's values, one entry per row.
songs = {
    'Album': ['Thriller', 'Back in Black', 'The Dark Side of the Moon', 'The Bodyguard', 'Bat Out of Hell'],
    'Released': [1982, 1980, 1973, 1992, 1977],
    # Lengths are kept as strings in an HH:MM:SS-style layout (8 characters each)
    'Length': ['00:42:19', '00:42:11', '00:42:49', '00:57:44', '00:46:33'],
}

song_frame = pd.DataFrame(songs)

# Display the resulting dataframe
song_frame

Unnamed: 0,Album,Released,Length
0,Thriller,1982,00:42:19
1,Back in Black,1980,00:42:11
2,The Dark Side of the Moon,1973,00:42:49
3,The Bodyguard,1992,00:57:441
4,Bat Out of Hell,1977,00:46:33


### Create a new dataframe from columns of an existing dataframe
- Write the dataframe name followed by the column name(s) enclosed in double brackets, e.g. `frame[['col']]`.

In [39]:
# Double brackets (a list of column labels) produce a new dataframe;
# here it contains only the 'Released' column of song_frame
x = song_frame[['Released']]

# The same syntax selects several columns at once, in the order listed
y = df[['owner', 'farm_name', 'mill']]

In [40]:
# Display x, the dataframe holding only the 'Released' column
x

Unnamed: 0,Released
0,1982
1,1980
2,1973
3,1992
4,1977


In [41]:
# Display y, the 'owner', 'farm_name' and 'mill' columns selected from df
y

Unnamed: 0,owner,farm_name,mill
0,metad plc,metad plc,metad plc
1,metad plc,metad plc,metad plc
2,grounds for health admin,"san marcos barrancas ""san cristobal cuch",
3,yidnekachew dabessa,yidnekachew dabessa coffee plantation,wolensu
4,metad plc,metad plc,metad plc
...,...,...,...
1333,luis robles,robustasa,our own lab
1334,luis robles,robustasa,own laboratory
1335,james moore,fazenda cazengo,cafe cazengo
1336,cafe politico,,


### Access specific elements
1. `.iloc[row position, column position]` selects by integer position.
2. `.loc[row label, column label]` selects by label; slices can be used as well, and a label slice includes both of its endpoints.

In [42]:
# .iloc is position-based: row position 0 and column position 0,
# i.e. the value in the first row and first column
y.iloc[0, 0]

'metad plc'

In [43]:
# .loc is label-based: take the rows labelled 0 through 5 and the columns
# from 'owner' through 'farm_name'; both ends of a .loc slice are included
y.loc[0:5, 'owner':'farm_name']

Unnamed: 0,owner,farm_name
0,metad plc,metad plc
1,metad plc,metad plc
2,grounds for health admin,"san marcos barrancas ""san cristobal cuch"
3,yidnekachew dabessa,yidnekachew dabessa coffee plantation
4,metad plc,metad plc
5,ji-ae ahn,


### Create a dataframe from the rows of df whose total_cup_points is at least 80
1. Compare the total_cup_points column with 80 to get a boolean value for every row
2. Select the rows for which that value is True

In [48]:
# Comparing the column with the threshold yields a boolean Series, one flag per row
meets_threshold = df['total_cup_points'] >= 80.00

# Indexing df with that mask keeps only the rows flagged True
df_s = df[meets_threshold]

In [49]:
# Display the filtered dataframe (rows with total_cup_points >= 80)
df_s

Unnamed: 0,total_cup_points,species,owner,country_of_origin,farm_name,mill,company,altitude,region,producer,...,category_one_defects,quakers,color,category_two_defects,expiration,certification_body,unit_of_measurement,altitude_low_meters,altitude_high_meters,altitude_mean_meters
0,90.58,Arabica,metad plc,Ethiopia,metad plc,metad plc,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,...,0,0.0,Green,0,"April 3rd, 2016",METAD Agricultural Development plc,m,1950.0,2200.0,2075.0
1,89.92,Arabica,metad plc,Ethiopia,metad plc,metad plc,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,...,0,0.0,Green,1,"April 3rd, 2016",METAD Agricultural Development plc,m,1950.0,2200.0,2075.0
2,89.75,Arabica,grounds for health admin,Guatemala,"san marcos barrancas ""san cristobal cuch",,,1600 - 1800 m,,,...,0,0.0,,0,"May 31st, 2011",Specialty Coffee Association,m,1600.0,1800.0,1700.0
3,89.00,Arabica,yidnekachew dabessa,Ethiopia,yidnekachew dabessa coffee plantation,wolensu,yidnekachew debessa coffee plantation,1800-2200,oromia,Yidnekachew Dabessa Coffee Plantation,...,0,0.0,Green,2,"March 25th, 2016",METAD Agricultural Development plc,m,1800.0,2200.0,2000.0
4,88.83,Arabica,metad plc,Ethiopia,metad plc,metad plc,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,...,0,0.0,Green,2,"April 3rd, 2016",METAD Agricultural Development plc,m,1950.0,2200.0,2075.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1327,80.92,Robusta,kawacom uganda ltd,Uganda,bushenyi,kawacom,kawacom uganda ltd,1600,western,Kawacom uganda ltd,...,0,0.0,Green,1,"June 27th, 2015",Uganda Coffee Development Authority,m,1600.0,1600.0,1600.0
1328,80.58,Robusta,nitubaasa ltd,Uganda,kigezi coffee farmers association,nitubaasa,nitubaasa ltd,1745,western,Kigezi Coffee Farmers Association,...,0,0.0,Green,2,"June 27th, 2015",Uganda Coffee Development Authority,m,1745.0,1745.0,1745.0
1329,80.50,Robusta,mannya coffee project,Uganda,mannya coffee project,mannya coffee project,mannya coffee project,1200,southern,Mannya coffee project,...,0,0.0,Green,1,"June 27th, 2015",Uganda Coffee Development Authority,m,1200.0,1200.0,1200.0
1330,80.17,Robusta,andrew hetzel,India,sethuraman estates,,cafemakers,750m,chikmagalur,Nishant Gurjer,...,0,0.0,Bluish-Green,1,"May 19th, 2015",Specialty Coffee Association,m,750.0,750.0,750.0


### Save as a CSV file
- Call `.to_csv('name.csv')` on the dataframe to write it to a CSV file.