## Pandas

In [70]:
import pandas as pd

In [71]:
grades = pd.Series([78,100,91], index = ['Sam', 'John', 'Janet'])
grades

Sam       78
John     100
Janet     91
dtype: int64

In [72]:
grades.count()

3

In [73]:
grades.mean()

89.66666666666667

In [74]:
grades = pd.Series({'Sam':78, 'John': 100, 'Janet':91})
grades

Sam       78
John     100
Janet     91
dtype: int64

In [75]:
grades['Sam']

78

In [76]:
grades.Sam

78

In [77]:
grades.values

array([ 78, 100,  91])

In [78]:
grades.dtype

dtype('int64')

In [79]:
#series of strings
hardware = pd.Series(['Hammer', 'Saw', 'Wrench'])
hardware

0    Hammer
1       Saw
2    Wrench
dtype: object

In [80]:
hardware.str.contains('a')

0     True
1     True
2    False
dtype: bool

In [81]:
hardware.str.upper()

0    HAMMER
1       SAW
2    WRENCH
dtype: object

## Pandas DataFrame

In [82]:
grades_dict = {'Sam':[87,97,71], 'Janet':[89,79,100], 'John':[81,90,88], 'Kate':[100,85,97], 'Bob':[100,70,80]}
grades = pd.DataFrame(grades_dict)
grades

Unnamed: 0,Sam,Janet,John,Kate,Bob
0,87,89,81,100,100
1,97,79,90,85,70
2,71,100,88,97,80


In [83]:
grades = pd.DataFrame(grades_dict, index = ['Test1', 'Test2', 'Test3'])
grades

Unnamed: 0,Sam,Janet,John,Kate,Bob
Test1,87,89,81,100,100
Test2,97,79,90,85,70
Test3,71,100,88,97,80


In [84]:
grades.index = ['Test1', 'Test2', 'Test3']
grades

Unnamed: 0,Sam,Janet,John,Kate,Bob
Test1,87,89,81,100,100
Test2,97,79,90,85,70
Test3,71,100,88,97,80


In [85]:
grades['Sam']

Test1    87
Test2    97
Test3    71
Name: Sam, dtype: int64

In [86]:
grades.Sam

Test1    87
Test2    97
Test3    71
Name: Sam, dtype: int64

In [87]:
grades.loc['Test1']

Sam       87
Janet     89
John      81
Kate     100
Bob      100
Name: Test1, dtype: int64

In [88]:
grades.iloc[1]

Sam      97
Janet    79
John     90
Kate     85
Bob      70
Name: Test2, dtype: int64

In [89]:
grades.loc['Test1':'Test2']

Unnamed: 0,Sam,Janet,John,Kate,Bob
Test1,87,89,81,100,100
Test2,97,79,90,85,70


In [90]:
grades.iloc[0:2]

Unnamed: 0,Sam,Janet,John,Kate,Bob
Test1,87,89,81,100,100
Test2,97,79,90,85,70


In [91]:
grades.loc[['Test1', 'Test3']]

Unnamed: 0,Sam,Janet,John,Kate,Bob
Test1,87,89,81,100,100
Test3,71,100,88,97,80


In [92]:
grades.iloc[[0,2]]

Unnamed: 0,Sam,Janet,John,Kate,Bob
Test1,87,89,81,100,100
Test3,71,100,88,97,80


In [93]:
grades.loc['Test 1':'Test2', ['Sam', 'Janet']]

Unnamed: 0,Sam,Janet
Test1,87,89
Test2,97,79


In [94]:
grades.iloc[0:2, 0:3]

Unnamed: 0,Sam,Janet,John
Test1,87,89,81
Test2,97,79,90


In [95]:
grades.iloc[[1,2], [1,2]]

Unnamed: 0,Janet,John
Test2,79,90
Test3,100,88


In [96]:
grades[grades >= 90]

Unnamed: 0,Sam,Janet,John,Kate,Bob
Test1,,,,100.0,100.0
Test2,97.0,,90.0,,
Test3,,100.0,,97.0,


In [97]:
grades[(grades >= 80) & (grades <= 90)]

Unnamed: 0,Sam,Janet,John,Kate,Bob
Test1,87.0,89.0,81,,
Test2,,,90,85.0,
Test3,,,88,,80.0


In [98]:
grades.at['Test2', 'Bob']

70

In [99]:
grades.iat[2,0]

71

In [100]:
grades.at['Test2', 'Bob'] = 50

In [101]:
grades

Unnamed: 0,Sam,Janet,John,Kate,Bob
Test1,87,89,81,100,100
Test2,97,79,90,85,50
Test3,71,100,88,97,80


In [102]:
grades.iat[1,2] = 55

In [103]:
grades

Unnamed: 0,Sam,Janet,John,Kate,Bob
Test1,87,89,81,100,100
Test2,97,79,55,85,50
Test3,71,100,88,97,80


In [104]:
grades.describe()

Unnamed: 0,Sam,Janet,John,Kate,Bob
count,3.0,3.0,3.0,3.0,3.0
mean,85.0,89.33,74.67,94.0,76.67
std,13.11,10.5,17.39,7.94,25.17
min,71.0,79.0,55.0,85.0,50.0
25%,79.0,84.0,68.0,91.0,65.0
50%,87.0,89.0,81.0,97.0,80.0
75%,92.0,94.5,84.5,98.5,90.0
max,97.0,100.0,88.0,100.0,100.0


In [105]:
pd.set_option('display.precision', 2)

In [106]:
grades.describe()

Unnamed: 0,Sam,Janet,John,Kate,Bob
count,3.0,3.0,3.0,3.0,3.0
mean,85.0,89.33,74.67,94.0,76.67
std,13.11,10.5,17.39,7.94,25.17
min,71.0,79.0,55.0,85.0,50.0
25%,79.0,84.0,68.0,91.0,65.0
50%,87.0,89.0,81.0,97.0,80.0
75%,92.0,94.5,84.5,98.5,90.0
max,97.0,100.0,88.0,100.0,100.0


In [107]:
grades.mean()

Sam      85.00
Janet    89.33
John     74.67
Kate     94.00
Bob      76.67
dtype: float64

In [108]:
grades.T.mean()

Test1    91.4
Test2    73.2
Test3    87.2
dtype: float64

In [109]:
grades.sort_index(ascending = False)

Unnamed: 0,Sam,Janet,John,Kate,Bob
Test3,71,100,88,97,80
Test2,97,79,55,85,50
Test1,87,89,81,100,100


In [110]:
grades.sort_index(axis = 1)

Unnamed: 0,Bob,Janet,John,Kate,Sam
Test1,100,89,81,100,87
Test2,50,79,55,85,97
Test3,80,100,88,97,71


In [111]:
#sort by values
grades.sort_values(by='Test1', axis = 1, ascending = False)

Unnamed: 0,Kate,Bob,Janet,Sam,John
Test1,100,100,89,87,81
Test2,85,50,79,97,55
Test3,97,80,100,71,88


In [112]:
grades.T.sort_values(by = 'Test1', ascending= False)

Unnamed: 0,Test1,Test2,Test3
Kate,100,85,97
Bob,100,50,80
Janet,89,79,100
Sam,87,97,71
John,81,55,88


In [113]:
grades.loc['Test1'].sort_values(ascending= False)

Kate     100
Bob      100
Janet     89
Sam       87
John      81
Name: Test1, dtype: int64

In [114]:
grades.to_csv('grades.csv')

In [115]:
#working with .csv files
birds = pd.read_csv('bird_tracking.csv')
birdsDf = pd.DataFrame(birds)

In [116]:
birdsDf

Unnamed: 0,altitude,date_time,device_info_serial,direction,latitude,longitude,speed_2d,bird_name
0,71,2013-08-15 00:18:08+00,851,-150.47,49.42,2.12,0.15,Eric
1,68,2013-08-15 00:48:07+00,851,-136.15,49.42,2.12,2.44,Eric
2,68,2013-08-15 01:17:58+00,851,160.80,49.42,2.12,0.60,Eric
3,73,2013-08-15 01:47:51+00,851,32.77,49.42,2.12,0.31,Eric
4,69,2013-08-15 02:17:42+00,851,45.19,49.42,2.12,0.19,Eric
...,...,...,...,...,...,...,...,...
61915,11,2014-04-30 22:00:08+00,833,45.45,51.35,3.18,0.21,Sanne
61916,6,2014-04-30 22:29:57+00,833,-112.07,51.35,3.18,1.52,Sanne
61917,5,2014-04-30 22:59:52+00,833,69.99,51.35,3.18,3.12,Sanne
61918,16,2014-04-30 23:29:43+00,833,88.38,51.35,3.18,0.59,Sanne


In [117]:
df = pd.read_csv('grades.csv')
print(df)

  Unnamed: 0  Sam  Janet  John  Kate  Bob
0      Test1   87     89    81   100  100
1      Test2   97     79    55    85   50
2      Test3   71    100    88    97   80


In [118]:
df.index = ['Test1', 'Test2', 'Test3']
df

Unnamed: 0.1,Unnamed: 0,Sam,Janet,John,Kate,Bob
Test1,Test1,87,89,81,100,100
Test2,Test2,97,79,55,85,50
Test3,Test3,71,100,88,97,80
