# Series: single row or column 

## Pandas Series: Basics

In [1]:
#import pandas
import pandas as pd

In [2]:
titanic = pd.read_csv('titanic.csv')

In [3]:
#Column dot-notation
titanic.age

0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
       ... 
886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: age, Length: 891, dtype: float64

In [4]:
age = titanic['age']

In [5]:
type(age)

pandas.core.series.Series

In [6]:
age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [7]:
age.tail()

886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: age, dtype: float64

In [8]:
# Get the type of data in Series
age.dtype

dtype('float64')

In [16]:
age.describe()

count    714.000000
mean      29.699118
std       14.526497
min        0.420000
25%       20.125000
50%       28.000000
75%       38.000000
max       80.000000
Name: age, dtype: float64

In [9]:
age.shape

(891,)

In [10]:
len(age)

891

In [11]:
age.index

RangeIndex(start=0, stop=891, step=1)

In [12]:
# Series.info() is not available in the pandas version used for this run: the call
# below raises AttributeError (see the output). The next cells work around this by
# converting the Series to a DataFrame first.
# NOTE(review): newer pandas releases reportedly add Series.info() - confirm for your version.
age.info()

AttributeError: 'Series' object has no attribute 'info'

In [13]:
# Convert the Series to a one-column DataFrame, which does support info()
age.to_frame()

Unnamed: 0,age
0,22.0
1,38.0
2,26.0
3,35.0
4,35.0
...,...
886,27.0
887,19.0
888,
889,26.0


In [14]:
age.to_frame().info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   age     714 non-null    float64
dtypes: float64(1)
memory usage: 7.1 KB


## Analyzing Numerical Series

In [19]:
age

0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
       ... 
886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: age, Length: 891, dtype: float64

In [20]:
age.describe()

count    714.000000
mean      29.699118
std       14.526497
min        0.420000
25%       20.125000
50%       28.000000
75%       38.000000
max       80.000000
Name: age, dtype: float64

In [18]:
# count() returns the number of non-missing (non-NaN) ages
age.count()

714

In [15]:
# size is the total number of elements, missing values included
age.size

891

In [22]:
len(age)

891

In [23]:
age.sum(skipna=False)

nan

In [24]:
sum(age)

nan

In [25]:
age.mean()

29.69911764705882

In [26]:
age.median()

28.0

In [27]:
age.std()

14.526497332334044

In [28]:
age.min()

0.42

In [29]:
age.max()

80.0

In [101]:
# unique() returns each distinct value once (duplicates removed); NaN is included
age.unique()

array([22.  , 38.  , 26.  , 35.  ,   nan, 54.  ,  2.  , 27.  , 14.  ,
        4.  , 58.  , 20.  , 39.  , 55.  , 31.  , 34.  , 15.  , 28.  ,
        8.  , 19.  , 40.  , 66.  , 42.  , 21.  , 18.  ,  3.  ,  7.  ,
       49.  , 29.  , 65.  , 28.5 ,  5.  , 11.  , 45.  , 17.  , 32.  ,
       16.  , 25.  ,  0.83, 30.  , 33.  , 23.  , 24.  , 46.  , 59.  ,
       71.  , 37.  , 47.  , 14.5 , 70.5 , 32.5 , 12.  ,  9.  , 36.5 ,
       51.  , 55.5 , 40.5 , 44.  ,  1.  , 61.  , 56.  , 50.  , 36.  ,
       45.5 , 20.5 , 62.  , 41.  , 52.  , 63.  , 23.5 ,  0.92, 43.  ,
       60.  , 10.  , 64.  , 13.  , 48.  ,  0.75, 53.  , 57.  , 80.  ,
       70.  , 24.5 ,  6.  ,  0.67, 30.5 ,  0.42, 34.5 , 74.  ])

In [31]:
# Number of distinct values, counting NaN as one of them
len(age.unique())

89

In [122]:
# Number of distinct ages with NaN excluded (dropna=True is the default).
# dropna=False would count NaN as a value and give the same result as len(age.unique()).
age.nunique(dropna=True)

88

In [34]:
#The value and the number of each value
age.value_counts()

24.00    30
22.00    27
18.00    26
19.00    25
30.00    25
         ..
55.50     1
70.50     1
66.00     1
23.50     1
0.42      1
Name: age, Length: 88, dtype: int64

In [36]:
# sort=True orders the counts by frequency, descending by default
# (same result as value_counts() with no arguments)
age.value_counts(sort=True)

24.00    30
22.00    27
18.00    26
19.00    25
30.00    25
         ..
55.50     1
70.50     1
66.00     1
23.50     1
0.42      1
Name: age, Length: 88, dtype: int64

In [35]:
# sort=False: the counts are returned without being ordered by frequency
age.value_counts(sort=False)

22.00    27
38.00    11
26.00    18
35.00    18
54.00     8
         ..
0.92      1
0.83      2
0.67      1
70.50     1
0.42      1
Name: age, Length: 88, dtype: int64

In [37]:
age.value_counts(dropna = True)

24.00    30
22.00    27
18.00    26
19.00    25
30.00    25
         ..
55.50     1
70.50     1
66.00     1
23.50     1
0.42      1
Name: age, Length: 88, dtype: int64

In [38]:
#Include Nan
age.value_counts(dropna = False)

NaN      177
24.00     30
22.00     27
18.00     26
28.00     25
        ... 
36.50      1
55.50      1
66.00      1
23.50      1
0.42       1
Name: age, Length: 89, dtype: int64

In [39]:
age.value_counts(ascending = True)

0.42      1
23.50     1
66.00     1
70.50     1
55.50     1
         ..
30.00    25
19.00    25
18.00    26
22.00    27
24.00    30
Name: age, Length: 88, dtype: int64

In [40]:
age.value_counts(ascending=False)

24.00    30
22.00    27
18.00    26
19.00    25
30.00    25
         ..
55.50     1
70.50     1
66.00     1
23.50     1
0.42      1
Name: age, Length: 88, dtype: int64

In [42]:
age.value_counts(ascending=False).equals(age.value_counts(sort=True))

True

In [43]:
age.value_counts(sort=True, dropna=True, ascending=False, normalize=False)

24.00    30
22.00    27
18.00    26
19.00    25
30.00    25
         ..
55.50     1
70.50     1
66.00     1
23.50     1
0.42      1
Name: age, Length: 88, dtype: int64

In [45]:
age.value_counts(sort=True, dropna=True, ascending=False, normalize=True)

24.00    0.042017
22.00    0.037815
18.00    0.036415
19.00    0.035014
30.00    0.035014
           ...   
55.50    0.001401
70.50    0.001401
66.00    0.001401
23.50    0.001401
0.42     0.001401
Name: age, Length: 88, dtype: float64

In [44]:
#Normalize the counts of each value
age.value_counts(normalize=True)

24.00    0.042017
22.00    0.037815
18.00    0.036415
19.00    0.035014
30.00    0.035014
           ...   
55.50    0.001401
70.50    0.001401
66.00    0.001401
23.50    0.001401
0.42     0.001401
Name: age, Length: 88, dtype: float64

In [46]:
30/age.count()

0.04201680672268908

In [50]:
30/age.size

0.03367003367003367

In [51]:
age.value_counts(sort = True, dropna = True, ascending= False, normalize = False, bins = 5)

(16.336, 32.252]    346
(32.252, 48.168]    188
(0.339, 16.336]     100
(48.168, 64.084]     69
(64.084, 80.0]       11
Name: age, dtype: int64

In [53]:
age.value_counts(bins=10)

(16.336, 24.294]    177
(24.294, 32.252]    169
(32.252, 40.21]     118
(40.21, 48.168]      70
(0.339, 8.378]       54
(8.378, 16.336]      46
(48.168, 56.126]     45
(56.126, 64.084]     24
(64.084, 72.042]      9
(72.042, 80.0]        2
Name: age, dtype: int64

In [63]:
age.value_counts(sort=True, dropna=True, ascending=False, normalize=False, bins=5)

(16.336, 32.252]    346
(32.252, 48.168]    188
(0.339, 16.336]     100
(48.168, 64.084]     69
(64.084, 80.0]       11
Name: age, dtype: int64

In [64]:
age.value_counts(sort=True, dropna=True, ascending=False, normalize=False, bins=10)

(16.336, 24.294]    177
(24.294, 32.252]    169
(32.252, 40.21]     118
(40.21, 48.168]      70
(0.339, 8.378]       54
(8.378, 16.336]      46
(48.168, 56.126]     45
(56.126, 64.084]     24
(64.084, 72.042]      9
(72.042, 80.0]        2
Name: age, dtype: int64

## Analyzing non-numerical Series

In [73]:
import pandas as pd

In [74]:
summer = pd.read_csv('summer.csv')

In [75]:
summer.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver


In [76]:
summer.tail()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
31160,2012,London,Wrestling,Wrestling Freestyle,"JANIKOWSKI, Damian",POL,Men,Wg 84 KG,Bronze
31161,2012,London,Wrestling,Wrestling Freestyle,"REZAEI, Ghasem Gholamreza",IRI,Men,Wg 96 KG,Gold
31162,2012,London,Wrestling,Wrestling Freestyle,"TOTROV, Rustam",RUS,Men,Wg 96 KG,Silver
31163,2012,London,Wrestling,Wrestling Freestyle,"ALEKSANYAN, Artur",ARM,Men,Wg 96 KG,Bronze
31164,2012,London,Wrestling,Wrestling Freestyle,"LIDBERG, Jimmy",SWE,Men,Wg 96 KG,Bronze


In [77]:
summer.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31165 entries, 0 to 31164
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Year        31165 non-null  int64 
 1   City        31165 non-null  object
 2   Sport       31165 non-null  object
 3   Discipline  31165 non-null  object
 4   Athlete     31165 non-null  object
 5   Country     31161 non-null  object
 6   Gender      31165 non-null  object
 7   Event       31165 non-null  object
 8   Medal       31165 non-null  object
dtypes: int64(1), object(8)
memory usage: 2.1+ MB


In [79]:
athlete = summer.Athlete

In [80]:
athlete = summer['Athlete']

In [81]:
# Series.info() is not available in the pandas version used for this run, so this call
# raises AttributeError (see the output); describe() below gives a summary instead.
# NOTE(review): newer pandas releases reportedly add Series.info() - confirm for your version.
athlete.info()

AttributeError: 'Series' object has no attribute 'info'

In [82]:
athlete.describe()

count               31165
unique              22762
top       PHELPS, Michael
freq                   22
Name: Athlete, dtype: object

In [84]:
#get type: pandas Series
type(athlete)

pandas.core.series.Series

In [87]:
# dtype 'O' stands for object, the dtype pandas reports for this column of strings
athlete.dtype

dtype('O')

In [88]:
athlete.size

31165

In [90]:
athlete.shape

(31165,)

In [89]:
len(athlete)

31165

In [91]:
#Get the unique value of series (athlete name)
athlete.unique()

array(['HAJOS, Alfred', 'HERSCHMANN, Otto', 'DRIVAS, Dimitrios', ...,
       'TOTROV, Rustam', 'ALEKSANYAN, Artur', 'LIDBERG, Jimmy'],
      dtype=object)

In [92]:
#Get the number of unique value
len(athlete.unique())

22762

In [103]:
# Number of unique values with NaN counted as a value (dropna=False).
# Athlete has no missing values (31165 non-null of 31165), so the result
# equals len(athlete.unique()) and dropna makes no difference here.
athlete.nunique(dropna = False)

22762

In [93]:
#Get the unique value and the number or count of each value
athlete.value_counts()

PHELPS, Michael              22
LATYNINA, Larisa             18
ANDRIANOV, Nikolay           15
ONO, Takashi                 13
MANGIAROTTI, Edoardo         13
                             ..
HOLZDEPPE, Raphael            1
ALLERT, Ejlert Arild Emil     1
VERSTRAETEN, Joseph           1
KIM, Jiyeon                   1
SARKÖZI, Gabor                1
Name: Athlete, Length: 22762, dtype: int64

In [94]:
athlete.min()

'AABYE, Edgar'

In [95]:
athlete.max()

'ÖSTRAND, Per-Olof'

In [96]:
athlete.value_counts(sort=True, ascending=True)

SARKÖZI, Gabor                1
KIM, Jiyeon                   1
VERSTRAETEN, Joseph           1
ALLERT, Ejlert Arild Emil     1
HOLZDEPPE, Raphael            1
                             ..
MANGIAROTTI, Edoardo         13
ONO, Takashi                 13
ANDRIANOV, Nikolay           15
LATYNINA, Larisa             18
PHELPS, Michael              22
Name: Athlete, Length: 22762, dtype: int64

In [98]:
# Relative frequency (share of all rows) of the most frequent athletes;
# head() keeps the first five rows of the descending result.
athlete.value_counts(sort=True, ascending=False, normalize=True).head()

PHELPS, Michael         0.000706
LATYNINA, Larisa        0.000578
ANDRIANOV, Nikolay      0.000481
ONO, Takashi            0.000417
MANGIAROTTI, Edoardo    0.000417
Name: Athlete, dtype: float64

In [105]:
import numpy as np

In [107]:
# Demo frame: 'A' holds six distinct letters; 'D' starts with NaN followed by repeated 5s and 3s.
df = pd.DataFrame(
    data={
        'A': ['a', 'b', 'c', 'd', 'e', 'f'],
        'D': [np.nan, 5, 3, 3, 5, 3],
    }
)
print(df)

   A    D
0  a  NaN
1  b  5.0
2  c  3.0
3  d  3.0
4  e  5.0
5  f  3.0


In [110]:
a = df.A
d = df.D

In [118]:
# The number of unique value
a.nunique()

6

In [117]:
# nunique() ignores NaN by default, so only the distinct values 5 and 3 are counted
d.nunique()

2

In [120]:
#Get the unique value
a.unique()

array(['a', 'b', 'c', 'd', 'e', 'f'], dtype=object)

In [121]:
#Get the unique value include NaN
d.unique()

array([nan,  5.,  3.])

## Create Pandas Series


### A) From DataFrame

In [1]:
import pandas as pd

In [2]:
summer = pd.read_csv('summer.csv')

In [3]:
summer['Athlete']

0                    HAJOS, Alfred
1                 HERSCHMANN, Otto
2                DRIVAS, Dimitrios
3               MALOKINIS, Ioannis
4               CHASAPIS, Spiridon
                   ...            
31160           JANIKOWSKI, Damian
31161    REZAEI, Ghasem Gholamreza
31162               TOTROV, Rustam
31163            ALEKSANYAN, Artur
31164               LIDBERG, Jimmy
Name: Athlete, Length: 31165, dtype: object

In [4]:
summer.Athlete

0                    HAJOS, Alfred
1                 HERSCHMANN, Otto
2                DRIVAS, Dimitrios
3               MALOKINIS, Ioannis
4               CHASAPIS, Spiridon
                   ...            
31160           JANIKOWSKI, Damian
31161    REZAEI, Ghasem Gholamreza
31162               TOTROV, Rustam
31163            ALEKSANYAN, Artur
31164               LIDBERG, Jimmy
Name: Athlete, Length: 31165, dtype: object

In [9]:
summer.iloc[0]

Year                    1896
City                  Athens
Sport               Aquatics
Discipline          Swimming
Athlete        HAJOS, Alfred
Country                  HUN
Gender                   Men
Event         100M Freestyle
Medal                   Gold
Name: 0, dtype: object

In [17]:
# Read only the 'Athlete' column and squeeze the resulting one-column DataFrame into a Series.
# The squeeze= keyword of read_csv was deprecated in pandas 1.4 and removed in 2.0, so the
# conversion is done with DataFrame.squeeze('columns'), which works on old and new versions.
pd.read_csv('summer.csv', usecols=['Athlete']).squeeze('columns')

0                    HAJOS, Alfred
1                 HERSCHMANN, Otto
2                DRIVAS, Dimitrios
3               MALOKINIS, Ioannis
4               CHASAPIS, Spiridon
                   ...            
31160           JANIKOWSKI, Damian
31161    REZAEI, Ghasem Gholamreza
31162               TOTROV, Rustam
31163            ALEKSANYAN, Artur
31164               LIDBERG, Jimmy
Name: Athlete, Length: 31165, dtype: object

### B) From scratch

In [18]:
pd.Series([18,2,6,11,109])

0     18
1      2
2      6
3     11
4    109
dtype: int64

In [21]:
pd.Series([13,16,25,29,35], index=['Mon','Tues','Wed','Thrus','Fri'], name='Sales')

Mon      13
Tues     16
Wed      25
Thrus    29
Fri      35
Name: Sales, dtype: int64

### C) From NumPy

In [22]:
import numpy as np
import pandas as pd

In [23]:
sales= np.array([1,2,5,2,11,18])
sales

array([ 1,  2,  5,  2, 11, 18])

In [24]:
pd.Series(sales)

0     1
1     2
2     5
3     2
4    11
5    18
dtype: int32

### D) From List

In [26]:
sales = [3,5,6,12,32,54]
sales

[3, 5, 6, 12, 32, 54]

In [27]:
pd.Series(sales)

0     3
1     5
2     6
3    12
4    32
5    54
dtype: int64

### E) From Dictionary

In [28]:
# Weekday label -> sales figure; the insertion order of the keys is preserved.
dic = {
    'Mon': 25,
    'Tues': 74,
    'Wed': 82,
    'Thrus': 192,
    'Fri': 251,
}
dic

{'Mon': 25, 'Tues': 74, 'Wed': 82, 'Thrus': 192, 'Fri': 251}

In [29]:
pd.Series(dic)

Mon       25
Tues      74
Wed       82
Thrus    192
Fri      251
dtype: int64

In [30]:
pd.Series(dic, index=['Fri','Sat','Sun','Mon', 'Tues', 'Wed'])

Fri     251.0
Sat       NaN
Sun       NaN
Mon      25.0
Tues     74.0
Wed      82.0
dtype: float64

In [32]:
pd.Series(dic, index=[1,2,3,4,5])

1   NaN
2   NaN
3   NaN
4   NaN
5   NaN
dtype: float64

In [31]:
pd.Series(dic, index=[1,2,3,4,5,6,7])

1   NaN
2   NaN
3   NaN
4   NaN
5   NaN
6   NaN
7   NaN
dtype: float64

## Indexing and Slicing

In [33]:
import pandas as pd

In [34]:
titanic = pd.read_csv('titanic.csv')

In [35]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [36]:
titanic.tail()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
886,0,2,male,27.0,0,0,13.0,S,
887,1,1,female,19.0,0,0,30.0,S,B
888,0,3,female,,1,2,23.45,S,
889,1,1,male,26.0,0,0,30.0,C,C
890,0,3,male,32.0,0,0,7.75,Q,


In [37]:
age = titanic.age

In [38]:
age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [39]:
age.tail()

886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: age, dtype: float64

In [41]:
age.index

RangeIndex(start=0, stop=891, step=1)

In [42]:
age[0]

22.0

In [43]:
age[2]

26.0

In [45]:
#The last value in Series
age.iloc[-1]

32.0

In [46]:
age[890]

32.0

In [47]:
age[[3,4]]

3    35.0
4    35.0
Name: age, dtype: float64

In [49]:
age.iloc[:3]

0    22.0
1    38.0
2    26.0
Name: age, dtype: float64

In [50]:
summer = pd.read_csv('summer.csv', index_col = 'Athlete')

In [51]:
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver


In [52]:
summer.tail()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"JANIKOWSKI, Damian",2012,London,Wrestling,Wrestling Freestyle,POL,Men,Wg 84 KG,Bronze
"REZAEI, Ghasem Gholamreza",2012,London,Wrestling,Wrestling Freestyle,IRI,Men,Wg 96 KG,Gold
"TOTROV, Rustam",2012,London,Wrestling,Wrestling Freestyle,RUS,Men,Wg 96 KG,Silver
"ALEKSANYAN, Artur",2012,London,Wrestling,Wrestling Freestyle,ARM,Men,Wg 96 KG,Bronze
"LIDBERG, Jimmy",2012,London,Wrestling,Wrestling Freestyle,SWE,Men,Wg 96 KG,Bronze


In [55]:
event = summer.Event

In [56]:
event.head()

Athlete
HAJOS, Alfred                     100M Freestyle
HERSCHMANN, Otto                  100M Freestyle
DRIVAS, Dimitrios     100M Freestyle For Sailors
MALOKINIS, Ioannis    100M Freestyle For Sailors
CHASAPIS, Spiridon    100M Freestyle For Sailors
Name: Event, dtype: object

In [57]:
event.tail()

Athlete
JANIKOWSKI, Damian           Wg 84 KG
REZAEI, Ghasem Gholamreza    Wg 96 KG
TOTROV, Rustam               Wg 96 KG
ALEKSANYAN, Artur            Wg 96 KG
LIDBERG, Jimmy               Wg 96 KG
Name: Event, dtype: object

In [58]:
type(event)

pandas.core.series.Series

In [59]:
# Series.info() is not available in the pandas version used for this run,
# so this call raises AttributeError (see the output).
# NOTE(review): newer pandas releases reportedly add Series.info() - confirm for your version.
event.info()

AttributeError: 'Series' object has no attribute 'info'

In [60]:
event.index

Index(['HAJOS, Alfred', 'HERSCHMANN, Otto', 'DRIVAS, Dimitrios',
       'MALOKINIS, Ioannis', 'CHASAPIS, Spiridon', 'CHOROPHAS, Efstathios',
       'HAJOS, Alfred', 'ANDREOU, Joannis', 'CHOROPHAS, Efstathios',
       'NEUMANN, Paul',
       ...
       'AHMADOV, Emin', 'KAZAKEVIC, Aleksandr', 'KHUGAEV, Alan',
       'EBRAHIM, Karam Mohamed Gaber', 'GAJIYEV, Danyal', 'JANIKOWSKI, Damian',
       'REZAEI, Ghasem Gholamreza', 'TOTROV, Rustam', 'ALEKSANYAN, Artur',
       'LIDBERG, Jimmy'],
      dtype='object', name='Athlete', length=31165)

In [61]:
# First element by position. The index holds athlete names (strings), so a plain
# event[0] only works through pandas' integer-to-position fallback, which newer
# pandas versions deprecate; .iloc states the positional intent explicitly.
event.iloc[0]

'100M Freestyle'

In [62]:
# Second element by position; .iloc avoids the deprecated integer fallback that
# event[1] would rely on with this string-labelled index.
event.iloc[1]

'100M Freestyle'

In [64]:
event.iloc[-1]

'Wg 96 KG'

In [65]:
event.iloc[:3]

Athlete
HAJOS, Alfred                    100M Freestyle
HERSCHMANN, Otto                 100M Freestyle
DRIVAS, Dimitrios    100M Freestyle For Sailors
Name: Event, dtype: object

In [66]:
event['DRIVAS, Dimitrios']

'100M Freestyle For Sailors'

In [67]:
event[:'DRIVAS, Dimitrios']

Athlete
HAJOS, Alfred                    100M Freestyle
HERSCHMANN, Otto                 100M Freestyle
DRIVAS, Dimitrios    100M Freestyle For Sailors
Name: Event, dtype: object

In [69]:
event.loc['PHELPS, Michael']

Athlete
PHELPS, Michael            100M Butterfly
PHELPS, Michael            200M Butterfly
PHELPS, Michael            200M Freestyle
PHELPS, Michael    200M Individual Medley
PHELPS, Michael    400M Individual Medley
PHELPS, Michael    4X100M Freestyle Relay
PHELPS, Michael       4X100M Medley Relay
PHELPS, Michael    4X200M Freestyle Relay
PHELPS, Michael            100M Butterfly
PHELPS, Michael            200M Butterfly
PHELPS, Michael            200M Freestyle
PHELPS, Michael    200M Individual Medley
PHELPS, Michael    400M Individual Medley
PHELPS, Michael    4X100M Freestyle Relay
PHELPS, Michael       4X100M Medley Relay
PHELPS, Michael    4X200M Freestyle Relay
PHELPS, Michael            100M Butterfly
PHELPS, Michael            200M Butterfly
PHELPS, Michael               200M Medley
PHELPS, Michael          4X100M Freestyle
PHELPS, Michael             4X100M Medley
PHELPS, Michael          4X200M Freestyle
Name: Event, dtype: object

In [71]:
event.loc['PHELPS, Michael'].equals(event['PHELPS, Michael'])

True

In [70]:
event.loc[['PHELPS, Michael','LEWIS, Carl']]

Athlete
PHELPS, Michael            100M Butterfly
PHELPS, Michael            200M Butterfly
PHELPS, Michael            200M Freestyle
PHELPS, Michael    200M Individual Medley
PHELPS, Michael    400M Individual Medley
PHELPS, Michael    4X100M Freestyle Relay
PHELPS, Michael       4X100M Medley Relay
PHELPS, Michael    4X200M Freestyle Relay
PHELPS, Michael            100M Butterfly
PHELPS, Michael            200M Butterfly
PHELPS, Michael            200M Freestyle
PHELPS, Michael    200M Individual Medley
PHELPS, Michael    400M Individual Medley
PHELPS, Michael    4X100M Freestyle Relay
PHELPS, Michael       4X100M Medley Relay
PHELPS, Michael    4X200M Freestyle Relay
PHELPS, Michael            100M Butterfly
PHELPS, Michael            200M Butterfly
PHELPS, Michael               200M Medley
PHELPS, Michael          4X100M Freestyle
PHELPS, Michael             4X100M Medley
PHELPS, Michael          4X200M Freestyle
LEWIS, Carl                          100M
LEWIS, Carl               

## Sorting and introduction to the `inplace` parameter

In [72]:
import pandas as pd

In [74]:
# Sales keyed by an integer label; the keys are not in ascending order and the last value is missing.
dic = {
    1: 10,
    3: 43,
    2: 6,
    4: 36,
    5: 2,
    6: 0,
    7: None,
}
dic

{1: 10, 3: 43, 2: 6, 4: 36, 5: 2, 6: 0, 7: None}

In [76]:
sales = pd.Series(dic)
sales

1    10.0
3    43.0
2     6.0
4    36.0
5     2.0
6     0.0
7     NaN
dtype: float64

In [77]:
#Sort the index column
sales.sort_index()

1    10.0
2     6.0
3    43.0
4    36.0
5     2.0
6     0.0
7     NaN
dtype: float64

In [78]:
sales.sort_index(ascending=True, inplace=True)

In [79]:
sales

1    10.0
2     6.0
3    43.0
4    36.0
5     2.0
6     0.0
7     NaN
dtype: float64

In [80]:
sales.sort_values(inplace=False)

6     0.0
5     2.0
2     6.0
1    10.0
4    36.0
3    43.0
7     NaN
dtype: float64

In [81]:
sales.sort_values(ascending=False, na_position='last', inplace=True)

In [82]:
sales

3    43.0
4    36.0
1    10.0
2     6.0
5     2.0
6     0.0
7     NaN
dtype: float64

In [87]:
dic = {'Mon':10, 'Tue':42, 'Wed':81, 'Thu':251, 'Fri':100}
dic

{'Mon': 10, 'Tue': 42, 'Wed': 81, 'Thu': 251, 'Fri': 100}

In [88]:
sales = pd.Series(dic)

In [89]:
sales

Mon     10
Tue     42
Wed     81
Thu    251
Fri    100
dtype: int64

In [90]:
sales.sort_index(ascending=True)

Fri    100
Mon     10
Thu    251
Tue     42
Wed     81
dtype: int64

## nlargest() & nsmallest()

In [91]:
import pandas as pd

In [92]:
titanic = pd.read_csv('titanic.csv')

In [93]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [94]:
titanic.tail()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
886,0,2,male,27.0,0,0,13.0,S,
887,1,1,female,19.0,0,0,30.0,S,B
888,0,3,female,,1,2,23.45,S,
889,1,1,male,26.0,0,0,30.0,C,C
890,0,3,male,32.0,0,0,7.75,Q,


In [95]:
age = titanic.age

In [96]:
age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [97]:
age.tail()

886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: age, dtype: float64

In [98]:
age.sort_values(ascending=False).head()

630    80.0
851    74.0
96     71.0
493    71.0
116    70.5
Name: age, dtype: float64

In [99]:
age.sort_values(ascending=True).iloc[:3]

803    0.42
755    0.67
644    0.75
Name: age, dtype: float64

In [108]:
# nlargest(n) returns the n largest values, largest first (n defaults to 5)
age.nlargest(3)

630    80.0
851    74.0
96     71.0
Name: age, dtype: float64

In [112]:
# nsmallest(n) returns the n smallest values, smallest first (n defaults to 5, as used here)
age.nsmallest()

803    0.42
755    0.67
469    0.75
644    0.75
78     0.83
Name: age, dtype: float64

In [111]:
#Get the index of the largest value
age.nlargest(n=3).index[0]

630

In [103]:
# Get the index of the smallest value
age.nsmallest(n=3).index[0]

803

## idxmin() & idxmax()

In [104]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [105]:
#Get the index of max value
titanic.age.idxmax()

630

In [106]:
#Get the index of min value
titanic.age.idxmin()

803

In [113]:
titanic.loc[630]

survived       1
pclass         1
sex         male
age           80
sibsp          0
parch          0
fare          30
embarked       S
deck           A
Name: 630, dtype: object

In [114]:
titanic.loc[titanic.age.idxmin()]

survived         1
pclass           3
sex           male
age           0.42
sibsp            0
parch            1
fare        8.5167
embarked         C
deck           NaN
Name: 803, dtype: object

In [115]:
dic = {'Mon':10, 'Tue':25, 'Wed':6, 'Thu':36, 'Fri':2, 'Sat':0, 'Sum':None}
dic

{'Mon': 10, 'Tue': 25, 'Wed': 6, 'Thu': 36, 'Fri': 2, 'Sat': 0, 'Sum': None}

In [116]:
sales = pd.Series(dic)
sales

Mon    10.0
Tue    25.0
Wed     6.0
Thu    36.0
Fri     2.0
Sat     0.0
Sum     NaN
dtype: float64

In [117]:
sales.sort_values(ascending=True).index[0]

'Sat'

In [118]:
sales.idxmin()

'Sat'

In [119]:
sales.sort_values(ascending=False).index[0]

'Thu'

In [120]:
sales.idxmax()

'Thu'

## Manipulating Series

In [121]:
import pandas as pd

In [123]:
# Daily sales labelled by weekday; Saturday's figure is missing, so the Series is float64.
sales = pd.Series(
    [4, 65, 2, 9, 100, None, 0],
    index=['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'],
)
sales

Mon      4.0
Tue     65.0
Wed      2.0
Thu      9.0
Fri    100.0
Sat      NaN
Sun      0.0
dtype: float64

In [124]:
sales['Sun']

0.0

In [125]:
sales.iloc[3] = 30

In [126]:
sales

Mon      4.0
Tue     65.0
Wed      2.0
Thu     30.0
Fri    100.0
Sat      NaN
Sun      0.0
dtype: float64

In [127]:
(sales/1.1).round(2)

Mon     3.64
Tue    59.09
Wed     1.82
Thu    27.27
Fri    90.91
Sat      NaN
Sun     0.00
dtype: float64