### Pandas series and working with None

In [1]:
import pandas as pd

In [2]:
import numpy as np

In [5]:
series = ['tree', 'bush', 'plant']

In [6]:
series

['tree', 'bush', 'plant']

In [4]:
s = pd.Series(series)

In [5]:
s

0     tree
1     bush
2    plant
dtype: object

In [7]:
s = pd.Series(['tree', 'bush', 3])

In [8]:
s

0    tree
1    bush
2       3
dtype: object

working with None and NaN

In [9]:
s = pd.Series(['tree', 'bush', None])

In [10]:
s

0    tree
1    bush
2    None
dtype: object

In [11]:
np.nan

nan

In [12]:
np.nan == None

False

In [13]:
np.nan == np.nan

False

In [14]:
np.nan = np.nan

In [15]:
np.isnan(np.nan)

True

In [16]:
np.isnan(None)

TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [None]:
np.version.full_version

### Querying in Pandas

In [3]:
sport = {'football' : 'England',
         'basketball': 'USA',
        'hockey': 'Canada',
        'Sumo': 'Japan'}

In [4]:
s = pd.Series(sport)

In [5]:
s

football      England
basketball        USA
hockey         Canada
Sumo            Japan
dtype: object

In [None]:
s.iloc[0]

In [None]:
s.loc['hockey']

In [6]:
s.index

Index(['football', 'basketball', 'hockey', 'Sumo'], dtype='object')

In [None]:
s.loc['Sumo']

In [None]:
i = 0
for ind in s.index:
    if ind != "Sumo":
        i += 1
        print("i = " + str(i))
    else:
        i = ind
        print("i = " + str(i))

In [None]:
list(s.index).index('Sumo')

pandas can figure out a way what is what from the context

In [7]:
s[3]

'Japan'

In [8]:
s['Sumo']

'Japan'

In [9]:
purchase_1 = pd.Series({'Name': 'Chris',
                        'Item Purchased': 'Dog Food',
                        'Cost': 22.50})
purchase_2 = pd.Series({'Name': 'Kevyn',
                        'Item Purchased': 'Kitty Litter',
                        'Cost': 2.50})
purchase_3 = pd.Series({'Name': 'Vinod',
                        'Item Purchased': 'Bird Seed',
                        'Cost': 5.00})
df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2'])
df.head()

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5
Store 2,Vinod,Bird Seed,5.0


In [None]:
df.loc['Store 1']

In [None]:
df.loc['Store 1'].T

In [None]:
df

In [None]:
df['Cost']

In [None]:
type(df['Cost'])

In [None]:
df.loc['Store 1']['Cost']

In [None]:
df.loc[:, ['Name','Cost']]

In [None]:
df_2 = df.drop('Store 1')
df_2

In [None]:
df

In [None]:
df_2

parameter *inplace=True* allows to do operations without an assignment
it changes the dataset(not makes a copy)

In [None]:
df.drop('Store 1', inplace=True)

In [None]:
df

In [None]:
df

In [None]:
df['Location'] = None
df

In [None]:
cost = df['Cost']

In [None]:
cost

In [None]:
cost +=2

In [None]:
cost = cost + 2

In [None]:
cost

In [None]:
df

### Preparing dataset

In [25]:
# read csv file
df = pd.read_csv('data/olympics.csv')

In [26]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !,02 !,03 !,Total,№ Games,01 !,02 !,03 !,Combined total
1,Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
2,Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
3,Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
4,Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12


In [None]:
# the command works in Linux env
!cat olympics.csv

In [27]:
# read csv file and how many rows to skip
df = pd.read_csv('data/olympics.csv', skiprows=1, index_col=0)

In [None]:
df.head()

In [28]:
df.columns

Index(['№ Summer', '01 !', '02 !', '03 !', 'Total', '№ Winter', '01 !.1',
       '02 !.1', '03 !.1', 'Total.1', '№ Games', '01 !.2', '02 !.2', '03 !.2',
       'Combined total'],
      dtype='object')

In [29]:
for col in df.columns:
    if col[:2] == '01':
        df.rename(columns = {col:'Gold' + col[4:]}, inplace = True)
    if col[:2] == '02':
        df.rename(columns = {col:'Silver' + col[4:]}, inplace = True)
    if col[:2] == '03':
        df.rename(columns = {col:'Bronze' + col[4:]}, inplace = True)
    if col[:1] == '№':
        df.rename(columns = {col:'#' + col[1:]}, inplace = True)

df.head()

Unnamed: 0,# Summer,Gold,Silver,Bronze,Total,# Winter,Gold.1,Silver.1,Bronze.1,Total.1,# Games,Gold.2,Silver.2,Bronze.2,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


In [None]:
df['Gold'] > 0

finding the countries that win the gold medal during the Summer Olympics

In [30]:
only_gold= df.where(df['Gold'] > 0)
only_gold

Unnamed: 0,# Summer,Gold,Silver,Bronze,Total,# Winter,Gold.1,Silver.1,Bronze.1,Total.1,# Games,Gold.2,Silver.2,Bronze.2,Combined total
Afghanistan (AFG),,,,,,,,,,,,,,,
Algeria (ALG),12.0,5.0,2.0,8.0,15.0,3.0,0.0,0.0,0.0,0.0,15.0,5.0,2.0,8.0,15.0
Argentina (ARG),23.0,18.0,24.0,28.0,70.0,18.0,0.0,0.0,0.0,0.0,41.0,18.0,24.0,28.0,70.0
Armenia (ARM),5.0,1.0,2.0,9.0,12.0,6.0,0.0,0.0,0.0,0.0,11.0,1.0,2.0,9.0,12.0
Australasia (ANZ) [ANZ],2.0,3.0,4.0,5.0,12.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,4.0,5.0,12.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Independent Olympic Participants (IOP) [IOP],,,,,,,,,,,,,,,
Zambia (ZAM) [ZAM],,,,,,,,,,,,,,,
Zimbabwe (ZIM) [ZIM],12.0,3.0,4.0,1.0,8.0,1.0,0.0,0.0,0.0,0.0,13.0,3.0,4.0,1.0,8.0
Mixed team (ZZX) [ZZX],3.0,8.0,5.0,4.0,17.0,0.0,0.0,0.0,0.0,0.0,3.0,8.0,5.0,4.0,17.0


In [None]:
only_gold['Gold'].count()

In [None]:
df['Gold'].count()

where clause will leave Nan in your df, but they will not count in statistical operations

In [None]:
only_gold = only_gold.dropna()

In [None]:
only_gold.head()

In [31]:
only_gold = df[df['Gold'] > 0]

In [32]:
only_gold['Gold'].count()

100

In [None]:
only_gold.head()

In [None]:
0 and True

In [18]:
False or 0

0

In [20]:
False or True

True

In [22]:
False or 1

1

In [24]:
1 and False

False

In [35]:
df[(df['Gold'] > 0) | (df['Gold.1'] > 0)]

Unnamed: 0,# Summer,Gold,Silver,Bronze,Total,# Winter,Gold.1,Silver.1,Bronze.1,Total.1,# Games,Gold.2,Silver.2,Bronze.2,Combined total
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12
Australia (AUS) [AUS] [Z],25,139,152,177,468,18,5,3,4,12,43,144,155,181,480
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Venezuela (VEN),17,2,2,8,12,4,0,0,0,0,21,2,2,8,12
Yugoslavia (YUG) [YUG],16,26,29,28,83,14,0,3,1,4,30,26,32,29,87
Zimbabwe (ZIM) [ZIM],12,3,4,1,8,1,0,0,0,0,13,3,4,1,8
Mixed team (ZZX) [ZZX],3,8,5,4,17,0,0,0,0,0,3,8,5,4,17


In [36]:
# finding total of countries who won on of the Olympics

len(df[(df['Gold'] > 0) | (df['Gold.1'] > 0)])

101

finding the country that win the gold medal during the Winter Olympics, but none gold medals during Summer Olympics

In [None]:
df[(df['Gold'] == 0) & (df['Gold.1'] > 0)]

In [39]:
# assigning index as a new column in the dataframe
df['country'] = df.index

In [40]:
df.head()

Unnamed: 0,# Summer,Gold,Silver,Bronze,Total,# Winter,Gold.1,Silver.1,Bronze.1,Total.1,# Games,Gold.2,Silver.2,Bronze.2,Combined total,country
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2,Afghanistan (AFG)
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15,Algeria (ALG)
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70,Argentina (ARG)
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12,Armenia (ARM)
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12,Australasia (ANZ) [ANZ]


In [41]:
# this is how to reset the index
df = df.reset_index()

In [42]:
df.head()

Unnamed: 0,index,# Summer,Gold,Silver,Bronze,Total,# Winter,Gold.1,Silver.1,Bronze.1,Total.1,# Games,Gold.2,Silver.2,Bronze.2,Combined total,country
0,Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2,Afghanistan (AFG)
1,Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15,Algeria (ALG)
2,Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70,Argentina (ARG)
3,Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12,Armenia (ARM)
4,Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12,Australasia (ANZ) [ANZ]


In [None]:
df = df.reset_index()

In [None]:
df.head()

In [None]:
df = df.reset_index()

### Multi-level indexing

In [43]:
df = pd.read_csv('data/census.csv')

In [44]:
df.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
0,40,3,6,1,0,Alabama,Alabama,4779736,4780127,4785161,...,0.002295,-0.193196,0.381066,0.582002,-0.467369,1.030015,0.826644,1.383282,1.724718,0.712594
1,50,3,6,1,1,Alabama,Autauga County,54571,54571,54660,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.59227,-2.187333
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183193,...,14.83296,17.647293,21.845705,19.243287,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
3,50,3,6,1,5,Alabama,Barbour County,27457,27457,27341,...,-4.728132,-2.50069,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299
4,50,3,6,1,7,Alabama,Bibb County,22915,22919,22861,...,-5.527043,-5.068871,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861


In [None]:
df.columns

In [None]:
df['SUMLEV'].unique()

In [None]:
# filter dataset by county (50)
df = df[df['SUMLEV'] == 50]
df.head()

In [None]:
columns_to_keep = ['STNAME',
                  'CTYNAME',
                  'BIRTHS2010',
                  'BIRTHS2011',
                  'BIRTHS2012',
                  'BIRTHS2013',
                  'BIRTHS2014',
                  'BIRTHS2015',
                  'POPESTIMATE2010',
                  'POPESTIMATE2011',
                  'POPESTIMATE2012',
                  'POPESTIMATE2013',
                  'POPESTIMATE2014',
                  'POPESTIMATE2015']

In [None]:
df = df[columns_to_keep]

In [None]:
df.head()

In [45]:
# setting 2 level index
df = df.set_index(['STNAME', 'CTYNAME'])
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,SUMLEV,REGION,DIVISION,STATE,COUNTY,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
STNAME,CTYNAME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alabama,Alabama,40,3,6,1,0,4779736,4780127,4785161,4801108,4816089,...,0.002295,-0.193196,0.381066,0.582002,-0.467369,1.030015,0.826644,1.383282,1.724718,0.712594
Alabama,Autauga County,50,3,6,1,1,54571,54571,54660,55253,55175,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.59227,-2.187333
Alabama,Baldwin County,50,3,6,1,3,182265,182265,183193,186659,190396,...,14.83296,17.647293,21.845705,19.243287,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
Alabama,Barbour County,50,3,6,1,5,27457,27457,27341,27226,27159,...,-4.728132,-2.50069,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299
Alabama,Bibb County,50,3,6,1,7,22915,22919,22861,22733,22642,...,-5.527043,-5.068871,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861


In [46]:
# filter by specific State and County
df.loc['Alabama', 'Barbour County']

  df.loc['Alabama', 'Barbour County']


Unnamed: 0_level_0,Unnamed: 1_level_0,SUMLEV,REGION,DIVISION,STATE,COUNTY,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
STNAME,CTYNAME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alabama,Barbour County,50,3,6,1,5,27457,27457,27341,27226,27159,...,-4.728132,-2.50069,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299


In [None]:
df.loc['Alabama']

In [None]:
df.loc['Barbour County']

In [None]:
# export to CSV file
df.to_csv('double_indexing.csv')

In [None]:
df_ind = pd.read_csv('double_indexing.csv')

In [None]:
df_ind.head()

In [None]:
df.loc[[('Alabama', 'Barbour County'),
       ('Alabama', 'Bibb County')]]

### Magic commands

full list here - https://ipython.readthedocs.io/en/stable/interactive/magics.html
% - is a special charachter to invoke a function

In [47]:
%matplotlib inline

In [48]:
# library for a data visualization 
import matplotlib as mt

In [50]:
!cat data/olympics.csv

'cat' is not recognized as an internal or external command,
operable program or batch file.


In [9]:
%%time
a = 2**134
b=  45**234345

Wall time: 86 ms


Wall time vs cpu time

Wall clock time measures how much time has passed, as if you were looking at the clock on your wall. CPU time is how many seconds the CPU was busy. In order to understand performance you want to compare the two

In [11]:
%%timeit -n 1000
a = 2**134
b=  45**2343

72.4 µs ± 13.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [12]:
%%time
a = 2**134
b=  45**23434

Wall time: 4 ms


In [15]:
# it simulates 1000 time and calc. average benchmark processing time based on the results
%%timeit -n 1000
a = 2**134
b=  45**2343

The slowest run took 4.06 times longer than the fastest. This could mean that an intermediate result is being cached.
155 µs ± 80.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [16]:
s = pd.Series(np.random.randint(0, 100, 1000))

In [17]:
s

0      13
1      89
2       6
3      59
4       5
       ..
995    50
996     1
997    62
998    41
999    20
Length: 1000, dtype: int32

In [18]:
%%timeit -n 10
s = pd.Series(np.random.randint(0, 100, 1000))
for label, value in s.iteritems():
    s.loc[label] = value + 10

78.5 ms ± 22.6 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [21]:
%%timeit -n 10
s = pd.Series(np.random.randint(0, 100, 1000))
for label, value in s.iteritems():
    s.set_value(label, value + 10)

AttributeError: 'Series' object has no attribute 'set_value'

In [None]:
%%timeit -n 10
s = pd.Series(np.random.randint(0, 100, 1000))
s += 10

In [27]:
s

0      33
1      89
2       6
3      59
4       5
       ..
995    50
996     1
997    62
998    41
999    20
Length: 1000, dtype: int32

more info in pdb - https://www.digitalocean.com/community/tutorials/how-to-use-the-python-debugger

In [25]:
import pdb

In [None]:
for label, value in s.iteritems():
    s.loc[label] = value + 10
    pdb.set_trace()
    f = 43

> [1;32m<ipython-input-28-ba6e03ba63bd>[0m(4)[0;36m<module>[1;34m()[0m
[1;32m      1 [1;33m[1;32mfor[0m [0mlabel[0m[1;33m,[0m [0mvalue[0m [1;32min[0m [0ms[0m[1;33m.[0m[0miteritems[0m[1;33m([0m[1;33m)[0m[1;33m:[0m[1;33m[0m[1;33m[0m[0m
[0m[1;32m      2 [1;33m    [0ms[0m[1;33m.[0m[0mloc[0m[1;33m[[0m[0mlabel[0m[1;33m][0m [1;33m=[0m [0mvalue[0m [1;33m+[0m [1;36m10[0m[1;33m[0m[1;33m[0m[0m
[0m[1;32m      3 [1;33m    [0mpdb[0m[1;33m.[0m[0mset_trace[0m[1;33m([0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[0m[1;32m----> 4 [1;33m    [0mf[0m [1;33m=[0m [1;36m43[0m[1;33m[0m[1;33m[0m[0m
[0m
ipdb> label
0
ipdb> value
33


In [None]:
%pdb

In [None]:
s = pd.Series(np.random.randint(0, 100, 1000))

In [None]:
for label, value in s.iteritems():
    s.loc[label] = value + 10
    pdb.set_trace()
    f = 43

### Filling NaN

In [3]:
df = pd.read_csv('data/log.csv')

In [4]:
df

Unnamed: 0,time,user,video,playback position,paused,volume
0,1469974424,cheryl,intro.html,5,False,10.0
1,1469974454,cheryl,intro.html,6,,
2,1469974544,cheryl,intro.html,9,,
3,1469974574,cheryl,intro.html,10,,
4,1469977514,bob,intro.html,1,,
5,1469977544,bob,intro.html,1,,
6,1469977574,bob,intro.html,1,,
7,1469977604,bob,intro.html,1,,
8,1469974604,cheryl,intro.html,11,,
9,1469974694,cheryl,intro.html,14,,


In [8]:
df = df.set_index('time')
df = df.sort_index()

In [9]:
df

Unnamed: 0_level_0,user,video,playback position,paused,volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1469974424,cheryl,intro.html,5,False,10.0
1469974424,sue,advanced.html,23,False,10.0
1469974454,cheryl,intro.html,6,,
1469974454,sue,advanced.html,24,,
1469974484,cheryl,intro.html,7,,
1469974514,cheryl,intro.html,8,,
1469974524,sue,advanced.html,25,,
1469974544,cheryl,intro.html,9,,
1469974554,sue,advanced.html,26,,
1469974574,cheryl,intro.html,10,,


In [12]:
df = df.reset_index()

In [13]:
df

Unnamed: 0,index,time,user,video,playback position,paused,volume
0,0,1469974424,cheryl,intro.html,5,False,10.0
1,1,1469974424,sue,advanced.html,23,False,10.0
2,2,1469974454,cheryl,intro.html,6,,
3,3,1469974454,sue,advanced.html,24,,
4,4,1469974484,cheryl,intro.html,7,,
5,5,1469974514,cheryl,intro.html,8,,
6,6,1469974524,sue,advanced.html,25,,
7,7,1469974544,cheryl,intro.html,9,,
8,8,1469974554,sue,advanced.html,26,,
9,9,1469974574,cheryl,intro.html,10,,


In [14]:
df = df.set_index(['time', 'user'])

In [15]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,index,video,playback position,paused,volume
time,user,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1469974424,cheryl,0,intro.html,5,False,10.0
1469974424,sue,1,advanced.html,23,False,10.0
1469974454,cheryl,2,intro.html,6,,
1469974454,sue,3,advanced.html,24,,
1469974484,cheryl,4,intro.html,7,,
1469974514,cheryl,5,intro.html,8,,
1469974524,sue,6,advanced.html,25,,
1469974544,cheryl,7,intro.html,9,,
1469974554,sue,8,advanced.html,26,,
1469974574,cheryl,9,intro.html,10,,


In [16]:
df.fillna?

In [17]:
df = df.fillna(method = 'ffill')

In [18]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,index,video,playback position,paused,volume
time,user,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1469974424,cheryl,0,intro.html,5,False,10.0
1469974424,sue,1,advanced.html,23,False,10.0
1469974454,cheryl,2,intro.html,6,False,10.0
1469974454,sue,3,advanced.html,24,False,10.0
1469974484,cheryl,4,intro.html,7,False,10.0
1469974514,cheryl,5,intro.html,8,False,10.0
1469974524,sue,6,advanced.html,25,False,10.0
1469974544,cheryl,7,intro.html,9,False,10.0
1469974554,sue,8,advanced.html,26,False,10.0
1469974574,cheryl,9,intro.html,10,False,10.0


NAs can also be filled with the series of the same length that may contain the values