# Pandas Series Data Structure
## coursera python data analysis week 2
### https://www.coursera.org/learn/python-data-analysis/home/week/2

In [1]:
import numpy as np
import pandas as pd

## From list, dictionary

In [2]:
animals = ['dog', 'cat', 'tiger']
pd.Series(animals)
a = pd.Series(animals)
a.index

RangeIndex(start=0, stop=3, step=1)

In [3]:
a

0      dog
1      cat
2    tiger
dtype: object

In [4]:
numbers = [1,2,3]
pd.Series(numbers)

0    1
1    2
2    3
dtype: int64

In [5]:
animals = ['dog','cat',None]
pd.Series(animals)

0     dog
1     cat
2    None
dtype: object

In [6]:
numbers = [1,2,None]
pd.Series(numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [7]:
print(np.nan)
print(np.nan == np.nan)
print(np.isnan(np.nan))   # isnan : is not a number 

nan
False
True


In [8]:
dic = {"IT":"computer science", "Eng":"chemical engineering", "Arts":"music"}
s = pd.Series(dic)
s

IT          computer science
Eng     chemical engineering
Arts                   music
dtype: object

## Index of Series

In [9]:
s.index

Index(['IT', 'Eng', 'Arts'], dtype='object')

In [10]:
r = pd.Series(["computer science", "chemical engineering", "music"], \
              index = ["IT", "Eng", "Arts"])
r

IT          computer science
Eng     chemical engineering
Arts                   music
dtype: object

## Querying Series

In [11]:
s

IT          computer science
Eng     chemical engineering
Arts                   music
dtype: object

In [12]:
s.iloc[2]

'music'

In [13]:
s.iloc[:2]

IT         computer science
Eng    chemical engineering
dtype: object

In [14]:
s.loc['Arts']

'music'

In [15]:
s[2]

'music'

In [16]:
s['Arts']

'music'

In [17]:
a = pd.Series([1,2,3,4])
total = np.sum(a)
total

10

## vectorization : measuring speed using timeit

In [18]:
np.random.randint(0,4,10)

array([2, 1, 2, 0, 1, 0, 1, 2, 2, 2])

In [19]:
a = pd.Series(np.random. \
              randint(0,1000,10000))
print(a.head())
print(len(a))

0    984
1    493
2    779
3    895
4    807
dtype: int32
10000


In [20]:
%%timeit -n 100
sum = 0
for item in a:
    sum += item

555 µs ± 94 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [21]:
%%timeit -n 100
sum = np.sum(a)

77.8 µs ± 5.01 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Iterating Series

In [22]:
a += 2
a.head()

0    986
1    495
2    781
3    897
4    809
dtype: int32

In [23]:
for label, value in a.iteritems():
    a.set_value(label, value+2)
a.head()

  


0    988
1    497
2    783
3    899
4    811
dtype: int32

In [24]:
%%timeit -n 10
a = pd.Series(np.random.randint(0,1000,10000))
for label, value in a.iteritems():
    a.set_value(label, value+2)

  This is separate from the ipykernel package so we can avoid doing imports until


29.5 ms ± 1.36 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [25]:
%%timeit -n 10
a = pd.Series(np.random.randint(0,1000,10000))
a += 2

190 µs ± 31.2 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [26]:
s = pd.Series([1,2,3])
print(s)
s.loc['Animal'] = 'dog'
s

0    1
1    2
2    3
dtype: int64


0           1
1           2
2           3
Animal    dog
dtype: object

In [27]:
r = pd.Series(["computer science", "chemical engineering", "music"], index = ["IT", "Eng", "Arts"])
t = pd.Series(["English"], index = ["Language"])
all = r.append(t)

In [28]:
all

IT              computer science
Eng         chemical engineering
Arts                       music
Language                 English
dtype: object

# Pandas DataFrame Data Structure

In [29]:
student1 = pd.Series({"name":"kim", "dept":"cs", "year":2})
student2 = pd.Series({"name":"park", "dept":"ee", "year":2})
student3 = pd.Series({"name":"choi", "dept":"media", "year":2})

In [30]:
student1

name    kim
dept     cs
year      2
dtype: object

In [31]:
group = pd.DataFrame([student1, student2, student3])

In [32]:
group

Unnamed: 0,name,dept,year
0,kim,cs,2
1,park,ee,2
2,choi,media,2


In [33]:
group.index

RangeIndex(start=0, stop=3, step=1)

In [34]:
group.columns

Index(['name', 'dept', 'year'], dtype='object')

In [35]:
group1 = pd.DataFrame([student1, student2, student3], \
                      index = ["member1", "member2", "member3"])

In [36]:
group1

Unnamed: 0,name,dept,year
member1,kim,cs,2
member2,park,ee,2
member3,choi,media,2


In [37]:
group1.loc["member2"]

name    park
dept      ee
year       2
Name: member2, dtype: object

In [38]:
type(group1.loc["member2"])

pandas.core.series.Series

## access columns

In [39]:
group1['dept']

member1       cs
member2       ee
member3    media
Name: dept, dtype: object

In [40]:
group1[['dept', 'year']]

Unnamed: 0,dept,year
member1,cs,2
member2,ee,2
member3,media,2


In [52]:
gCols = group1.columns
print(gCols)

Index(['name', 'dept', 'year'], dtype='object')


In [53]:
col = group1.columns == "year"
print(col)
gCols[col]

[False False  True]


Index(['year'], dtype='object')

In [54]:
group1[gCols[col]]

Unnamed: 0,year
member1,2
member2,2
member3,2


In [55]:
group1[group1.columns[group1.columns != 'year']]

Unnamed: 0,name,dept
member1,kim,cs
member2,park,ee
member3,choi,media


In [48]:
group1[[True, False, True]]

Unnamed: 0,name,dept,year
member1,kim,cs,2
member3,choi,media,2


In [50]:
group1[["year", "name"]]

Unnamed: 0,year,name
member1,2,kim
member2,2,park
member3,2,choi


## chain operation and slicing

In [56]:
group1

Unnamed: 0,name,dept,year
member1,kim,cs,2
member2,park,ee,2
member3,choi,media,2


In [216]:
print(group1.loc['member2', 'dept'])
print(group1.loc['member2']['dept'])

ee
ee


In [217]:
group1.loc[:,['dept', 'year']]

Unnamed: 0,dept,year
member1,cs,2
member2,ee,2
member3,media,2


In [59]:
group1.iloc[:2]

Unnamed: 0,name,dept,year
member1,kim,cs,2
member2,park,ee,2


In [60]:
group1.iloc[:2,['dept', 'year']]

TypeError: cannot perform reduce with flexible type

## drop row and del column

In [61]:
group1

Unnamed: 0,name,dept,year
member1,kim,cs,2
member2,park,ee,2
member3,choi,media,2


In [62]:
group1.drop('member1')

Unnamed: 0,name,dept,year
member2,park,ee,2
member3,choi,media,2


In [63]:
group1

Unnamed: 0,name,dept,year
member1,kim,cs,2
member2,park,ee,2
member3,choi,media,2


In [72]:
copy_group1 = group1.copy()
print("before drop copy_group1:\n", copy_group1)
copy_group1 = copy_group1.drop('member1')
print("\nafter drop copy_group1:\n",copy_group1)
print("\nafter drop group1:\n",group1)


before drop copy_group1:
          name   dept  year
member1   kim     cs     2
member2  park     ee     2
member3  choi  media     2

after drop copy_group1:
          name   dept  year
member2  park     ee     2
member3  choi  media     2

after drop group1:
          name   dept  year
member1   kim     cs     2
member2  park     ee     2
member3  choi  media     2


In [73]:
copy_group1 = group1
print("before drop copy_group1:\n", copy_group1)
copy_group1 = copy_group1.drop('member1')
print("\nafter drop copy_group1:\n",copy_group1)
print("\nafter drop group1:\n",group1)

before drop copy_group1:
          name   dept  year
member1   kim     cs     2
member2  park     ee     2
member3  choi  media     2

after drop copy_group1:
          name   dept  year
member2  park     ee     2
member3  choi  media     2

after drop group1:
          name   dept  year
member1   kim     cs     2
member2  park     ee     2
member3  choi  media     2


In [74]:
copy_group1

Unnamed: 0,name,dept,year
member2,park,ee,2
member3,choi,media,2


In [75]:
del copy_group1['year']
copy_group1

Unnamed: 0,name,dept
member2,park,ee
member3,choi,media


In [76]:
copy_group1['grades'] = None
copy_group1

Unnamed: 0,name,dept,grades
member2,park,ee,
member3,choi,media,


In [77]:
group1.T

Unnamed: 0,member1,member2,member3
name,kim,park,choi
dept,cs,ee,media
year,2,2,2


In [140]:
group1.T.loc['name']

member1     kim
member2    park
member3    choi
Name: name, dtype: object

# Read csv file

In [110]:
!cat olympics.csv

'cat'은(는) 내부 또는 외부 명령, 실행할 수 있는 프로그램, 또는
배치 파일이 아닙니다.


In [79]:
df = pd.read_csv("olympics.csv")

In [80]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !,02 !,03 !,Total,№ Games,01 !,02 !,03 !,Combined total
1,Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
2,Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
3,Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
4,Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12


In [81]:
df = pd.read_csv("olympics.csv",index_col=0)
df.head()

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !,02 !,03 !,Total,№ Games,01 !,02 !,03 !,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12


In [82]:
df = pd.read_csv("olympics.csv",index_col=0, skiprows = 1)
df.head()

Unnamed: 0,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !.1,02 !.1,03 !.1,Total.1,№ Games,01 !.2,02 !.2,03 !.2,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


In [83]:
df.columns

Index(['№ Summer', '01 !', '02 !', '03 !', 'Total', '№ Winter', '01 !.1',
       '02 !.1', '03 !.1', 'Total.1', '№ Games', '01 !.2', '02 !.2', '03 !.2',
       'Combined total'],
      dtype='object')

In [84]:
df.index

Index(['Afghanistan (AFG)', 'Algeria (ALG)', 'Argentina (ARG)',
       'Armenia (ARM)', 'Australasia (ANZ) [ANZ]', 'Australia (AUS) [AUS] [Z]',
       'Austria (AUT)', 'Azerbaijan (AZE)', 'Bahamas (BAH)', 'Bahrain (BRN)',
       ...
       'Uzbekistan (UZB)', 'Venezuela (VEN)', 'Vietnam (VIE)',
       'Virgin Islands (ISV)', 'Yugoslavia (YUG) [YUG]',
       'Independent Olympic Participants (IOP) [IOP]', 'Zambia (ZAM) [ZAM]',
       'Zimbabwe (ZIM) [ZIM]', 'Mixed team (ZZX) [ZZX]', 'Totals'],
      dtype='object', length=147)

## Cleaning Dataframe

In [85]:
for col in df.columns:
    if col[:2] == "01" :
        df.rename(columns={col:'Gold'+ col[4:]}, inplace=True)
    if col[:2] == '02':
        df.rename(columns={col:'Silver'+ col[4:]}, inplace=True)
    if col[:2] == '03':
        df.rename(columns={col:'Bronze'+ col[4:]}, inplace=True)
    if col[:1] == "№":
        df.rename(columns={col:"#"+col[2:]}, inplace=True)
df.head()

Unnamed: 0,#Summer,Gold,Silver,Bronze,Total,#Winter,Gold.1,Silver.1,Bronze.1,Total.1,#Games,Gold.2,Silver.2,Bronze.2,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


## Boolean Masking

In [86]:
df.head()

Unnamed: 0,#Summer,Gold,Silver,Bronze,Total,#Winter,Gold.1,Silver.1,Bronze.1,Total.1,#Games,Gold.2,Silver.2,Bronze.2,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


In [88]:
df['Gold'].head()

Afghanistan (AFG)           0
Algeria (ALG)               5
Argentina (ARG)            18
Armenia (ARM)               1
Australasia (ANZ) [ANZ]     3
Name: Gold, dtype: int64

In [89]:
(df['Gold'] > 0).head()

Afghanistan (AFG)          False
Algeria (ALG)               True
Argentina (ARG)             True
Armenia (ARM)               True
Australasia (ANZ) [ANZ]     True
Name: Gold, dtype: bool

In [91]:
gold = df.where(df['Gold']>0)
gold.head()

Unnamed: 0,#Summer,Gold,Silver,Bronze,Total,#Winter,Gold.1,Silver.1,Bronze.1,Total.1,#Games,Gold.2,Silver.2,Bronze.2,Combined total
Afghanistan (AFG),,,,,,,,,,,,,,,
Algeria (ALG),12.0,5.0,2.0,8.0,15.0,3.0,0.0,0.0,0.0,0.0,15.0,5.0,2.0,8.0,15.0
Argentina (ARG),23.0,18.0,24.0,28.0,70.0,18.0,0.0,0.0,0.0,0.0,41.0,18.0,24.0,28.0,70.0
Armenia (ARM),5.0,1.0,2.0,9.0,12.0,6.0,0.0,0.0,0.0,0.0,11.0,1.0,2.0,9.0,12.0
Australasia (ANZ) [ANZ],2.0,3.0,4.0,5.0,12.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,4.0,5.0,12.0


In [92]:
gold["Gold"].count()

100

In [93]:
df['Gold'].count()

147

In [94]:
gold1 = gold.dropna()
gold1.head()

Unnamed: 0,#Summer,Gold,Silver,Bronze,Total,#Winter,Gold.1,Silver.1,Bronze.1,Total.1,#Games,Gold.2,Silver.2,Bronze.2,Combined total
Algeria (ALG),12.0,5.0,2.0,8.0,15.0,3.0,0.0,0.0,0.0,0.0,15.0,5.0,2.0,8.0,15.0
Argentina (ARG),23.0,18.0,24.0,28.0,70.0,18.0,0.0,0.0,0.0,0.0,41.0,18.0,24.0,28.0,70.0
Armenia (ARM),5.0,1.0,2.0,9.0,12.0,6.0,0.0,0.0,0.0,0.0,11.0,1.0,2.0,9.0,12.0
Australasia (ANZ) [ANZ],2.0,3.0,4.0,5.0,12.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,4.0,5.0,12.0
Australia (AUS) [AUS] [Z],25.0,139.0,152.0,177.0,468.0,18.0,5.0,3.0,4.0,12.0,43.0,144.0,155.0,181.0,480.0


In [95]:
gold1['Gold'].count()

100

In [301]:
df.head()

Unnamed: 0,#Summer,Gold,Silver,Bronze,Total,#Winter,Gold.1,Silver.1,Bronze.1,Total.1,#Games,Gold.2,Silver.2,Bronze.2,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


In [98]:
a = df['Gold']>0
a.head()

Afghanistan (AFG)          False
Algeria (ALG)               True
Argentina (ARG)             True
Armenia (ARM)               True
Australasia (ANZ) [ANZ]     True
Name: Gold, dtype: bool

In [99]:
b = df[a]
b.head()

Unnamed: 0,#Summer,Gold,Silver,Bronze,Total,#Winter,Gold.1,Silver.1,Bronze.1,Total.1,#Games,Gold.2,Silver.2,Bronze.2,Combined total
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12
Australia (AUS) [AUS] [Z],25,139,152,177,468,18,5,3,4,12,43,144,155,181,480


In [100]:
b['Gold'].count()

100

In [101]:
print( (df['Gold.1'] > 0).head())
print(len(df['Gold.1']>0))

Afghanistan (AFG)          False
Algeria (ALG)              False
Argentina (ARG)            False
Armenia (ARM)              False
Australasia (ANZ) [ANZ]    False
Name: Gold.1, dtype: bool
147


In [108]:
df[ (df['Gold'] > 0 ) |  \
   (df['Gold.1'] > 0) \
  ]

Unnamed: 0,#Summer,Gold,Silver,Bronze,Total,#Winter,Gold.1,Silver.1,Bronze.1,Total.1,#Games,Gold.2,Silver.2,Bronze.2,Combined total
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12
Australia (AUS) [AUS] [Z],25,139,152,177,468,18,5,3,4,12,43,144,155,181,480
Austria (AUT),26,18,33,35,86,22,59,78,81,218,48,77,111,116,304
Azerbaijan (AZE),5,6,5,15,26,5,0,0,0,0,10,6,5,15,26
Bahamas (BAH),15,5,2,5,12,0,0,0,0,0,15,5,2,5,12
Belarus (BLR),5,12,24,39,75,6,6,4,5,15,11,18,28,44,90
Belgium (BEL),25,37,52,53,142,20,1,1,3,5,45,38,53,56,147


In [344]:
len(df[ (df['Gold'] > 0 ) | \
  (df['Gold.1'] > 0) \
  ] )

101

In [345]:
df[ (df['Gold.1'] > 0 ) &  \
  (df['Gold'] == 0) \
  ]

Unnamed: 0,#Summer,Gold,Silver,Bronze,Total,#Winter,Gold.1,Silver.1,Bronze.1,Total.1,#Games,Gold.2,Silver.2,Bronze.2,Combined total
Liechtenstein (LIE),16,0,0,0,0,18,2,2,5,9,34,2,2,5,9


In [346]:
len(df[ (df['Gold.1'] > 0 ) &  \
  (df['Gold'] == 0) \
  ])

1

In [347]:
# Write a query to return all of the names of people who bought products worth more than $3.00.
purchase_1 = pd.Series({'Name': 'Chris',
                        'Item Purchased': 'Dog Food',
                        'Cost': 22.50})
purchase_2 = pd.Series({'Name': 'Kevyn',
                        'Item Purchased': 'Kitty Litter',
                        'Cost': 2.50})
purchase_3 = pd.Series({'Name': 'Vinod',
                        'Item Purchased': 'Bird Seed',
                        'Cost': 5.00})

df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2'])

df['Name'][df['Cost']>3]

Store 1    Chris
Store 2    Vinod
Name: Name, dtype: object

In [348]:
df

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5
Store 2,Vinod,Bird Seed,5.0


In [349]:
df['Name']

Store 1    Chris
Store 1    Kevyn
Store 2    Vinod
Name: Name, dtype: object

In [350]:
df['Cost']>3


Store 1     True
Store 1    False
Store 2     True
Name: Cost, dtype: bool

## Indexing Dataframe

In [111]:
df = pd.read_csv("olympics.csv",index_col=0, skiprows = 1)
for col in df.columns:
    if col[:2] == "01" :
        df.rename(columns={col:'Gold'+ col[4:]}, inplace=True)
    if col[:2] == '02':
        df.rename(columns={col:'Silver'+ col[4:]}, inplace=True)
    if col[:2] == '03':
        df.rename(columns={col:'Bronze'+ col[4:]}, inplace=True)
    if col[:1] == "№":
        df.rename(columns={col:"#"+col[2:]}, inplace=True)

In [112]:
df.head()

Unnamed: 0,#Summer,Gold,Silver,Bronze,Total,#Winter,Gold.1,Silver.1,Bronze.1,Total.1,#Games,Gold.2,Silver.2,Bronze.2,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


In [113]:
df.index

Index(['Afghanistan (AFG)', 'Algeria (ALG)', 'Argentina (ARG)',
       'Armenia (ARM)', 'Australasia (ANZ) [ANZ]', 'Australia (AUS) [AUS] [Z]',
       'Austria (AUT)', 'Azerbaijan (AZE)', 'Bahamas (BAH)', 'Bahrain (BRN)',
       ...
       'Uzbekistan (UZB)', 'Venezuela (VEN)', 'Vietnam (VIE)',
       'Virgin Islands (ISV)', 'Yugoslavia (YUG) [YUG]',
       'Independent Olympic Participants (IOP) [IOP]', 'Zambia (ZAM) [ZAM]',
       'Zimbabwe (ZIM) [ZIM]', 'Mixed team (ZZX) [ZZX]', 'Totals'],
      dtype='object', length=147)

In [114]:
df['Country'] = df.index
df.head()

Unnamed: 0,#Summer,Gold,Silver,Bronze,Total,#Winter,Gold.1,Silver.1,Bronze.1,Total.1,#Games,Gold.2,Silver.2,Bronze.2,Combined total,Country
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2,Afghanistan (AFG)
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15,Algeria (ALG)
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70,Argentina (ARG)
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12,Armenia (ARM)
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12,Australasia (ANZ) [ANZ]


In [369]:
df = df.set_index('Gold')
df.head()

Unnamed: 0_level_0,#Summer,Silver,Bronze,Total,#Winter,Gold.1,Silver.1,Bronze.1,Total.1,#Games,Gold.2,Silver.2,Bronze.2,Combined total,Country
Gold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,13,0,2,2,0,0,0,0,0,13,0,0,2,2,Afghanistan (AFG)
5,12,2,8,15,3,0,0,0,0,15,5,2,8,15,Algeria (ALG)
18,23,24,28,70,18,0,0,0,0,41,18,24,28,70,Argentina (ARG)
1,5,2,9,12,6,0,0,0,0,11,1,2,9,12,Armenia (ARM)
3,2,4,5,12,0,0,0,0,0,2,3,4,5,12,Australasia (ANZ) [ANZ]


In [370]:
df = df.reset_index()
df.head()

Unnamed: 0,Gold,#Summer,Silver,Bronze,Total,#Winter,Gold.1,Silver.1,Bronze.1,Total.1,#Games,Gold.2,Silver.2,Bronze.2,Combined total,Country
0,0,13,0,2,2,0,0,0,0,0,13,0,0,2,2,Afghanistan (AFG)
1,5,12,2,8,15,3,0,0,0,0,15,5,2,8,15,Algeria (ALG)
2,18,23,24,28,70,18,0,0,0,0,41,18,24,28,70,Argentina (ARG)
3,1,5,2,9,12,6,0,0,0,0,11,1,2,9,12,Armenia (ARM)
4,3,2,4,5,12,0,0,0,0,0,2,3,4,5,12,Australasia (ANZ) [ANZ]


In [None]:
# https://github.com/aliciasueyee/Introduction-to-Data-Science-with-Python/blob/master/Assignment%2B2.py