### Creating Dataframes using Pandas 

In [36]:
import pandas as pd

In [115]:
df = pd.read_csv('africa.csv')

In [38]:
df

Unnamed: 0,country,independence,colonizer,coups,successful
0,Kenya,12/12/63,Britain,1,0
1,Nigeria,6/1/61,Britain,8,6
2,Ethiopia,,,5,5
3,Namibia,21/03/1990,Germany,0,_
4,South Sudan,9/7/11,,1,0
5,Senegal,4/4/60,France,1,0
6,Chad,11/8/60,France,7,2
7,Angola,11/11/75,Portugal,1,0
8,DRC,30/06/1960,Belgium,4,2
9,Mozambique,25/06/1975,Portugal,1,1


In [39]:
df.head()

Unnamed: 0,country,independence,colonizer,coups,successful
0,Kenya,12/12/63,Britain,1,0
1,Nigeria,6/1/61,Britain,8,6
2,Ethiopia,,,5,5
3,Namibia,21/03/1990,Germany,0,_
4,South Sudan,9/7/11,,1,0


In [40]:
df.columns

Index(['country', 'independence', 'colonizer', 'coups', 'successful'], dtype='object')

In [41]:
df.shape

(11, 5)

In [42]:
rows, columns = df.shape

In [43]:
rows

11

In [44]:
columns

5

In [47]:
df.head(7)

Unnamed: 0,country,independence,colonizer,coups,successful
0,Kenya,12/12/63,Britain,1,0
1,Nigeria,6/1/61,Britain,8,6
2,Ethiopia,,,5,5
3,Namibia,21/03/1990,Germany,0,_
4,South Sudan,9/7/11,,1,0
5,Senegal,4/4/60,France,1,0
6,Chad,11/8/60,France,7,2


In [49]:
df.tail(3)

Unnamed: 0,country,independence,colonizer,coups,successful
8,DRC,30/06/1960,Belgium,4,2
9,Mozambique,25/06/1975,Portugal,1,1
10,Liberia,26/07/1847,,4,1


In [52]:
df[['country', 'colonizer', 'coups']]

Unnamed: 0,country,colonizer,coups
0,Kenya,Britain,1
1,Nigeria,Britain,8
2,Ethiopia,,5
3,Namibia,Germany,0
4,South Sudan,,1
5,Senegal,France,1
6,Chad,France,7
7,Angola,Portugal,1
8,DRC,Belgium,4
9,Mozambique,Portugal,1


In [53]:
df[3:6]

Unnamed: 0,country,independence,colonizer,coups,successful
3,Namibia,21/03/1990,Germany,0,_
4,South Sudan,9/7/11,,1,0
5,Senegal,4/4/60,France,1,0


In [54]:
df.country

0           Kenya
1         Nigeria
2        Ethiopia
3         Namibia
4     South Sudan
5         Senegal
6            Chad
7          Angola
8             DRC
9      Mozambique
10        Liberia
Name: country, dtype: object

In [55]:
df['country']

0           Kenya
1         Nigeria
2        Ethiopia
3         Namibia
4     South Sudan
5         Senegal
6            Chad
7          Angola
8             DRC
9      Mozambique
10        Liberia
Name: country, dtype: object

#### Basic Descriptive Statistics in Pandas 

In [56]:
df.head()

Unnamed: 0,country,independence,colonizer,coups,successful
0,Kenya,12/12/63,Britain,1,0
1,Nigeria,6/1/61,Britain,8,6
2,Ethiopia,,,5,5
3,Namibia,21/03/1990,Germany,0,_
4,South Sudan,9/7/11,,1,0


In [57]:
df['coups'].max()

8

In [58]:
df['coups'].min()

0

In [59]:
df['coups'].mean()

3.0

In [60]:
df['coups'].std()

2.756809750418044

In [62]:
df.describe()

Unnamed: 0,coups
count,11.0
mean,3.0
std,2.75681
min,0.0
25%,1.0
50%,1.0
75%,4.5
max,8.0


In [116]:
df.head()

Unnamed: 0,country,independence,colonizer,coups,successful
0,Kenya,12/12/63,Britain,1,0
1,Nigeria,6/1/61,Britain,8,6
2,Ethiopia,,,5,5
3,Namibia,21/03/1990,Germany,0,_
4,South Sudan,9/7/11,,1,0


#### Conditionally select data

In [118]:
df[df.coups >= 5]

Unnamed: 0,country,independence,colonizer,coups,successful
1,Nigeria,6/1/61,Britain,8,6
2,Ethiopia,,,5,5
6,Chad,11/8/60,France,7,2


In [119]:
df[df.coups >= 7]

Unnamed: 0,country,independence,colonizer,coups,successful
1,Nigeria,6/1/61,Britain,8,6
6,Chad,11/8/60,France,7,2


In [120]:
df[df['coups'] == df['coups'].max()]

Unnamed: 0,country,independence,colonizer,coups,successful
1,Nigeria,6/1/61,Britain,8,6


In [122]:
df[['country', 'colonizer']][df['coups'] == df['coups'].max()]

Unnamed: 0,country,colonizer
1,Nigeria,Britain


### Indexes 

In [123]:
df.index

RangeIndex(start=0, stop=11, step=1)

In [124]:
df.set_index('independence')

Unnamed: 0_level_0,country,colonizer,coups,successful
independence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
12/12/63,Kenya,Britain,1,0
6/1/61,Nigeria,Britain,8,6
,Ethiopia,,5,5
21/03/1990,Namibia,Germany,0,_
9/7/11,South Sudan,,1,0
4/4/60,Senegal,France,1,0
11/8/60,Chad,France,7,2
11/11/75,Angola,Portugal,1,0
30/06/1960,DRC,Belgium,4,2
25/06/1975,Mozambique,Portugal,1,1


In [125]:
df

Unnamed: 0,country,independence,colonizer,coups,successful
0,Kenya,12/12/63,Britain,1,0
1,Nigeria,6/1/61,Britain,8,6
2,Ethiopia,,,5,5
3,Namibia,21/03/1990,Germany,0,_
4,South Sudan,9/7/11,,1,0
5,Senegal,4/4/60,France,1,0
6,Chad,11/8/60,France,7,2
7,Angola,11/11/75,Portugal,1,0
8,DRC,30/06/1960,Belgium,4,2
9,Mozambique,25/06/1975,Portugal,1,1


In [126]:
df.set_index('independence', inplace = True)

In [127]:
df

Unnamed: 0_level_0,country,colonizer,coups,successful
independence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
12/12/63,Kenya,Britain,1,0
6/1/61,Nigeria,Britain,8,6
,Ethiopia,,5,5
21/03/1990,Namibia,Germany,0,_
9/7/11,South Sudan,,1,0
4/4/60,Senegal,France,1,0
11/8/60,Chad,France,7,2
11/11/75,Angola,Portugal,1,0
30/06/1960,DRC,Belgium,4,2
25/06/1975,Mozambique,Portugal,1,1


In [128]:
df

Unnamed: 0_level_0,country,colonizer,coups,successful
independence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
12/12/63,Kenya,Britain,1,0
6/1/61,Nigeria,Britain,8,6
,Ethiopia,,5,5
21/03/1990,Namibia,Germany,0,_
9/7/11,South Sudan,,1,0
4/4/60,Senegal,France,1,0
11/8/60,Chad,France,7,2
11/11/75,Angola,Portugal,1,0
30/06/1960,DRC,Belgium,4,2
25/06/1975,Mozambique,Portugal,1,1


In [129]:
df.loc['11/11/75']

country         Angola
colonizer     Portugal
coups                1
successful           0
Name: 11/11/75, dtype: object

In [130]:
df

Unnamed: 0_level_0,country,colonizer,coups,successful
independence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
12/12/63,Kenya,Britain,1,0
6/1/61,Nigeria,Britain,8,6
,Ethiopia,,5,5
21/03/1990,Namibia,Germany,0,_
9/7/11,South Sudan,,1,0
4/4/60,Senegal,France,1,0
11/8/60,Chad,France,7,2
11/11/75,Angola,Portugal,1,0
30/06/1960,DRC,Belgium,4,2
25/06/1975,Mozambique,Portugal,1,1


In [131]:
df.reset_index(inplace = True)

In [132]:
df

Unnamed: 0,independence,country,colonizer,coups,successful
0,12/12/63,Kenya,Britain,1,0
1,6/1/61,Nigeria,Britain,8,6
2,,Ethiopia,,5,5
3,21/03/1990,Namibia,Germany,0,_
4,9/7/11,South Sudan,,1,0
5,4/4/60,Senegal,France,1,0
6,11/8/60,Chad,France,7,2
7,11/11/75,Angola,Portugal,1,0
8,30/06/1960,DRC,Belgium,4,2
9,25/06/1975,Mozambique,Portugal,1,1


In [133]:
df.set_index('colonizer', inplace = True)

In [134]:
df

Unnamed: 0_level_0,independence,country,coups,successful
colonizer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Britain,12/12/63,Kenya,1,0
Britain,6/1/61,Nigeria,8,6
,,Ethiopia,5,5
Germany,21/03/1990,Namibia,0,_
,9/7/11,South Sudan,1,0
France,4/4/60,Senegal,1,0
France,11/8/60,Chad,7,2
Portugal,11/11/75,Angola,1,0
Belgium,30/06/1960,DRC,4,2
Portugal,25/06/1975,Mozambique,1,1


In [135]:
df.loc['Britain']

Unnamed: 0_level_0,independence,country,coups,successful
colonizer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Britain,12/12/63,Kenya,1,0
Britain,6/1/61,Nigeria,8,6


In [136]:
df.loc['France']

Unnamed: 0_level_0,independence,country,coups,successful
colonizer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
France,4/4/60,Senegal,1,0
France,11/8/60,Chad,7,2


In [137]:
df.loc['Portugal']

Unnamed: 0_level_0,independence,country,coups,successful
colonizer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Portugal,11/11/75,Angola,1,0
Portugal,25/06/1975,Mozambique,1,1


In [165]:
df = pd.read_excel('africa.xlsx', 'Sheet1')

In [166]:
df.head()

Unnamed: 0,country,independence,colonizer,coups,successful
0,Kenya,1963-12-12 00:00:00,Britain,1,0
1,Nigeria,1961-06-01 00:00:00,Britain,8,6
2,Ethiopia,,,5,5
3,Namibia,21/03/1990,Germany,0,_
4,South Sudan,2011-09-07 00:00:00,,1,0


In [168]:
df = pd.read_excel('africa.xlsx', 'Sheet1', nrows = 3)

In [170]:
df = pd.read_excel('africa.xlsx', 'Sheet1', na_values = ['NA', '_'])

In [171]:
df

Unnamed: 0,country,independence,colonizer,coups,successful
0,Kenya,1963-12-12 00:00:00,Britain,1,0.0
1,Nigeria,1961-06-01 00:00:00,Britain,8,6.0
2,Ethiopia,,,5,5.0
3,Namibia,21/03/1990,Germany,0,
4,South Sudan,2011-09-07 00:00:00,,1,0.0
5,Senegal,1960-04-04 00:00:00,France,1,0.0
6,Chad,1960-11-08 00:00:00,France,7,2.0
7,Angola,1975-11-11 00:00:00,Portugal,1,0.0
8,DRC,30/06/1960,Belgium,4,2.0
9,Mozambique,25/06/1975,Portugal,1,1.0


In [173]:
df.to_csv('new_africa.csv', index = False)

#### Handling Time data 

In [207]:
df = pd.read_csv('africa.csv', parse_dates = ['independence'], na_values = ['NA', '_'])

  return tools.to_datetime(
  return tools.to_datetime(
  return tools.to_datetime(
  return tools.to_datetime(


In [204]:
df

Unnamed: 0,country,independence,colonizer,coups,successful
0,Kenya,2063-12-12,Britain,1,0.0
1,Nigeria,2061-06-01,Britain,8,6.0
2,Ethiopia,NaT,,5,5.0
3,Namibia,1990-03-21,Germany,0,
4,South Sudan,2011-09-07,,1,0.0
5,Senegal,2060-04-04,France,1,0.0
6,Chad,2060-11-08,France,7,2.0
7,Angola,1975-11-11,Portugal,1,0.0
8,DRC,1960-06-30,Belgium,4,2.0
9,Mozambique,1975-06-25,Portugal,1,1.0


In [201]:
type(df.independence[0])

pandas._libs.tslibs.timestamps.Timestamp

In [205]:
df.isna()

Unnamed: 0,country,independence,colonizer,coups,successful
0,False,False,False,False,False
1,False,False,False,False,False
2,False,True,True,False,False
3,False,False,False,False,True
4,False,False,True,False,False
5,False,False,False,False,False
6,False,False,False,False,False
7,False,False,False,False,False
8,False,False,False,False,False
9,False,False,False,False,False


In [206]:
df.dropna()

Unnamed: 0,country,independence,colonizer,coups,successful
0,Kenya,2063-12-12,Britain,1,0.0
1,Nigeria,2061-06-01,Britain,8,6.0
5,Senegal,2060-04-04,France,1,0.0
6,Chad,2060-11-08,France,7,2.0
7,Angola,1975-11-11,Portugal,1,0.0
8,DRC,1960-06-30,Belgium,4,2.0
9,Mozambique,1975-06-25,Portugal,1,1.0


In [209]:
df['colonizer'].nunique()

5

In [215]:
df.sort_values(by = ['coups'], ascending = True)

Unnamed: 0,country,independence,colonizer,coups,successful
3,Namibia,1990-03-21,Germany,0,
0,Kenya,2063-12-12,Britain,1,0.0
4,South Sudan,2011-09-07,,1,0.0
5,Senegal,2060-04-04,France,1,0.0
7,Angola,1975-11-11,Portugal,1,0.0
9,Mozambique,1975-06-25,Portugal,1,1.0
8,DRC,1960-06-30,Belgium,4,2.0
10,Liberia,1847-07-26,,4,1.0
2,Ethiopia,NaT,,5,5.0
6,Chad,2060-11-08,France,7,2.0


In [214]:
df

Unnamed: 0,country,independence,colonizer,coups,successful
0,Kenya,2063-12-12,Britain,1,0.0
1,Nigeria,2061-06-01,Britain,8,6.0
2,Ethiopia,NaT,,5,5.0
3,Namibia,1990-03-21,Germany,0,
4,South Sudan,2011-09-07,,1,0.0
5,Senegal,2060-04-04,France,1,0.0
6,Chad,2060-11-08,France,7,2.0
7,Angola,1975-11-11,Portugal,1,0.0
8,DRC,1960-06-30,Belgium,4,2.0
9,Mozambique,1975-06-25,Portugal,1,1.0


In [216]:
df.transpose()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
country,Kenya,Nigeria,Ethiopia,Namibia,South Sudan,Senegal,Chad,Angola,DRC,Mozambique,Liberia
independence,2063-12-12 00:00:00,2061-06-01 00:00:00,NaT,1990-03-21 00:00:00,2011-09-07 00:00:00,2060-04-04 00:00:00,2060-11-08 00:00:00,1975-11-11 00:00:00,1960-06-30 00:00:00,1975-06-25 00:00:00,1847-07-26 00:00:00
colonizer,Britain,Britain,,Germany,,France,France,Portugal,Belgium,Portugal,
coups,1,8,5,0,1,1,7,1,4,1,4
successful,0.0,6.0,5.0,,0.0,0.0,2.0,0.0,2.0,1.0,1.0
