In [1]:
# --------    Basic operations with a DataFrame    --------

# Loading the dataset

import pandas as pd

# Read the demographic CSV; the path is relative to the current working directory.
stats = pd.read_csv('./DataDemographic.csv')    # load dataset
stats.head()    # preview the first rows of the dataset

Unnamed: 0,Country Name,Country Code,Birth rate,Internet users,Income Group
0,Aruba,ABW,10.244,78.9,High income
1,Afghanistan,AFG,35.253,5.9,Low income
2,Angola,AGO,45.985,19.1,Upper middle income
3,Albania,ALB,12.877,57.2,Upper middle income
4,United Arab Emirates,ARE,11.044,88.0,High income


In [4]:
# Subsetting (review): rows at positions 4-7 of three selected columns.
# A single .loc call replaces the chained stats[cols][4:8] lookup;
# stats.index[4:8] converts the positional row slice into row labels.
stats.loc[stats.index[4:8], ['Country Code', 'Birth rate', 'Internet users']]

Unnamed: 0,Country Code,Birth rate,Internet users
4,ARE,11.044,88.0
5,ARG,17.716,59.9
6,ARM,13.308,41.9
7,ATG,16.447,63.4


In [5]:
# --------    Mathematical operations    --------
# Arithmetic between two columns is applied element by element,
# very similar to element-wise matrix operations.
result = stats['Birth rate'].mul(stats['Internet users'])

In [6]:
# Preview the first five products.
result.head(n=5)

0    808.2516
1    207.9927
2    878.3135
3    736.5644
4    971.8720
dtype: float64

In [2]:
# Rename the columns to single-word names so they can be used with
# attribute access (e.g. stats.BirthRate).
# An explicit old -> new mapping is used instead of a positional
# `stats.columns = [...]` assignment, so a change in the CSV's column order
# cannot silently attach the wrong name to a column.
stats = stats.rename(columns={
    'Country Name': 'CountryName',
    'Country Code': 'CountryCode',
    'Birth rate': 'BirthRate',
    'Internet users': 'InternetUsers',
    'Income Group': 'IncomeGroup',
})
# Same element-wise product as before, now written with attribute access.
result_2 = stats.BirthRate * stats.InternetUsers
result_2.head()

0    808.2516
1    207.9927
2    878.3135
3    736.5644
4    971.8720
dtype: float64

In [3]:
# --------    Add a column to the dataframe    --------
# NOTE: the attribute shortcut ('stats.MyCalc = ...') won't work for creating
# a new column; use the bracket syntax shown below instead.
stats['MyCalc'] = result_2  # add a column named 'MyCalc' which stores our calculated result_2
stats.head()    # check that the new column is present

Unnamed: 0,CountryName,CountryCode,BirthRate,InternetUsers,IncomeGroup,MyCalc
0,Aruba,ABW,10.244,78.9,High income,808.2516
1,Afghanistan,AFG,35.253,5.9,Low income,207.9927
2,Angola,AGO,45.985,19.1,Upper middle income,878.3135
3,Albania,ALB,12.877,57.2,Upper middle income,736.5644
4,United Arab Emirates,ARE,11.044,88.0,High income,971.872


In [4]:
# Add a second derived column: birth rate divided by internet users.
stats['MyCalc_2'] = stats.BirthRate.div(stats.InternetUsers)
stats.head(n=5)    # verify that 'MyCalc_2' was added

Unnamed: 0,CountryName,CountryCode,BirthRate,InternetUsers,IncomeGroup,MyCalc,MyCalc_2
0,Aruba,ABW,10.244,78.9,High income,808.2516,0.129835
1,Afghanistan,AFG,35.253,5.9,Low income,207.9927,5.975085
2,Angola,AGO,45.985,19.1,Upper middle income,878.3135,2.407592
3,Albania,ALB,12.877,57.2,Upper middle income,736.5644,0.225122
4,United Arab Emirates,ARE,11.044,88.0,High income,971.872,0.1255


In [5]:
# --------    Remove a column from the dataframe    --------
# drop() has to be told which axis the label lives on:
#     axis : {0 or 'index', 1 or 'columns'}, default 0
#     i.e. 0 / 'index' drops rows (the default), 1 / 'columns' drops columns
# Pass arguments by parameter name ('labels', 'axis', 'index', 'columns', ...)
# rather than relying on their position, e.g. prefer axis=1 over stats.drop('MyCalc', 1).
# drop() returns a new DataFrame; 'stats' itself is not modified.
stats.drop(labels='MyCalc', axis='columns')    # result has no 'MyCalc' column

Unnamed: 0,CountryName,CountryCode,BirthRate,InternetUsers,IncomeGroup,MyCalc_2
0,Aruba,ABW,10.244,78.9,High income,0.129835
1,Afghanistan,AFG,35.253,5.9,Low income,5.975085
2,Angola,AGO,45.985,19.1,Upper middle income,2.407592
3,Albania,ALB,12.877,57.2,Upper middle income,0.225122
4,United Arab Emirates,ARE,11.044,88.0,High income,0.125500
...,...,...,...,...,...,...
190,"Yemen, Rep.",YEM,32.947,20.0,Lower middle income,1.647350
191,South Africa,ZAF,20.850,46.5,Upper middle income,0.448387
192,"Congo, Dem. Rep.",COD,42.394,2.2,Low income,19.270000
193,Zambia,ZMB,40.471,15.4,Lower middle income,2.627987


In [6]:
# Drop the rows labelled 0, 1 and 2 (the first three rows of this frame).
stats.drop(labels=[0, 1, 2], axis='index')

Unnamed: 0,CountryName,CountryCode,BirthRate,InternetUsers,IncomeGroup,MyCalc,MyCalc_2
3,Albania,ALB,12.877,57.2,Upper middle income,736.5644,0.225122
4,United Arab Emirates,ARE,11.044,88.0,High income,971.8720,0.125500
5,Argentina,ARG,17.716,59.9,High income,1061.1884,0.295760
6,Armenia,ARM,13.308,41.9,Lower middle income,557.6052,0.317613
7,Antigua and Barbuda,ATG,16.447,63.4,High income,1042.7398,0.259416
...,...,...,...,...,...,...,...
190,"Yemen, Rep.",YEM,32.947,20.0,Lower middle income,658.9400,1.647350
191,South Africa,ZAF,20.850,46.5,Upper middle income,969.5250,0.448387
192,"Congo, Dem. Rep.",COD,42.394,2.2,Low income,93.2668,19.270000
193,Zambia,ZMB,40.471,15.4,Lower middle income,623.2534,2.627987


In [8]:
# The drop() calls above returned new objects, so 'stats' itself is unchanged.
stats.head(n=5)

Unnamed: 0,CountryName,CountryCode,BirthRate,InternetUsers,IncomeGroup,MyCalc,MyCalc_2
0,Aruba,ABW,10.244,78.9,High income,808.2516,0.129835
1,Afghanistan,AFG,35.253,5.9,Low income,207.9927,5.975085
2,Angola,AGO,45.985,19.1,Upper middle income,878.3135,2.407592
3,Albania,ALB,12.877,57.2,Upper middle income,736.5644,0.225122
4,United Arab Emirates,ARE,11.044,88.0,High income,971.872,0.1255


In [9]:
# Drop a column through the 'columns' keyword (no axis argument needed).
stats.drop(columns='MyCalc_2')    # removes the column called 'MyCalc_2'

Unnamed: 0,CountryName,CountryCode,BirthRate,InternetUsers,IncomeGroup,MyCalc
0,Aruba,ABW,10.244,78.9,High income,808.2516
1,Afghanistan,AFG,35.253,5.9,Low income,207.9927
2,Angola,AGO,45.985,19.1,Upper middle income,878.3135
3,Albania,ALB,12.877,57.2,Upper middle income,736.5644
4,United Arab Emirates,ARE,11.044,88.0,High income,971.8720
...,...,...,...,...,...,...
190,"Yemen, Rep.",YEM,32.947,20.0,Lower middle income,658.9400
191,South Africa,ZAF,20.850,46.5,Upper middle income,969.5250
192,"Congo, Dem. Rep.",COD,42.394,2.2,Low income,93.2668
193,Zambia,ZMB,40.471,15.4,Lower middle income,623.2534


In [10]:

# Remove several columns at once by passing a list of labels.
stats.drop(labels=['CountryName', 'BirthRate'], axis='columns')    # drops 'CountryName' and 'BirthRate'


Unnamed: 0,CountryCode,InternetUsers,IncomeGroup,MyCalc,MyCalc_2
0,ABW,78.9,High income,808.2516,0.129835
1,AFG,5.9,Low income,207.9927,5.975085
2,AGO,19.1,Upper middle income,878.3135,2.407592
3,ALB,57.2,Upper middle income,736.5644,0.225122
4,ARE,88.0,High income,971.8720,0.125500
...,...,...,...,...,...
190,YEM,20.0,Lower middle income,658.9400,1.647350
191,ZAF,46.5,Upper middle income,969.5250,0.448387
192,COD,2.2,Low income,93.2668,19.270000
193,ZMB,15.4,Lower middle income,623.2534,2.627987


In [11]:

# ----  Use assignment to update  ----
# drop() returns a new object and leaves 'stats' untouched, so the result
# has to be assigned back to 'stats' for the change to persist.
stats = (
    stats
    .drop(index=[0, 1, 2])
    .drop(columns=['CountryName', 'BirthRate'])
)

In [12]:
# Confirm that 'stats' now reflects the dropped rows and columns.
stats.head(n=5)

Unnamed: 0,CountryCode,InternetUsers,IncomeGroup,MyCalc,MyCalc_2
3,ALB,57.2,Upper middle income,736.5644,0.225122
4,ARE,88.0,High income,971.872,0.1255
5,ARG,59.9,High income,1061.1884,0.29576
6,ARM,41.9,Lower middle income,557.6052,0.317613
7,ATG,63.4,High income,1042.7398,0.259416
