# NumPy

In [1]:
my_list = [23,45,234,5]

In [2]:
my_other_list = [34,23,56,3]

In [3]:
my_list+my_other_list

[23, 45, 234, 5, 34, 23, 56, 3]

In [4]:
import numpy as np

In [5]:
my_first_array = np.array(my_list)
my_first_array


array([ 23,  45, 234,   5])

In [6]:
my_2_array = np.array(my_other_list)
my_2_array

array([34, 23, 56,  3])

In [7]:
combined_array = my_first_array + my_2_array
combined_array

array([ 57,  68, 290,   8])

In [8]:
my_first_array

array([ 23,  45, 234,   5])

# Pandas

In [43]:
import pandas as pd

In [10]:
df = pd.DataFrame({"col1":[0,1,3], "col2":[0,0,100]})
df

Unnamed: 0,col1,col2
0,0,0
1,1,0
2,3,100


In [11]:
ar = pd.Series([1,2,3,4,5,6])
print(ar)
ar.index = ['A','B','C','D','E','F']
print(ar)

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64
A    1
B    2
C    3
D    4
E    5
F    6
dtype: int64


In [12]:
data = [450,650,870]
Sales = pd.Series(data,index=['Don','Mike','Edwin'],name="Sales")
print(Sales.index)
Sales

Index(['Don', 'Mike', 'Edwin'], dtype='object')


Don      450
Mike     650
Edwin    870
Name: Sales, dtype: int64

In [13]:
print(Sales.index)
print(Sales.values)
print(Sales.dtype)

Index(['Don', 'Mike', 'Edwin'], dtype='object')
[450 650 870]
int64


In [14]:
df.to_numpy()

array([[  0,   0],
       [  1,   0],
       [  3, 100]], dtype=int64)

In [15]:
Sales['Mike']

650

In [16]:
Sales[1]

650

In [17]:
Sales[0:2]

Don     450
Mike    650
Name: Sales, dtype: int64

In [18]:
Sales>500

Don      False
Mike      True
Edwin     True
Name: Sales, dtype: bool

In [19]:
Sales[[False,True,True]]

Mike     650
Edwin    870
Name: Sales, dtype: int64

In [20]:
print(Sales)
sales_dict = Sales.to_dict()
sales_dict

Don      450
Mike     650
Edwin    870
Name: Sales, dtype: int64


{'Don': 450, 'Mike': 650, 'Edwin': 870}

In [21]:
sales_ser = pd.Series(sales_dict)
sales_ser

Don      450
Mike     650
Edwin    870
dtype: int64

In [22]:
new_sales = pd.Series(Sales, index=['Don', 'Mike', 'Sally', 'Edwin', 'Lucy'])
new_sales

Don      450.0
Mike     650.0
Sally      NaN
Edwin    870.0
Lucy       NaN
Name: Sales, dtype: float64

In [23]:
new_sales[pd.notna(new_sales)]

Don      450.0
Mike     650.0
Edwin    870.0
Name: Sales, dtype: float64

# Pandas DataFrames

DataFrames are two-dimensional, size-mutable, potentially heterogeneous tabular data structure contains labeled axes.

## Creating DataFrames

### Creating a DataFrame from a dictionary

In [24]:
new_dict = {
    'Name':['Tom','Jane','Steve','Lucy'],
    'Sales':[250,500,350,400],
    'Date':[2022,2020,2021,2022]
}
df = pd.DataFrame(new_dict)
df

Unnamed: 0,Name,Sales,Date
0,Tom,250,2022
1,Jane,500,2020
2,Steve,350,2021
3,Lucy,400,2022


In [25]:
df.values

array([['Tom', 250, 2022],
       ['Jane', 500, 2020],
       ['Steve', 350, 2021],
       ['Lucy', 400, 2022]], dtype=object)

In [26]:
df.index

RangeIndex(start=0, stop=4, step=1)

### Adding custom index

In [27]:
df_index = pd.DataFrame(new_dict, index=['rank1','rank2','rank3','rank4'])
df_index

Unnamed: 0,Name,Sales,Date
rank1,Tom,250,2022
rank2,Jane,500,2020
rank3,Steve,350,2021
rank4,Lucy,400,2022


In [28]:
df_index.index.name="Rank"

In [29]:
df_index

Unnamed: 0_level_0,Name,Sales,Date
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
rank1,Tom,250,2022
rank2,Jane,500,2020
rank3,Steve,350,2021
rank4,Lucy,400,2022


In [30]:
df_index.columns

Index(['Name', 'Sales', 'Date'], dtype='object')

In [31]:
new_dict_v2 = {
    'Name':['Tom','Jane','Steve','Lucy'],
    'Sales':[250,500,350,400],
    'Date': [2022,2020,2021,2022],
    'Rank': ['rank1','rank2','rank3','rank4']
}

df2=pd.DataFrame(new_dict_v2)
df2

Unnamed: 0,Name,Sales,Date,Rank
0,Tom,250,2022,rank1
1,Jane,500,2020,rank2
2,Steve,350,2021,rank3
3,Lucy,400,2022,rank4


In [32]:
df2.set_index("Rank",inplace=True)
# Inplace = True, means that the change to dataframe is made permanent

In [33]:
df2

Unnamed: 0_level_0,Name,Sales,Date
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
rank1,Tom,250,2022
rank2,Jane,500,2020
rank3,Steve,350,2021
rank4,Lucy,400,2022


In [34]:
# Multi Level Indexes (hierarchical indexes)

df2.set_index(['Rank','Name'],inplace=True)
df2

KeyError: "None of ['Rank'] are in the columns"

In [None]:
df2.sort_index(level=["Name","Rank"],ascending=[False,True])

In [None]:
df2.reset_index()

## Subsetting DataFrame

Two ways:

* Square Brackets

* Advanced Mehtods
  * loc
  * iloc

In [None]:
df

In [None]:
print(type(df["Name"]))
print(df["Name"])
print(type(df[["Name"]]))
print(df[["Name"]])

In [None]:
df[df.Name=='Tom']

In [None]:
# Filtering DataFrames
df[(df["Sales"]>300) & (df["Date"]>2020)]

In [None]:
df[df["Date"].isin([2020,2022])]

## loc and iloc

loc[row_label, column_label]
iloc[row_position, column_position]

## loc - Label based access

In [42]:
new_dict_v2 = {
    'Name':['Tom','Jane','Steve','Lucy'],
    'Sales':[250,500,350,400],
    'Date': [2022,2020,2021,2022],
    'Rank': ['rank1','rank2','rank3','rank4']
}

df2=pd.DataFrame(new_dict_v2)
df2_rank=df2.set_index("Rank")
df2_rank

Unnamed: 0_level_0,Name,Sales,Date
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
rank1,Tom,250,2022
rank2,Jane,500,2020
rank3,Steve,350,2021
rank4,Lucy,400,2022


In [36]:
df2_rank.loc["rank1":"rank1",["Name","Sales"]]
# Endpoint is included in loc and iloc

Unnamed: 0_level_0,Name,Sales
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1
rank1,Tom,250


In [37]:
df2_rank.iloc[0:2,[1]]

Unnamed: 0_level_0,Sales
Rank,Unnamed: 1_level_1
rank1,250
rank2,500


In [40]:
df.loc[df.Sales>300,['Name','Sales']]

Unnamed: 0,Name,Sales
1,Jane,500
2,Steve,350
3,Lucy,400


In [43]:
df2_rank.loc[['rank1','rank3']]

Unnamed: 0_level_0,Name,Sales,Date
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
rank1,Tom,250,2022
rank3,Steve,350,2021


### iloc - position based access

If you want to subset Pandas DataFrames based on their position, or index

In [44]:
df2_rank.iloc[:2,2:]

Unnamed: 0_level_0,Date
Rank,Unnamed: 1_level_1
rank1,2022
rank2,2020


# Assignment:
# Pre-defined lists

In [4]:
country = ['United States', 'Australia', 'Japan', 'India', 'Russia', 'Morocco', 'Egypt']
drives_right =  [True, False, False, False, True, True, True]
cars_per_cap = [809, 731, 588, 18, 200, 70, 45]
row_labels = ['US', 'AUS', 'JPN', 'IN', 'RU', 'MOR', 'EG']

In [5]:
#step1: Create dictionary my_dict with three key:value pairs: my_dict
my_dict = {"Country":country,"Drives_Right":drives_right,"cars_per_cap":cars_per_cap}
my_dict

{'Country': ['United States',
  'Australia',
  'Japan',
  'India',
  'Russia',
  'Morocco',
  'Egypt'],
 'Drives_Right': [True, False, False, False, True, True, True],
 'cars_per_cap': [809, 731, 588, 18, 200, 70, 45]}

In [6]:
df_a = pd.DataFrame(my_dict)
df_a

Unnamed: 0,Country,Drives_Right,cars_per_cap
0,United States,True,809
1,Australia,False,731
2,Japan,False,588
3,India,False,18
4,Russia,True,200
5,Morocco,True,70
6,Egypt,True,45


In [7]:
df_a.index = row_labels

In [8]:
df_a

Unnamed: 0,Country,Drives_Right,cars_per_cap
US,United States,True,809
AUS,Australia,False,731
JPN,Japan,False,588
IN,India,False,18
RU,Russia,True,200
MOR,Morocco,True,70
EG,Egypt,True,45


In [9]:
print(type(df_a['Country']))
df_a['Country']

<class 'pandas.core.series.Series'>


US     United States
AUS        Australia
JPN            Japan
IN             India
RU            Russia
MOR          Morocco
EG             Egypt
Name: Country, dtype: object

In [10]:
print(type(df_a[['Country']]))
df_a[['Country']]

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,Country
US,United States
AUS,Australia
JPN,Japan
IN,India
RU,Russia
MOR,Morocco
EG,Egypt


In [11]:
df_a[['Country','Drives_Right']]

Unnamed: 0,Country,Drives_Right
US,United States,True
AUS,Australia,False
JPN,Japan,False
IN,India,False
RU,Russia,True
MOR,Morocco,True
EG,Egypt,True


In [12]:
df_a[:3]

Unnamed: 0,Country,Drives_Right,cars_per_cap
US,United States,True,809
AUS,Australia,False,731
JPN,Japan,False,588


In [16]:
# df_a.iloc[3:6]
df_a.iloc[[3,4,5],:]

Unnamed: 0,Country,Drives_Right,cars_per_cap
IN,India,False,18
RU,Russia,True,200
MOR,Morocco,True,70


In [60]:
df_a[df_a.Country == "Japan"]

Unnamed: 0,Country,Drives_Right,cars_per_cap
JPN,Japan,False,588


In [62]:
df_a[df_a.Country.isin(["Australia","Egypt"])]

Unnamed: 0,Country,Drives_Right,cars_per_cap
AUS,Australia,False,731
EG,Egypt,True,45


In [63]:
df_a.loc[df_a.Country=="Morocco",['Drives_Right']]

Unnamed: 0,Drives_Right
MOR,True


# Pandas Series Exercises

Completing Exercises from w3resource

In [None]:
# 1)

pd.Series([1,2,3,4,5])

In [None]:
# 2)
series = pd.Series([1,2,3,4,5])
l_series = series.to_list()
print(l_series)
print(type(l_series))

In [None]:
# 3)
s1 = pd.Series([1,2,3,4,0])
s2 = pd.Series([2,3,4,5,2])
print(s1+s2)
print(s2-s1)
print(s1*s2)
print(s2/s1)

In [None]:
# 4)
s1 = pd.Series([1,2,3,4,5])
s2 = pd.Series([2,3,4,5,6])
s1>s2

In [None]:
# 5)
diction = {'a': 100, 'b': 200, 'c': 300, 'd': 400, 'e': 800}
series = pd.Series(diction)
series

In [None]:
# 6)
np_array = np.array([10,20,30,40,50])
series = pd.Series(np_array)
series

In [None]:
# 7)
series = pd.Series([100,200,'python',300.12,400])
print(series)
print(pd.to_numeric(series, errors='coerce'))

In [None]:
# 8)
d = {'col1': [1, 2, 3, 4, 7, 11], 'col2': [4, 5, 6, 9, 5, 0], 'col3': [7, 5, 8, 12, 1,11]}
df = pd.DataFrame(d)
print(df)
series = pd.Series(df['col1'])
series

In [None]:
# 9)
series = pd.Series([100,200,'python',300.12,400])
print(series)
ar = np.array(series)
print(ar)
print(ar.dtype)

In [None]:
# 10
s = pd.Series([
    ['Red', 'Green', 'White'],
    ['Red', 'Black'],
    ['Yellow']])
s = s.apply(pd.Series).stack().reset_index(drop=True)
s

In [None]:
# 11
series = pd.Series(['100','200','python','300.12','400'])
series = series.sort_values(ascending=True)
series

In [None]:
# 12
series = pd.Series(['100','200','python','300.12','400'])
series = series.append(pd.Series(['500', 'php']))
series

In [None]:
# 13
series = pd.Series([0,1,2,3,4,5,6,7,8,9,10])
sub_series = series[0:6]
sub_series

In [None]:
# 14
series = pd.Series([1,2,3,4,5],index=['A','B','C','D','E'])
print(series)
series = series.reindex(index = ['B','A','C','D','E'])
series

In [None]:
# 15
series = pd.Series([1,2,3,4,5,6,7,8,9,5,3])
print(series.mean())
print(series.std())

In [None]:
# 16
s1 = pd.Series([1,2,3,4,5])
s2 = pd.Series([2,4,6,8,10])

s1[~s1.isin(s2)]

In [None]:
# 17
s1 = pd.Series([1,2,3,4,5])
s2 = pd.Series([2,4,6,8,10])

pd.concat([s1[~s1.isin(s2)], s2[~s2.isin(s1)]]).reset_index(drop=True)

In [None]:
# 18
num_state = np.random.RandomState(100)
num_series = pd.Series(num_state.normal(10, 4, 20))
print(num_series.min())
print(num_series.quantile(0.25))
print(num_series.median())
print(num_series.quantile(0.75))
print(num_series.max())

In [None]:
# 19
num_series = pd.Series(np.take(list('0123456789'), np.random.randint(10, size=40)))
num_series.value_counts()

In [None]:
# 20
np.random.RandomState(100)
num_series = pd.Series(np.random.randint(1, 5, [15]))
# Not finished

# Pandas DataFrames Exercises

Exercises on DataFrames from w3resource on pandas dataframes

### Creating a dataframe from a dictionary

In [18]:
# 1
df = pd.DataFrame({'X':[78,85,96,80,86], 'Y':[84,94,89,83,86],'Z':[86,97,96,72,83]})
print(df)


    X   Y   Z
0  78  84  86
1  85  94  97
2  96  89  96
3  80  83  72
4  86  86  83


### Creating dataframe from dictionary with specified labels
Write a Pandas program to create and display a DataFrame from a specified dictionary data which has the index labels.

In [19]:
# 2
exam_data = {'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'],
             'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
             'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
             'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']}
labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
df = pd.DataFrame(exam_data,index=labels)
df

Unnamed: 0,name,score,attempts,qualify
a,Anastasia,12.5,1,yes
b,Dima,9.0,3,no
c,Katherine,16.5,2,yes
d,James,,3,no
e,Emily,9.0,2,no
f,Michael,20.0,3,yes
g,Matthew,14.5,1,yes
h,Laura,,1,no
i,Kevin,8.0,2,no
j,Jonas,19.0,1,yes


### Summary DataFrame Info
Write a Pandas program to display a summary of the basic information about a specified DataFrame and its data.

In [20]:
# 3
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, a to j
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   name      10 non-null     object 
 1   score     8 non-null      float64
 2   attempts  10 non-null     int64  
 3   qualify   10 non-null     object 
dtypes: float64(1), int64(1), object(2)
memory usage: 400.0+ bytes


### Get first 3 rows of DataFrame
Write a Pandas program to get the first 3 rows of a given DataFrame

In [21]:
# 4
df[:3]

Unnamed: 0,name,score,attempts,qualify
a,Anastasia,12.5,1,yes
b,Dima,9.0,3,no
c,Katherine,16.5,2,yes


### Select specific columns from DataFrame
Write a Pandas program to select the 'name' and 'score' columns from the following DataFrame

In [25]:
# 5
df.filter(['name', 'score'])
#  or
df[['name','score']]

Unnamed: 0,name,score
a,Anastasia,12.5
b,Dima,9.0
c,Katherine,16.5
d,James,
e,Emily,9.0
f,Michael,20.0
g,Matthew,14.5
h,Laura,
i,Kevin,8.0
j,Jonas,19.0


### Select specified columns and rows from DataFrame
Write a Pandas program to select the specified columns and rows from a given data frame

In [29]:
# 6
df.iloc[[1,3,5,6]][['name','score']]
# Better than their solution

Unnamed: 0,name,score
b,Dima,9.0
d,James,
f,Michael,20.0
g,Matthew,14.5


### Select rows where column value is greater than 2
Write a Pandas program to select the rows where the number of attempts in the examination is greater than 2.

In [30]:
# 7
df[df.attempts > 2]

Unnamed: 0,name,score,attempts,qualify
b,Dima,9.0,3,no
d,James,,3,no
f,Michael,20.0,3,yes


### Count number of rows and columns of DataFrame
Write a Pandas program to count the number of rows and columns of a DataFrame.

In [32]:
# 8
print(f'Number of Rows: {len(df.index)}')
print(f'Number of Columns: {len(df.columns)}')

Number of Rows: 10
Number of Columns: 4


### Select only rows where column value is missing
Write a Pandas program to select the rows where the score is missing, i.e. is NaN.

In [45]:
# 9
df[df['score'].isna()]

Unnamed: 0,name,score,attempts,qualify
d,James,,3,no
h,Laura,,1,no


### Select rows where column value is between two numbers
Write a Pandas program to select the rows the score is between 15 and 20 (inclusive).

In [None]:
# 10
df.loc[(df.score>=15)&(df.score<=20)]

### Select rows with condition on multiple columns
Write a Pandas program to select the rows where number of attempts in the examination is less than 2 and score greater than 15.

In [50]:
# 11
df.loc[(df.attempts<2)&(df.score>=15)]

Unnamed: 0,name,score,attempts,qualify
j,Jonas,19.0,1,yes


### Update col value in row specified by index
Write a Pandas program to change the score in row 'd' to 11.5.

In [54]:
# 12
df.loc['d','score'] = 11.5
df

Unnamed: 0,name,score,attempts,qualify
a,Anastasia,12.5,1,yes
b,Dima,9.0,3,no
c,Katherine,16.5,2,yes
d,James,11.5,3,no
e,Emily,9.0,2,no
f,Michael,20.0,3,yes
g,Matthew,14.5,1,yes
h,Laura,,1,no
i,Kevin,8.0,2,no
j,Jonas,19.0,1,yes


### Sum values in specific column
Write a Pandas program to calculate the sum of the examination attempts by the students

In [60]:
# 13
sum(df.attempts)

19

### Find Mean of values in specific column
Write a Pandas program to calculate the mean score for each different student in DataFrame

In [61]:
# 14
df.score.mean()

13.333333333333334

### Add new row to dataframe, then delete the same row
Write a Pandas program to append a new row 'k' to data frame with given values for each column. Now delete the new row and return the original DataFrame.

In [66]:
# 15
df.loc['k'] = ["Suresh",15.5,1,"yes"]
print(df)
df.drop(['k'], inplace=True)
print(df)

        name  score  attempts qualify
a  Anastasia   12.5         1     yes
b       Dima    9.0         3      no
c  Katherine   16.5         2     yes
d      James   11.5         3      no
e      Emily    9.0         2      no
f    Michael   20.0         3     yes
g    Matthew   14.5         1     yes
h      Laura    NaN         1      no
i      Kevin    8.0         2      no
j      Jonas   19.0         1     yes
k     Suresh   15.5         1     yes
        name  score  attempts qualify
a  Anastasia   12.5         1     yes
b       Dima    9.0         3      no
c  Katherine   16.5         2     yes
d      James   11.5         3      no
e      Emily    9.0         2      no
f    Michael   20.0         3     yes
g    Matthew   14.5         1     yes
h      Laura    NaN         1      no
i      Kevin    8.0         2      no
j      Jonas   19.0         1     yes


### Sort DataFrame by specific columns
Write a Pandas program to sort the DataFrame first by 'name' in descending order, then by 'score' in ascending order.

In [69]:
# 16
df.sort_values(['name','score'],ascending=[False,True])

Unnamed: 0,name,score,attempts,qualify
f,Michael,20.0,3,yes
g,Matthew,14.5,1,yes
h,Laura,,1,no
i,Kevin,8.0,2,no
c,Katherine,16.5,2,yes
j,Jonas,19.0,1,yes
d,James,11.5,3,no
e,Emily,9.0,2,no
b,Dima,9.0,3,no
a,Anastasia,12.5,1,yes


### Replace values in column with other values
Write a Pandas program to replace the 'qualify' column contains the values 'yes' and 'no' with True and False.

In [72]:
# 17
df['qualify'].replace(['yes','no'],[True,False])

a     True
b    False
c     True
d    False
e    False
f     True
g     True
h    False
i    False
j     True
Name: qualify, dtype: bool

### Replace specific value in column
Write a Pandas program to change the name 'James' to 'Suresh' in name column of the DataFrame.

In [73]:
# 18
df['name'].replace('James','Suresh')

a    Anastasia
b         Dima
c    Katherine
d       Suresh
e        Emily
f      Michael
g      Matthew
h        Laura
i        Kevin
j        Jonas
Name: name, dtype: object

### Delete (Drop) Column from DataFrame
Write a Pandas program to delete the 'attempts' column from the DataFrame.

In [74]:
# 19
df.drop(['attempts'], axis=1)

Unnamed: 0,name,score,qualify
a,Anastasia,12.5,yes
b,Dima,9.0,no
c,Katherine,16.5,yes
d,James,11.5,no
e,Emily,9.0,no
f,Michael,20.0,yes
g,Matthew,14.5,yes
h,Laura,,no
i,Kevin,8.0,no
j,Jonas,19.0,yes


### Insert new column into DataFrame
Write a Pandas program to insert a new column in existing DataFrame

In [75]:
# 20
df['color'] = ['Red','Blue','Orange','Red','White','White','Blue','Green','Green','Red']
df

Unnamed: 0,name,score,attempts,qualify,color
a,Anastasia,12.5,1,yes,Red
b,Dima,9.0,3,no,Blue
c,Katherine,16.5,2,yes,Orange
d,James,11.5,3,no,Red
e,Emily,9.0,2,no,White
f,Michael,20.0,3,yes,White
g,Matthew,14.5,1,yes,Blue
h,Laura,,1,no,Green
i,Kevin,8.0,2,no,Green
j,Jonas,19.0,1,yes,Red


### Iterate over rows in DataFrame
Write a Pandas program to iterate over rows in a DataFrame.

In [76]:
# 21
exam_data = {'name':'Anastasia', 'score':12.5}, {'name':'Dima','score':9}, {'name':'Katherine','score':16.5}
df = pd.DataFrame(exam_data)
for index, row in df.iterrows():
    print(f'{row["name"]} {row["score"]}')

Anastasia 12.5
Dima 9.0
Katherine 16.5


### Get Column headers as list
Write a Pandas program to get list from DataFrame column headers.

In [79]:
# 22
exam_data = {'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'],
             'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
             'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
             'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']}
labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
df = pd.DataFrame(exam_data,index=labels)

df.columns.to_list()

['name', 'score', 'attempts', 'qualify']

### Rename columns of DataFrame
Write a Pandas program to rename columns of a given DataFrame

In [80]:
# 23
d = {'col1': [1, 2, 3], 'col2': [4, 5, 6], 'col3': [7, 8, 9]}
df = pd.DataFrame(data=d)
df.rename(columns={'col1':'Column1','col2':'Column2','col3':'Column3'})

Unnamed: 0,Column1,Column2,Column3
0,1,4,7
1,2,5,8
2,3,6,9


### Select rows based on column value being equal to some value
Write a Pandas program to select rows from a given DataFrame based on values in some columns.

In [84]:
# 24
d = {'col1': [1, 4, 3, 4, 5], 'col2': [4, 5, 6, 7, 8], 'col3': [7, 8, 9, 0, 1]}
df = pd.DataFrame(data=d)
df[df.col1 == 4]

Unnamed: 0,col1,col2,col3
1,4,5,8
3,4,7,0


### Reorder DataFrame columns
Write a Pandas program to change the order of a DataFrame columns.

In [85]:
# 25
df[['col3','col2','col1']]

Unnamed: 0,col3,col2,col1
0,7,4,1
1,8,5,4
2,9,6,3
3,0,7,4
4,1,8,5


### Add row to existing DataFrame
Write a Pandas program to add one row in an existing DataFrame

In [88]:
# 26
df.append({'col1':10,'col2':11,'col3':12},ignore_index=True)

  df.append({'col1':10,'col2':11,'col3':12},ignore_index=True)


Unnamed: 0,col1,col2,col3
0,1,4,7
1,4,5,8
2,3,6,9
3,4,7,0
4,5,8,1
5,10,11,12


### Write DataFrame to csv with tab separator
Write a Pandas program to write a DataFrame to CSV file using tab separator.

In [None]:
# 27
df.to_csv('new_file.csv',sep='\t',index=False)

### Group by column and count times each grouped value occurs
Write a Pandas program to count city wise number of people from a given of data set (city, name of the person)

In [91]:
# 28
df1 = pd.DataFrame({'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'],
                    'city': ['California', 'Los Angeles', 'California', 'California', 'California', 'Los Angeles', 'Los Angeles', 'Georgia', 'Georgia', 'Los Angeles']})
df1.groupby('city').nunique()


Unnamed: 0_level_0,name
city,Unnamed: 1_level_1
California,4
Georgia,2
Los Angeles,4


### Delete rows based on column value being equal to some value
Write a Pandas program to delete DataFrame row(s) based on given column value

In [44]:
# 29
d = {'col1': [1, 4, 3, 4, 5], 'col2': [4, 5, 6, 7, 8], 'col3': [7, 8, 9, 0, 1]}
df = pd.DataFrame(data=d)
df[df.col3 != 8]

Unnamed: 0,col1,col2,col3
0,1,4,7
2,3,6,9
3,4,7,0
4,5,8,1


### Widen output display
Write a Pandas program to widen output display to see more columns.

In [96]:
# 30
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
print("Original DataFrame")
print(df)

Original DataFrame
   col1  col2  col3
0     1     4     7
1     4     5     8
2     3     6     9
3     4     7     0
4     5     8     1


### Select row of DataFrame by int index
Write a Pandas program to select a row of series/dataframe by given integer index.

In [47]:
# 31
df.iloc[[2]]

Unnamed: 0,col1,col2,col3
2,3,6,9


### Replace NaN in column with zeros
Write a Pandas program to replace all the NaN values with Zero's in a column of a dataframe

In [50]:
# 32
exam_data = {'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'],
             'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
             'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
             'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']}
df = pd.DataFrame(exam_data)
df.score.fillna(0,inplace=True)
df

Unnamed: 0,name,score,attempts,qualify
0,Anastasia,12.5,1,yes
1,Dima,9.0,3,no
2,Katherine,16.5,2,yes
3,James,0.0,3,no
4,Emily,9.0,2,no
5,Michael,20.0,3,yes
6,Matthew,14.5,1,yes
7,Laura,0.0,1,no
8,Kevin,8.0,2,no
9,Jonas,19.0,1,yes


### Convert Index to a column
Write a Pandas program to convert index to a column of the given dataframe

In [74]:
# 33
exam_data = {'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'],
             'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
             'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
             'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']}
df = pd.DataFrame(exam_data)

# Both of these work
df.reset_index()
#df['index'] = df.index

Unnamed: 0,index,name,score,attempts,qualify
0,0,Anastasia,12.5,1,yes
1,1,Dima,9.0,3,no
2,2,Katherine,16.5,2,yes
3,3,James,,3,no
4,4,Emily,9.0,2,no
5,5,Michael,20.0,3,yes
6,6,Matthew,14.5,1,yes
7,7,Laura,,1,no
8,8,Kevin,8.0,2,no
9,9,Jonas,19.0,1,yes


### Set value in cell using index
Write a Pandas program to set a given value for particular cell in  DataFrame using index value

In [73]:
# 34
# Both work, .at is faster if only looking for single values
# df.loc[3,['score']] = 3
df.at[3,'score'] = 3
df

Unnamed: 0,name,score,attempts,qualify
0,Anastasia,12.5,1,yes
1,Dima,9.0,3,no
2,Katherine,16.5,2,yes
3,James,3.0,3,no
4,Emily,9.0,2,no
5,Michael,20.0,3,yes
6,Matthew,14.5,1,yes
7,Laura,,1,no
8,Kevin,8.0,2,no
9,Jonas,19.0,1,yes


### Count NaN values in DataFrame
Write a Pandas program to count the NaN values in one or more columns in DataFrame.

In [76]:
# 35
df.isna().sum()

name        0
score       2
attempts    0
qualify     0
dtype: int64

### Drop list of rows from DataFrame
Write a Pandas program to drop a list of rows from a specified DataFrame.

In [78]:
# 36
d = {'col1': [1, 4, 3, 4, 5], 'col2': [4, 5, 6, 7, 8], 'col3': [7, 8, 9, 0, 1]}
df = pd.DataFrame(d)
df.drop([2,4])

Unnamed: 0,col1,col2,col3
0,1,4,7
1,4,5,8
3,4,7,0


### Reset Index of DataFrame
Write a Pandas program to reset index in a given DataFrame.

In [80]:
# 37
df = pd.DataFrame(exam_data)
df.reset_index()


Unnamed: 0,index,name,score,attempts,qualify
0,0,Anastasia,12.5,1,yes
1,1,Dima,9.0,3,no
2,2,Katherine,16.5,2,yes
3,3,James,,3,no
4,4,Emily,9.0,2,no
5,5,Michael,20.0,3,yes
6,6,Matthew,14.5,1,yes
7,7,Laura,,1,no
8,8,Kevin,8.0,2,no
9,9,Jonas,19.0,1,yes


### Get a random fraction of the DataFrame
Write a Pandas program to divide a DataFrame in a given ratio

In [84]:
# 38
df = pd.DataFrame(np.random.randn(10, 2))
df.sample(frac=0.3)

Unnamed: 0,0,1
6,-1.920027,-1.553593
9,0.927064,-0.755306
4,0.223855,1.231925


### Combine two Series to create DataFrame
Write a Pandas program to combining two series into a DataFrame.

In [86]:
# 39
s1 = pd.Series(['100', '200', 'python', '300.12', '400'])
s2 = pd.Series(['10', '20', 'php', '30.12', '40'])
df = pd.concat([s1,s2], axis=1)
df

Unnamed: 0,0,1
0,100,10
1,200,20
2,python,php
3,300.12,30.12
4,400,40


### Shuffle rows of a DataFrame
Write a Pandas program to shuffle a given DataFrame rows.

In [88]:
# 40
df = pd.DataFrame(exam_data)
df.sample(frac=1)

Unnamed: 0,name,score,attempts,qualify
2,Katherine,16.5,2,yes
6,Matthew,14.5,1,yes
8,Kevin,8.0,2,no
5,Michael,20.0,3,yes
0,Anastasia,12.5,1,yes
1,Dima,9.0,3,no
7,Laura,,1,no
9,Jonas,19.0,1,yes
3,James,,3,no
4,Emily,9.0,2,no


### Convert column type from string to DateTime
Write a Pandas program to convert DataFrame column type from string to datetime.

In [93]:
# 41
s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000'])
s = pd.to_datetime(s)
s

0   2000-03-11
1   2000-03-12
2   2000-03-13
dtype: datetime64[ns]

### Rename specific column
Write a Pandas program to rename a specific column name in a given DataFrame.

In [98]:
# 42
d = {'col1': [1, 2, 3], 'col2': [4, 5, 6], 'col3': [7, 8, 9]}
df = pd.DataFrame(data=d)
df.rename(columns={'col2':'Column2'},inplace=True)
df

Unnamed: 0,col1,Column2,col3
0,1,4,7
1,2,5,8
2,3,6,9


### Get list of column values
Write a Pandas program to get a list of a specified column of a DataFrame.

In [101]:
# 43
d = {'col1': [1, 2, 3], 'col2': [4, 5, 6], 'col3': [7, 8, 9]}
df = pd.DataFrame(data=d)
# list(df.col2)
df.col2.to_list()

[4, 5, 6]

### Create DataFrame from NumPy array
Write a Pandas program to create a DataFrame from a Numpy array and specify the index column and column headers.

In [103]:
# 44
dtype = [('Column1','int32'), ('Column2','float32'), ('Column3','float32')]
values = np.zeros(15, dtype=dtype)
index = ['Index'+str(i) for i in range(1, len(values)+1)]
df = pd.DataFrame(values,index=index)
df

Unnamed: 0,Column1,Column2,Column3
Index1,0,0.0,0.0
Index2,0,0.0,0.0
Index3,0,0.0,0.0
Index4,0,0.0,0.0
Index5,0,0.0,0.0
Index6,0,0.0,0.0
Index7,0,0.0,0.0
Index8,0,0.0,0.0
Index9,0,0.0,0.0
Index10,0,0.0,0.0


### Find Index of Max value of column
Write a Pandas program to find the row for where the value of a given column is maximum

In [108]:
# 45
d = {'col1': [1, 2, 3, 4, 7], 'col2': [4, 5, 6, 9, 5], 'col3': [7, 8, 12, 1, 11]}
df = pd.DataFrame(data=d)
print(df.idxmax())

col1    4
col2    3
col3    2
dtype: int64


### Check if column is in DataFrame
Write a Pandas program to check whether a given column is present in a DataFrame or not

In [110]:
# 46
col_name = 'col4'
if col_name in df.columns:
    print(f'{col_name} is in DataFrame')
else:
    print(f'{col_name} is not in DataFrame')

col4 is not in DataFrame


### Get specified row value of DataFrame
Write a Pandas program to get the specified row value of a given DataFrame.

In [112]:
# 47
d = {'col1': [1, 2, 3, 4, 7], 'col2': [4, 5, 6, 9, 5], 'col3': [7, 8, 12, 1, 11]}
df = pd.DataFrame(data=d)
print(df.iloc[[0]])
print(df.iloc[[4]])

   col1  col2  col3
0     1     4     7
   col1  col2  col3
4     7     5    11


### Get column DataTypes
Write a Pandas program to get the datatypes of columns of a DataFrame.

In [114]:
# 48
df = pd.DataFrame(exam_data)
df.dtypes

name         object
score       float64
attempts      int64
qualify      object
dtype: object

### Append data to empty DataFrame
Write a Pandas program to append data to an empty DataFrame

In [121]:
# 49
df = pd.DataFrame()
df2 = pd.DataFrame({'col1':[0,1,2],'col2':[0,1,2]})
df.append(df2)

  df.append(df2)


Unnamed: 0,col1,col2
0,0,0
1,1,1
2,2,2


### Sort DataFrame by columns
Write a Pandas program to sort a given DataFrame by two or more columns

In [122]:
# 50
df = pd.DataFrame(exam_data)
df.sort_values(['attempts','name'])

Unnamed: 0,name,score,attempts,qualify
0,Anastasia,12.5,1,yes
9,Jonas,19.0,1,yes
7,Laura,,1,no
6,Matthew,14.5,1,yes
4,Emily,9.0,2,no
2,Katherine,16.5,2,yes
8,Kevin,8.0,2,no
1,Dima,9.0,3,no
3,James,,3,no
5,Michael,20.0,3,yes


### Convert DataType of column from float to int
Write a Pandas program to convert the datatype of a given column (floats to ints)

In [132]:
# 51
exam_data = {'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'],
             'score': [12.5, 9.1, 16.5, 12.77, 9.21, 20.22, 14.5, 11.34, 8.8, 19.13],
             'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
             'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']}
df = pd.DataFrame(exam_data)
df.score = df.score.astype(np.int64)
df.dtypes

name        object
score        int64
attempts     int64
qualify     object
dtype: object

### Remove infinite values from DataFrame
Write a Pandas program to remove infinite values from a given DataFrame.

In [134]:
# 52
df = pd.DataFrame([1000, 2000, 3000, -4000, np.inf, -np.inf])
df.replace([np.inf, -np.inf],np.nan).dropna()

Unnamed: 0,0
0,1000.0
1,2000.0
2,3000.0
3,-4000.0


### Insert Column at specific column index
Write a Pandas program to insert a given column at a specific column index in a DataFrame.

In [139]:
# 53
d = {'col2': [4, 5, 6, 9, 5], 'col3': [7, 8, 12, 1, 11]}
df = pd.DataFrame(data=d)
df.insert(0, 'col1', [1,2,3,4,7])
df

Unnamed: 0,col1,col2,col3
0,1,4,7
1,2,5,8
2,3,6,12
3,4,9,1
4,7,5,11


### Convert list of lists to DataFrame
Write a Pandas program to convert a given list of lists into a Dataframe.

In [141]:
# 54
df = pd.DataFrame([[2, 4], [1, 3]],columns=['col1','col2'])
df

Unnamed: 0,col1,col2
0,2,4
1,1,3


### Group by first column, have second column as lists
Write a Pandas program to group by the first column and get second column as lists in rows

In [145]:
df = pd.DataFrame( {'col1':['C1','C1','C2','C2','C2','C3','C2'], 'col2':[1,2,3,3,4,6,5]})
df.groupby('col1')['col2'].apply(list)

col1
C1          [1, 2]
C2    [3, 3, 4, 5]
C3             [6]
Name: col2, dtype: object

### Get column index given column name
Write a Pandas program to get column index from column name of a given DataFrame

In [149]:
# 56
d = {'col1': [1, 2, 3, 4, 7], 'col2': [4, 5, 6, 9, 5], 'col3': [7, 8, 12, 1, 11]}
df = pd.DataFrame(data=d)
df.columns.get_loc('col2')

1

### Get number of columns in a DataFrame
Write a Pandas program to count number of columns of a DataFrame.

In [151]:
# 57
len(df.columns)

3

### Select all but one specific column
Write a Pandas program to select all columns, except one given column in a DataFrame.

In [154]:
# 58
df.loc[:, df.columns != 'col3']

Unnamed: 0,col1,col2
0,1,4
1,2,5
2,3,6
3,4,9
4,7,5


### Get first n records of a DataFrame
Write a Pandas program to get first n records of a DataFrame.

In [160]:
# 59
n = 4
# df.loc[:n-1]
# or
df.head(n)

Unnamed: 0,col1,col2,col3
0,1,4,7
1,2,5,8
2,3,6,12
3,4,9,1


### Get last n rows
Write a Pandas program to get last n records of a DataFrame.

In [162]:
# 60
n = 2
df.tail(n)

Unnamed: 0,col1,col2,col3
3,4,9,1
4,7,5,11


### Get first largest n rows ordered by column
Write a Pandas program to get topmost n records within each group of a DataFrame

In [164]:
# 61
d = {'col1': [1, 2, 3, 4, 7, 11], 'col2': [4, 5, 6, 9, 5, 0], 'col3': [7, 5, 8, 12, 1,11]}
df = pd.DataFrame(data=d)
df.nlargest(3, 'col1')

Unnamed: 0,col1,col2,col3
5,11,0,11
4,7,5,1
3,4,9,12


### Remove (delete, drop) first n rows
Write a Pandas program to remove first n rows of a given DataFrame.

In [166]:
# 62
d = {'col1': [1, 2, 3, 4, 7, 11], 'col2': [4, 5, 6, 9, 5, 0], 'col3': [7, 5, 8, 12, 1,11]}
df = pd.DataFrame(data=d)
df = df.iloc[3:]
df

Unnamed: 0,col1,col2,col3
3,4,9,12
4,7,5,1
5,11,0,11


### Remove (delete,drop) last n rows
Write a Pandas program to remove last n rows of a given DataFrame

In [167]:
# 63
d = {'col1': [1, 2, 3, 4, 7, 11], 'col2': [4, 5, 6, 9, 5, 0], 'col3': [7, 5, 8, 12, 1,11]}
df = pd.DataFrame(data=d)
df = df.iloc[:len(df.index)-3]
df

Unnamed: 0,col1,col2,col3
0,1,4,7
1,2,5,5
2,3,6,8


### Add prefix or suffix to all columns
Write a Pandas program to add a prefix or suffix to all columns of a given DataFrame.

In [170]:
# 64
df = pd.DataFrame({'W':[68,75,86,80,66],'X':[78,85,96,80,86], 'Y':[84,94,89,83,86],'Z':[86,97,96,72,83]});
print(df.add_prefix('A_'))
print(df.add_suffix('_1'))

   A_W  A_X  A_Y  A_Z
0   68   78   84   86
1   75   85   94   97
2   86   96   89   96
3   80   80   83   72
4   66   86   86   83
   W_1  X_1  Y_1  Z_1
0   68   78   84   86
1   75   85   94   97
2   86   96   89   96
3   80   80   83   72
4   66   86   86   83


# Pandas Filtering Exercises

Example exercises from w3resource about filtering a dataset in pandas

In [10]:
# 1
import pandas as pd

df = pd.read_csv('world_alcohol.csv',header=0)
print(df.columns)
print(df.shape)

Index(['Year', 'WHO region', 'Country', 'Beverage Types', 'Display Value'], dtype='object')
(100, 5)


In [14]:
# 2
print(df.iloc[:2])
print(df.iloc[:,:2])
print(df.loc[:,['Year','Display Value']])

   Year       WHO region   Country Beverage Types  Display Value
0  1986  Western Pacific  Viet Nam           Wine            0.0
1  1986         Americas   Uruguay          Other            0.5
    Year       WHO region
0   1986  Western Pacific
1   1986         Americas
2   1985           Africa
3   1986         Americas
4   1987         Americas
..   ...              ...
95  1984           Africa
96  1985           Europe
97  1984  South-East Asia
98  1984           Africa
99  1985  South-East Asia

[100 rows x 2 columns]
    Year  Display Value
0   1986           0.00
1   1986           0.50
2   1985           1.62
3   1986           4.27
4   1987           1.98
..   ...            ...
95  1984           0.00
96  1985           7.38
97  1984           0.00
98  1984           0.00
99  1985           0.00

[100 rows x 2 columns]


In [20]:
# 3
# Randomly sampling a random number of rows
print(df.sample(np.random.choice(list(range(df.shape[0])))))
# Randomly sampling 2% of total rows
print(df.sample(frac=0.02))

    Year             WHO region      Country Beverage Types  Display Value
29  1986                 Europe        Italy          Other            NaN
20  1986        South-East Asia      Myanmar           Wine           0.00
25  1984  Eastern Mediterranean      Tunisia          Other           0.00
47  1986               Americas       Mexico          Other           0.04
91  1989                 Europe     Bulgaria           Beer           4.43
..   ...                    ...          ...            ...            ...
50  1985                 Europe  Switzerland          Other           0.30
69  1986                 Africa         Togo        Spirits           0.42
36  1987  Eastern Mediterranean        Egypt           Beer           0.07
92  1986                 Africa      Eritrea        Spirits           0.00
40  1987                 Europe      Austria        Spirits           1.90

[65 rows x 5 columns]
    Year             WHO region     Country Beverage Types  Display Value
70 

In [23]:
# 4
df.dropna()

Unnamed: 0,Year,WHO region,Country,Beverage Types,Display Value
0,1986,Western Pacific,Viet Nam,Wine,0.00
1,1986,Americas,Uruguay,Other,0.50
2,1985,Africa,Cte d'Ivoire,Wine,1.62
3,1986,Americas,Colombia,Beer,4.27
4,1987,Americas,Saint Kitts and Nevis,Beer,1.98
...,...,...,...,...,...
95,1984,Africa,Niger,Other,0.00
96,1985,Europe,Luxembourg,Wine,7.38
97,1984,South-East Asia,Indonesia,Wine,0.00
98,1984,Africa,Equatorial Guinea,Wine,0.00


In [25]:
# 5
df.drop_duplicates(subset='WHO region')

Unnamed: 0,Year,WHO region,Country,Beverage Types,Display Value
0,1986,Western Pacific,Viet Nam,Wine,0.0
1,1986,Americas,Uruguay,Other,0.5
2,1985,Africa,Cte d'Ivoire,Wine,1.62
13,1984,Eastern Mediterranean,Afghanistan,Other,0.0
18,1984,Europe,Norway,Spirits,1.62
20,1986,South-East Asia,Myanmar,Wine,0.0


In [33]:
# 6
df[df['Year']==1984]

Unnamed: 0,Year,WHO region,Country,Beverage Types,Display Value
9,1984,Africa,Nigeria,Other,6.1
13,1984,Eastern Mediterranean,Afghanistan,Other,0.0
16,1984,Americas,Costa Rica,Wine,0.06
18,1984,Europe,Norway,Spirits,1.62
19,1984,Africa,Kenya,Beer,1.08
22,1984,Europe,Romania,Spirits,2.67
23,1984,Europe,Turkey,Beer,0.44
25,1984,Eastern Mediterranean,Tunisia,Other,0.0
27,1984,Eastern Mediterranean,Bahrain,Beer,2.22
42,1984,Europe,Ukraine,Spirits,3.06


In [35]:
# 7
df[(df['Year']==1987) | (df['Year']==1989)]

Unnamed: 0,Year,WHO region,Country,Beverage Types,Display Value
4,1987,Americas,Saint Kitts and Nevis,Beer,1.98
5,1987,Americas,Guatemala,Other,0.0
6,1987,Africa,Mauritius,Wine,0.13
10,1987,Africa,Botswana,Wine,0.2
11,1989,Americas,Guatemala,Beer,0.62
15,1987,Africa,Guinea-Bissau,Wine,0.07
17,1989,Africa,Seychelles,Beer,2.23
21,1989,Americas,Costa Rica,Spirits,4.51
28,1987,Western Pacific,Viet Nam,Beer,0.11
32,1989,Africa,Mauritius,Beer,1.6


In [37]:
# 8
df[(df['WHO region']=='Americas') & (df['Year']==1989)]

Unnamed: 0,Year,WHO region,Country,Beverage Types,Display Value
11,1989,Americas,Guatemala,Beer,0.62
21,1989,Americas,Costa Rica,Spirits,4.51
55,1989,Americas,Suriname,Wine,0.04
64,1989,Americas,Bolivia (Plurinational State of),Beer,1.26
78,1989,Americas,Jamaica,Other,0.0


In [38]:
# 9
df[(df['WHO region']=='Western Pacific') & (df['Year']==1986)
   & (df['Country']=='Viet Nam')]

Unnamed: 0,Year,WHO region,Country,Beverage Types,Display Value
0,1986,Western Pacific,Viet Nam,Wine,0.0


In [41]:
# 10
df[(df['Year'].isin([1986,1989])) & (df['WHO region']=='Americas')]

Unnamed: 0,Year,WHO region,Country,Beverage Types,Display Value
1,1986,Americas,Uruguay,Other,0.5
3,1986,Americas,Colombia,Beer,4.27
8,1986,Americas,Antigua and Barbuda,Spirits,1.55
11,1989,Americas,Guatemala,Beer,0.62
21,1989,Americas,Costa Rica,Spirits,4.51
47,1986,Americas,Mexico,Other,0.04
55,1989,Americas,Suriname,Wine,0.04
64,1989,Americas,Bolivia (Plurinational State of),Beer,1.26
74,1986,Americas,Bolivia (Plurinational State of),Spirits,2.06
78,1989,Americas,Jamaica,Other,0.0
