## Useful properties and methods

In [1]:
import pandas as pd

## Create DF:

In [2]:
# Load the drinks dataset; the path is relative to the notebook's working directory.
# NOTE(review): `continent` shows 23 missing values further down. If the CSV encodes
# North America as "NA", pandas' default NA parsing would explain that -- confirm.
data = pd.read_csv(filepath_or_buffer="../../datasets/various/drinks.csv")
# Preview the first five rows.
data.head(n=5)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,AS
1,Albania,89,132,54,4.9,EU
2,Algeria,25,0,14,0.7,AF
3,Andorra,245,138,312,12.4,EU
4,Angola,217,57,45,5.9,AF


## get indexes and columns

In [3]:
# Row labels of the frame (a RangeIndex here, see the output below).
index_labels = data.index
print(index_labels)
# Tip: print(list(index_labels)) would show every individual label instead.

RangeIndex(start=0, stop=193, step=1)


In [4]:
# Column labels of the frame, returned as a pandas Index of strings.
column_labels = data.columns
print(column_labels)
# Tip: print(list(column_labels)) would show them as a plain Python list.

Index(['country', 'beer_servings', 'spirit_servings', 'wine_servings',
       'total_litres_of_pure_alcohol', 'continent'],
      dtype='object')


## DF shape

In [5]:
# (number of rows, number of columns) of the frame
data.shape

(193, 6)

## DF info

In [6]:
# Per-column dtype and non-null count, plus the frame's memory usage;
# a non-null count below the number of entries reveals missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193 entries, 0 to 192
Data columns (total 6 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   country                       193 non-null    object 
 1   beer_servings                 193 non-null    int64  
 2   spirit_servings               193 non-null    int64  
 3   wine_servings                 193 non-null    int64  
 4   total_litres_of_pure_alcohol  193 non-null    float64
 5   continent                     170 non-null    object 
dtypes: float64(1), int64(3), object(2)
memory usage: 9.2+ KB


## DF describe

In [7]:
# Summary statistics (count, mean, std, min, quartiles, max);
# as the output below shows, only the numeric columns are included.
data.describe()

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
count,193.0,193.0,193.0,193.0
mean,106.160622,80.994819,49.450777,4.717098
std,101.143103,88.284312,79.697598,3.773298
min,0.0,0.0,0.0,0.0
25%,20.0,4.0,1.0,1.3
50%,76.0,56.0,8.0,4.2
75%,188.0,128.0,59.0,7.2
max,376.0,438.0,370.0,14.4


In [8]:
# Note: every statistic reported by describe() is also available as its own method.
# numeric_only=True restricts the mean to the numeric columns. Without it, the text
# columns (country, continent) make pandas >= 2.0 raise a TypeError, whereas older
# versions silently skipped them.
print('the mean values for all columns:')
print( data.mean(numeric_only=True) )

print('\nthe mean value for "wine_servings" column')
print( data['wine_servings'].mean() )

the mean values for all columns:
beer_servings                   106.160622
spirit_servings                  80.994819
wine_servings                    49.450777
total_litres_of_pure_alcohol      4.717098
dtype: float64

the mean value for "wine_servings" column
49.45077720207254


## Selecting by max/min values in DF

### Get maximum values of columns

In [9]:
# Get maximum values of every column that can be ordered.
# `continent` is excluded explicitly: it mixes strings with missing values (NaN),
# which cannot be compared with each other. Older pandas silently dropped such a
# column from the result; current versions raise a TypeError instead.
data.drop(columns="continent").max()

country                         Zimbabwe
beer_servings                        376
spirit_servings                      438
wine_servings                        370
total_litres_of_pure_alcohol        14.4
dtype: object

In [10]:
# Largest value of the wine, pure-alcohol and beer columns, each reported on its own line.
max_wine_servings = data["wine_servings"].max()
print(f'max_wine_servings: {max_wine_servings}')

max_pure_alcohol = data["total_litres_of_pure_alcohol"].max()
print(f'max_pure_alcohol: {max_pure_alcohol}')

max_beer_servings = data["beer_servings"].max()
print(f'max_beer_servings: {max_beer_servings}')

max_wine_servings: 370
max_pure_alcohol: 14.4
max_beer_servings: 376


In [11]:
# Keep only the row(s) whose wine servings equal the maximum computed earlier.
data.loc[data["wine_servings"].eq(max_wine_servings)]

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
61,France,127,151,370,11.8,EU


### Get max values of rows

To get the maximum value of each row (instead of each column), use `df.max(axis=1)`.

In [12]:
# Row-wise maxima (axis=1) for the first five rows.
# numeric_only=True limits the comparison to the numeric columns: strings such as the
# country name cannot be compared with numbers, and current pandas raises a TypeError
# rather than silently ignoring the text columns.
data.max(axis=1, numeric_only=True).head(5)

0      0.0
1    132.0
2     25.0
3    312.0
4    217.0
dtype: float64

In [13]:
# Find the max value in the row for Bulgaria.
# The full row is printed first for reference; the row-wise max is then taken over the
# numeric columns only (numeric_only=True), because mixing text and numbers in one
# comparison raises a TypeError on current pandas.
print(data[data.country == 'Bulgaria'])
data.loc[data.country == 'Bulgaria'].max(axis=1, numeric_only=True)

     country  beer_servings  spirit_servings  wine_servings  \
25  Bulgaria            231              252             94   

    total_litres_of_pure_alcohol continent  
25                          10.3        EU  


25    252.0
dtype: float64

### Get row index label of Maximum value in a column

In [14]:
# idxmax() returns the index *label* of the row holding the largest value, so the row
# is fetched with label-based .loc. Positional .iloc would only give the same row
# while the index is the default 0..n-1 range.
wine_max_idx = data.wine_servings.idxmax()
print(wine_max_idx)
data.loc[wine_max_idx, :]

61


country                         France
beer_servings                      127
spirit_servings                    151
wine_servings                      370
total_litres_of_pure_alcohol      11.8
continent                           EU
Name: 61, dtype: object

## Detect missing values

In [15]:
# Boolean mask: True wherever a cell is missing.
missing_mask = data.isnull()

# Check whether any missing value exists anywhere in the frame:
print(missing_mask.values.any())

# Show the number of missing values per column:
print(missing_mask.sum())


True
country                          0
beer_servings                    0
spirit_servings                  0
wine_servings                    0
total_litres_of_pure_alcohol     0
continent                       23
dtype: int64


## Tasks

1. Select the names of non-European countries whose wine servings are above the mean
2. Select the names of countries with 0 wine servings and 0 beer servings