# Домашнее задание к лекции "Базовые понятия статистики"

## Обязательная часть

Будем осуществлять работу с непростым [набором данных](https://raw.githubusercontent.com/obulygin/pyda_homeworks/master/statistics_basics/horse_data.csv) о состоянии здоровья лошадей, испытывающих кишечные колики. 

### Задание 1. Базовое изучение

Изучить представленный набор данных на основе [описания его столбцов](https://raw.githubusercontent.com/obulygin/pyda_homeworks/master/statistics_basics/horse_data.names) и выбрать 8 столбцов для дальнейшего изучения (среди них должны быть как числовые, так и категориальные). Провести расчет базовых метрик для них, кратко описать результаты.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Column labels for the headerless horse-colic CSV, listed in file order.
# The trailing comments reproduce the value codes from the dataset description.
columns = [
    'surgery',  # 1 = Yes, it had surgery
                # 2 = It was treated without surgery

    'Age',  # 1 = Adult horse
            # 2 = Young (< 6 months)
            # NOTE(review): the rows previewed in this notebook carry 9, not 2,
            #   for some horses; the notebook later remaps 9 -> 2 — confirm
            #   that 9 really means "young".
 
    'Hospital Number',  # numeric id
                        # the case number assigned to the horse
                        # (may not be unique if the horse is treated > 1 time)
 
    'rectal temperature',  # linear
                           # in degrees celsius.
                           # An elevated temp may occur due to infection.
                           # temperature may be reduced when the animal is in late shock
                           # normal temp is 37.8
                           # this parameter will usually change as the problem progresses
                           #   eg. may start out normal, then become elevated because of
                           #     the lesion, passing back through the normal range as the
                           #     horse goes into shock
  
    'pulse',  # linear
              # the heart rate in beats per minute
              # is a reflection of the heart condition: 30-40 is normal for adults
              # rare to have a lower than normal rate although athletic horses
              #     may have a rate of 20-25
              # animals with painful lesions or suffering from circulatory shock
              #     may have an elevated heart rate
 
    'respiratory rate',  # linear
                         # normal rate is 8 to 10
                         # usefulness is doubtful due to the great fluctuations
  
    'temperature of extremities',  # a subjective indication of peripheral circulation
                                   # possible values:
                                   #   1 = Normal
                                   #   2 = Warm
                                   #   3 = Cool
                                   #   4 = Cold
                                   # cool to cold extremities indicate possible shock
                                   # hot extremities should correlate with an elevated rectal temp.
    
    'peripheral pulse',  # subjective
                         # possible values are:
                         #   1 = normal
                         #   2 = increased
                         #   3 = reduced
                         #   4 = absent
                         # normal or increased p.p. are indicative of adequate circulation
                         #    while reduced or absent indicate poor perfusion
    
    'mucous membranes',  # a subjective measurement of colour
                         # possible values are:
                         #   1 = normal pink
                         #   2 = bright pink
                         #   3 = pale pink
                         #   4 = pale cyanotic
                         #   5 = bright red / injected
                         #   6 = dark cyanotic
                         # 1 and 2 probably indicate a normal or slightly increased
                         #    circulation
                         # 3 may occur in early shock
                         # 4 and 6 are indicative of serious circulatory compromise
                         # 5 is more indicative of a septicemia
    
    'capillary refill time',  # a clinical judgement. The longer the refill, the poorer the
                              #   circulation
                              # possible values
                              #   1 = < 3 seconds
                              #   2 = >= 3 seconds
    
    'pain',  # a subjective judgement of the horse's pain level
             # possible values:
             #   1 = alert, no pain
             #   2 = depressed
             #   3 = intermittent mild pain
             #   4 = intermittent severe pain
             #   5 = continuous severe pain
             # should NOT be treated as an ordered or discrete variable!
             # In general, the more painful, the more likely it is to require
             #    surgery
             # prior treatment of pain may mask the pain level to some extent
    
    'peristalsis',  # an indication of the activity in the horse's gut. As the gut
                    #   becomes more distended or the horse becomes more toxic, the
                    #   activity decreases
                    # possible values:
                    #   1 = hypermotile
                    #   2 = normal
                    #   3 = hypomotile
                    #   4 = absent
    
    'abdominal distension',  # An IMPORTANT parameter.
                             # possible values
                             #   1 = none
                             #   2 = slight
                             #   3 = moderate
                             #   4 = severe
                             # an animal with abdominal distension is likely to be painful and
                             #    have reduced gut motility.
                             # a horse with severe abdominal distension is likely to require
                             #    surgery just to relieve the pressure
    
    'nasogastric tube',  # this refers to any gas coming out of the tube
                         # possible values:
                         #   1 = none
                         #   2 = slight
                         #   3 = significant
                         # a large gas cap in the stomach is likely to give the horse
                         #    discomfort
    
    'nasogastric reflux',  # possible values
                           #   1 = none
                           #   2 = > 1 liter
                           #   3 = < 1 liter
                           # the greater amount of reflux, the more likelihood that there is
                           #   some serious obstruction to the fluid passage from the rest of
                           #   the intestine
    
    'nasogastric reflux PH',  # linear
                              # scale is from 0 to 14 with 7 being neutral
                              # normal values are in the 3 to 4 range

    
    'rectal examination - feces',  # possible values
                                   #   1 = normal
                                   #   2 = increased
                                   #   3 = decreased
                                   #   4 = absent
                                   # absent feces probably indicates an obstruction
    
    'abdomen',  # possible values
                #   1 = normal
                #   2 = other
                #   3 = firm feces in the large intestine
                #   4 = distended small intestine
                #   5 = distended large intestine
                # 3 is probably an obstruction caused by a mechanical impaction
                #   and is normally treated medically
                # 4 and 5 indicate a surgical lesion
    
    'packed cell volume',  # linear
                           # the # of red cells by volume in the blood
                           # normal range is 30 to 50. The level rises as the circulation
                           #  becomes compromised or as the animal becomes dehydrated.

    
    'total protein',  # linear
                      # normal values lie in the 6-7.5 (gms/dL) range
                      # the higher the value the greater the dehydration
    
    'abdominocentesis appearance',  # a needle is put in the horse's abdomen and fluid is obtained from
                                    #   the abdominal cavity
                                    # possible values:
                                    #   1 = clear
                                    #   2 = cloudy
                                    #   3 = serosanguinous
                                    # normal fluid is clear while cloudy or serosanguinous indicates
                                    #   a compromised gut
    
    'abdomcentesis total protein',  # linear
                                    # the higher the level of protein the more likely it is to have a
                                    #    compromised gut. Values are in gms/dL

    
    'outcome',  # what eventually happened to the horse?
                # possible values:
                #   1 = lived
                #   2 = died
                #   3 = was euthanized
# type of lesion
#     - first number is site of lesion
#                1 = gastric
#                2 = sm intestine
#                3 = lg colon
#                4 = lg colon and cecum
#                5 = cecum
#                6 = transverse colon
#                7 = rectum/descending colon
#                8 = uterus
#                9 = bladder
#                11 = all intestinal sites
#                00 = none
#           - second number is type
#                1 = simple
#                2 = strangulation
#                3 = inflammation
#                4 = other
#           - third number is subtype
#                1 = mechanical
#                2 = paralytic
#                0 = n/a
#           - fourth number is specific code
#                1 = obturation
#                2 = intrinsic
#                3 = extrinsic
#                4 = adynamic
#                5 = volvulus/torsion
#                6 = intussusception
#                7 = thromboembolic
#                8 = hernia
#                9 = lipoma/splenic incarceration
#                10 = displacement
#                0 = n/a
# NOTE(review): in the previewed rows the first of the four columns below
#   holds only 1/2 while the next one holds multi-digit codes (e.g. 11300,
#   2208), so the labels look shifted by one — presumably one yes/no
#   "surgical lesion" flag followed by three "type of lesion" codes.
#   Confirm against the dataset description before using these columns.
    'surgical lesion_1',
    'surgical lesion_2',
    'surgical lesion_3',
    'type of lesion',
    
    'cp_data'  # is pathology data present for this case?
               #   1 = Yes
               #   2 = No
               # this variable is of no significance since pathology data
               #    is not included or collected for these cases
        ]

In [3]:
# Load the raw table. The file has no header row, so the labels come from
# the `columns` list defined above.
df = pd.read_csv(
    'horse_data.csv',
    names=columns,
    header=None,
)
df.head(10)

Unnamed: 0,surgery,Age,Hospital Number,rectal temperature,pulse,respiratory rate,temperature of extremities,peripheral pulse,mucous membranes,capillary refill time,...,packed cell volume,total protein,abdominocentesis appearance,abdomcentesis total protein,outcome,surgical lesion_1,surgical lesion_2,surgical lesion_3,type of lesion,cp_data
0,2,1,530101,38.50,66,28,3,3,?,2,...,45.00,8.40,?,?,2,2,11300,0,0,2
1,1,1,534817,39.2,88,20,?,?,4,1,...,50,85,2,2,3,2,2208,0,0,2
2,2,1,530334,38.30,40,24,1,1,3,1,...,33.00,6.70,?,?,1,2,0,0,0,1
3,1,9,5290409,39.10,164,84,4,1,6,2,...,48.00,7.20,3,5.30,2,1,2208,0,0,1
4,2,1,530255,37.30,104,35,?,?,6,2,...,74.00,7.40,?,?,2,2,4300,0,0,2
5,2,1,528355,?,?,?,2,1,3,1,...,?,?,?,?,1,2,0,0,0,2
6,1,1,526802,37.90,48,16,1,1,1,1,...,37.00,7.00,?,?,1,1,3124,0,0,2
7,1,1,529607,?,60,?,3,?,?,1,...,44.00,8.30,?,?,2,1,2208,0,0,2
8,2,1,530051,?,80,36,3,4,3,1,...,38.00,6.20,?,?,3,1,3205,0,0,2
9,2,9,5299629,38.30,90,?,1,?,1,1,...,40.00,6.20,1,2.20,1,2,0,0,0,1


In [4]:
# The columns do not provide for a value of 0 at all, so both the '?'
# placeholder and 0 are turned into NaN.
# NOTE(review): the lesion-code description above does list 0 ("none" /
#   "n/a"), so genuine zeros in those columns are blanked here as well —
#   confirm this is acceptable if the lesion columns are ever analysed.
df.replace(to_replace=['?', 0], value=np.nan, inplace=True)
df.head(10)

Unnamed: 0,surgery,Age,Hospital Number,rectal temperature,pulse,respiratory rate,temperature of extremities,peripheral pulse,mucous membranes,capillary refill time,...,packed cell volume,total protein,abdominocentesis appearance,abdomcentesis total protein,outcome,surgical lesion_1,surgical lesion_2,surgical lesion_3,type of lesion,cp_data
0,2,1,530101,38.5,66.0,28.0,3.0,3.0,,2,...,45.0,8.4,,,2,2,11300.0,,,2
1,1,1,534817,39.2,88.0,20.0,,,4.0,1,...,50.0,85.0,2.0,2.0,3,2,2208.0,,,2
2,2,1,530334,38.3,40.0,24.0,1.0,1.0,3.0,1,...,33.0,6.7,,,1,2,,,,1
3,1,9,5290409,39.1,164.0,84.0,4.0,1.0,6.0,2,...,48.0,7.2,3.0,5.3,2,1,2208.0,,,1
4,2,1,530255,37.3,104.0,35.0,,,6.0,2,...,74.0,7.4,,,2,2,4300.0,,,2
5,2,1,528355,,,,2.0,1.0,3.0,1,...,,,,,1,2,,,,2
6,1,1,526802,37.9,48.0,16.0,1.0,1.0,1.0,1,...,37.0,7.0,,,1,1,3124.0,,,2
7,1,1,529607,,60.0,,3.0,,,1,...,44.0,8.3,,,2,1,2208.0,,,2
8,2,1,530051,,80.0,36.0,3.0,4.0,3.0,1,...,38.0,6.2,,,3,1,3205.0,,,2
9,2,9,5299629,38.3,90.0,,1.0,,1.0,1,...,40.0,6.2,1.0,2.2,1,2,,,,1


In [5]:
# Force every column to a numeric dtype; cells that cannot be parsed as a
# number become NaN instead of raising.
df = df.apply(lambda col: pd.to_numeric(col, errors='coerce'))
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 28 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   surgery                      299 non-null    float64
 1   Age                          300 non-null    int64  
 2   Hospital Number              300 non-null    int64  
 3   rectal temperature           240 non-null    float64
 4   pulse                        276 non-null    float64
 5   respiratory rate             242 non-null    float64
 6   temperature of extremities   244 non-null    float64
 7   peripheral pulse             231 non-null    float64
 8   mucous membranes             253 non-null    float64
 9   capillary refill time        268 non-null    float64
 10  pain                         245 non-null    float64
 11  peristalsis                  256 non-null    float64
 12  abdominal distension         244 non-null    float64
 13  nasogastric tube    

In [6]:
# Work on an independent copy of the eight columns chosen for the study:
# four categorical codes followed by four numeric measurements.
new_df = df.loc[
    :,
    [
        'surgery',
        'Age',
        'pain',
        'outcome',
        'rectal temperature',
        'pulse',
        'respiratory rate',
        'total protein',
    ],
].copy()
new_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   surgery             299 non-null    float64
 1   Age                 300 non-null    int64  
 2   pain                245 non-null    float64
 3   outcome             299 non-null    float64
 4   rectal temperature  240 non-null    float64
 5   pulse               276 non-null    float64
 6   respiratory rate    242 non-null    float64
 7   total protein       267 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 18.9 KB


In [7]:
# Baseline summary statistics for the selected columns. The category codes
# (surgery, Age, pain, outcome) are stored as numbers, so they are summarised
# too — their mean/std describe the codes, not a measurement.
new_df.describe()

Unnamed: 0,surgery,Age,pain,outcome,rectal temperature,pulse,respiratory rate,total protein
count,299.0,300.0,245.0,299.0,240.0,276.0,242.0,267.0
mean,1.397993,1.64,2.95102,1.551839,38.167917,71.913043,30.417355,24.456929
std,0.490305,2.173972,1.30794,0.737187,0.732289,28.630557,17.642231,27.475009
min,1.0,1.0,1.0,1.0,35.4,30.0,8.0,3.3
25%,1.0,1.0,2.0,1.0,37.8,48.0,18.5,6.5
50%,1.0,1.0,3.0,1.0,38.2,64.0,24.5,7.5
75%,2.0,1.0,4.0,2.0,38.5,88.0,36.0,57.0
max,2.0,9.0,5.0,3.0,40.8,184.0,96.0,89.0


In [8]:
# The columns do not provide for a value of 0 at all, so both '?' and 0 are
# turned into NaN.
# NOTE(review): the same replacement was already applied to `df` before the
#   selection was copied, and the non-null counts reported before and after
#   this cell are identical, so it appears to change nothing on this data.
new_df.replace(to_replace=['?', 0], value=np.nan, inplace=True)
new_df.head(10)

Unnamed: 0,surgery,Age,pain,outcome,rectal temperature,pulse,respiratory rate,total protein
0,2.0,1,5.0,2.0,38.5,66.0,28.0,8.4
1,1.0,1,3.0,3.0,39.2,88.0,20.0,85.0
2,2.0,1,3.0,1.0,38.3,40.0,24.0,6.7
3,1.0,9,2.0,2.0,39.1,164.0,84.0,7.2
4,2.0,1,,2.0,37.3,104.0,35.0,7.4
5,2.0,1,2.0,1.0,,,,
6,1.0,1,3.0,1.0,37.9,48.0,16.0,7.0
7,1.0,1,,2.0,,60.0,,8.3
8,2.0,1,4.0,3.0,,80.0,36.0,6.2
9,2.0,9,5.0,1.0,38.3,90.0,,6.2


Датафрейм содержит пропуски, требующие обработки (все столбцы, в которых количество непустых значений меньше 300).
Столбец "Age" содержит ошибки заполнения (внесены недопустимые значения: 9 вместо 2), которые необходимо исправить.
Столбцы "pulse" и "respiratory rate", возможно, содержат выбросы, требующие дополнительного анализа.

### Задание 2. Работа с выбросами

В выбранных числовых столбцах найти выбросы, выдвинуть гипотезы об их причинах и проинтерпретировать результаты. Принять и обосновать решение о дальнейшей работе с ними.

В столбце "total protein" часть значений сильно завышена. Предполагаю, что данные были внесены некорректно (потерян десятичный разделитель), поэтому такие значения необходимо разделить на 10.

In [9]:
# Readings of 30 and above are taken to be mis-scaled by a factor of ten and
# are divided by 10; smaller readings (and NaN) pass through unchanged.
new_df['total protein'] = new_df['total protein'].where(
    new_df['total protein'] < 30,
    new_df['total protein'] / 10,
)
new_df.head(15)

Unnamed: 0,surgery,Age,pain,outcome,rectal temperature,pulse,respiratory rate,total protein
0,2.0,1,5.0,2.0,38.5,66.0,28.0,8.4
1,1.0,1,3.0,3.0,39.2,88.0,20.0,8.5
2,2.0,1,3.0,1.0,38.3,40.0,24.0,6.7
3,1.0,9,2.0,2.0,39.1,164.0,84.0,7.2
4,2.0,1,,2.0,37.3,104.0,35.0,7.4
5,2.0,1,2.0,1.0,,,,
6,1.0,1,3.0,1.0,37.9,48.0,16.0,7.0
7,1.0,1,,2.0,,60.0,,8.3
8,2.0,1,4.0,3.0,,80.0,36.0,6.2
9,2.0,9,5.0,1.0,38.3,90.0,,6.2


In [10]:
def find_ejection(column, data=None, whisker=1.5):
    """Return the rows whose value in *column* is an IQR outlier.

    A value is an outlier when it lies outside the closed interval
    ``[Q1 - whisker * IQR, Q3 + whisker * IQR]``. Rows where *column* is
    missing are never reported.

    Args:
        column: Label of the numeric column to inspect.
        data: DataFrame to inspect. Defaults to the notebook-level ``new_df``.
        whisker: IQR multiplier that sets the fences (1.5 by default).

    Returns:
        The outlier rows of *data*, in their original order and with their
        original index labels.
    """
    if data is None:
        data = new_df
    values = data[column]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - whisker * iqr
    upper_bound = q3 + whisker * iqr
    # `between` is inclusive on both ends by default; the boolean form
    # `inclusive=True` is rejected by current pandas, so it is not passed.
    inside_fences = values.between(lower_bound, upper_bound)
    # Select with a mask instead of concat + drop_duplicates(keep=False):
    # that trick also discarded outlier rows that were exact duplicates of
    # one another.
    return data[values.notna() & ~inside_fences]

In [11]:
# 'Age' is a category code (1 = adult, 2 = young per the column description),
# so the rows flagged here are simply those carrying some other code.
find_ejection('Age')

Unnamed: 0,surgery,Age,pain,outcome,rectal temperature,pulse,respiratory rate,total protein
3,1.0,9,2.0,2.0,39.1,164.0,84.0,7.2
9,2.0,9,5.0,1.0,38.3,90.0,,6.2
13,2.0,9,1.0,2.0,38.0,92.0,28.0,6.1
16,1.0,9,4.0,2.0,,128.0,36.0,7.8
23,1.0,9,2.0,1.0,38.3,130.0,60.0,7.0
39,1.0,9,,2.0,39.2,146.0,96.0,
41,2.0,9,,1.0,39.0,150.0,72.0,8.5
55,1.0,9,3.0,2.0,38.6,160.0,20.0,
74,1.0,9,,2.0,,,,4.9
75,1.0,9,2.0,3.0,39.7,100.0,,5.7


In [12]:
# The column description allows only 1 (adult) and 2 (young); remap the stray
# code 9 to 2.
# NOTE(review): assumes 9 stands for "young" — confirm against the data source.
new_df['Age'] = new_df['Age'].replace(9, 2)

In [13]:
# List the rows whose respiratory rate falls outside the 1.5 * IQR fences.
find_ejection('respiratory rate')

Unnamed: 0,surgery,Age,pain,outcome,rectal temperature,pulse,respiratory rate,total protein
3,1.0,2,2.0,2.0,39.1,164.0,84.0,7.2
39,1.0,2,,2.0,39.2,146.0,96.0,
41,2.0,2,,1.0,39.0,150.0,72.0,8.5
82,1.0,2,3.0,1.0,38.1,100.0,80.0,5.7
84,1.0,1,2.0,1.0,37.8,60.0,80.0,4.5
103,1.0,2,3.0,1.0,38.0,140.0,68.0,5.3
106,1.0,1,,1.0,38.3,52.0,96.0,6.1
120,1.0,1,2.0,1.0,39.4,54.0,66.0,6.0
125,1.0,1,3.0,1.0,38.0,42.0,68.0,7.6
208,1.0,1,,3.0,37.8,88.0,80.0,8.9


In [14]:
# Keep respiratory rates below 70 and blank everything else to NaN.
# NOTE(review): 70 is a hand-picked cut-off, not the IQR fence — some rows
#   reported as outliers above (66, 68) stay in the data.
new_df['respiratory rate'] = new_df['respiratory rate'].where(
    new_df['respiratory rate'] < 70
)
new_df.head(15)

Unnamed: 0,surgery,Age,pain,outcome,rectal temperature,pulse,respiratory rate,total protein
0,2.0,1,5.0,2.0,38.5,66.0,28.0,8.4
1,1.0,1,3.0,3.0,39.2,88.0,20.0,8.5
2,2.0,1,3.0,1.0,38.3,40.0,24.0,6.7
3,1.0,2,2.0,2.0,39.1,164.0,,7.2
4,2.0,1,,2.0,37.3,104.0,35.0,7.4
5,2.0,1,2.0,1.0,,,,
6,1.0,1,3.0,1.0,37.9,48.0,16.0,7.0
7,1.0,1,,2.0,,60.0,,8.3
8,2.0,1,4.0,3.0,,80.0,36.0,6.2
9,2.0,2,5.0,1.0,38.3,90.0,,6.2


In [15]:
# Summary statistics again, after the Age, total protein and respiratory rate
# corrections, for comparison with the baseline table.
new_df.describe()

Unnamed: 0,surgery,Age,pain,outcome,rectal temperature,pulse,respiratory rate,total protein
count,299.0,300.0,245.0,299.0,240.0,276.0,229.0,267.0
mean,1.397993,1.08,2.95102,1.551839,38.167917,71.913043,27.427948,6.854682
std,0.490305,0.271746,1.30794,0.737187,0.732289,28.630557,12.559693,1.091425
min,1.0,1.0,1.0,1.0,35.4,30.0,8.0,3.3
25%,1.0,1.0,2.0,1.0,37.8,48.0,18.0,6.2
50%,1.0,1.0,3.0,1.0,38.2,64.0,24.0,6.7
75%,2.0,1.0,4.0,2.0,38.5,88.0,36.0,7.5
max,2.0,2.0,5.0,3.0,40.8,184.0,68.0,13.0


### Задание 3. Работа с пропусками

Рассчитать количество пропусков для всех выбранных столбцов. Принять и обосновать решение о методе работы с пропусками по каждому столбцу, сформировать датафрейм, в котором пропуски будут отсутствовать.

In [16]:
# Non-null counts per column show how many values are missing in each one.
new_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   surgery             299 non-null    float64
 1   Age                 300 non-null    int64  
 2   pain                245 non-null    float64
 3   outcome             299 non-null    float64
 4   rectal temperature  240 non-null    float64
 5   pulse               276 non-null    float64
 6   respiratory rate    229 non-null    float64
 7   total protein       267 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 18.9 KB


In [17]:
# Keep only the rows that have at least 5 non-missing values among the 8
# selected columns.
new_df_drop = new_df.dropna(thresh=5)

In [18]:
# Non-null counts for the rows that survived the thresh=5 filter.
new_df_drop.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 289 entries, 0 to 299
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   surgery             288 non-null    float64
 1   Age                 289 non-null    int64  
 2   pain                243 non-null    float64
 3   outcome             288 non-null    float64
 4   rectal temperature  240 non-null    float64
 5   pulse               275 non-null    float64
 6   respiratory rate    229 non-null    float64
 7   total protein       263 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 20.3 KB


In [19]:
# Print, for every column, the share of rows in which the value is missing.
print('Количество пропусков в столбцах:')
for column, pct_missing in new_df_drop.isna().mean().items():
    print(f'{column} - {pct_missing:.1%}')

Количество пропусков в столбцах:
surgery - 0.3%
Age - 0.0%
pain - 15.9%
outcome - 0.3%
rectal temperature - 17.0%
pulse - 4.8%
respiratory rate - 20.8%
total protein - 9.0%


In [20]:
# thresh=8 equals the number of selected columns, so only rows with no
# missing value at all are kept.
short_df = new_df.dropna(thresh=8)

In [21]:
# Confirm that every column of the complete-rows frame is fully populated.
short_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 156 entries, 0 to 298
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   surgery             156 non-null    float64
 1   Age                 156 non-null    int64  
 2   pain                156 non-null    float64
 3   outcome             156 non-null    float64
 4   rectal temperature  156 non-null    float64
 5   pulse               156 non-null    float64
 6   respiratory rate    156 non-null    float64
 7   total protein       156 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 11.0 KB


In [22]:
# Independent copy, so that filling the gaps does not alter new_df.
open_df = new_df.copy()

In [23]:
# Fill the gaps in the listed columns with that column's mean.
# The result is assigned back explicitly: calling
# `open_df[col].fillna(..., inplace=True)` operates on a column selection,
# which warns on recent pandas and leaves `open_df` unchanged under
# Copy-on-Write.
# NOTE(review): 'pain' is a category code that the column description says
#   must not be treated as ordered, so a mean (about 2.95) is not a valid
#   level — consider the mode instead. 'surgery' and 'outcome' still keep one
#   missing value each after this step.
open_df = open_df.fillna({
    'pain': open_df['pain'].mean(),
    'rectal temperature': open_df['rectal temperature'].mean(),
    'pulse': open_df['pulse'].mean(),
    'total protein': open_df['total protein'].mean(),
    'respiratory rate': open_df['respiratory rate'].mean(),
})

In [24]:
# Check which columns are fully populated after the mean fill.
open_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   surgery             299 non-null    float64
 1   Age                 300 non-null    int64  
 2   pain                300 non-null    float64
 3   outcome             299 non-null    float64
 4   rectal temperature  300 non-null    float64
 5   pulse               300 non-null    float64
 6   respiratory rate    300 non-null    float64
 7   total protein       300 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 18.9 KB


In [25]:
# Summary statistics after imputation: the means of the filled columns stay
# the same while their spread shrinks.
open_df.describe()

Unnamed: 0,surgery,Age,pain,outcome,rectal temperature,pulse,respiratory rate,total protein
count,299.0,300.0,300.0,299.0,300.0,300.0,300.0,300.0
mean,1.397993,1.08,2.95102,1.551839,38.167917,71.913043,27.427948,6.854682
std,0.490305,0.271746,1.181536,0.737187,0.654705,27.457472,10.967581,1.029436
min,1.0,1.0,1.0,1.0,35.4,30.0,8.0,3.3
25%,1.0,1.0,2.0,1.0,37.9,48.0,20.0,6.3
50%,1.0,1.0,2.95102,1.0,38.167917,68.0,27.427948,6.854682
75%,2.0,1.0,4.0,2.0,38.5,88.0,30.0,7.4
max,2.0,2.0,5.0,3.0,40.8,184.0,68.0,13.0
