6.2. Модуль pandas

Длины всех слов - 2

In [30]:
import pandas as pd
import string


def length_stats(text):
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    words = text.split()
    words = set([word for word in words if not any(char.isdigit() for char in word)])
    words = sorted(words)
    lengths = pd.Series([len(word) for word in words], index=words)
    
    return lengths

In [31]:
print(length_stats('Мама мыла раму'))

мама    4
мыла    4
раму    4
dtype: int64


In [33]:
print(length_stats('Лес, опушка, странный домик. Лес, опушка и зверушка.'))

домик       5
зверушка    8
и           1
лес         3
опушка      6
странный    8
dtype: int64


Длины всех слов по чётности

In [57]:
import pandas as pd
import re


def length_stats(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\d+', '', text) 
    
    words = sorted(text.split())
                   
    odd_words = {word: len(word) for word in words if len(word) % 2 != 0}
    even_words = {word: len(word) for word in words if len(word) % 2 == 0}
    
    odd = pd.Series(odd_words, dtype='int64')
    even = pd.Series(even_words, dtype='int64')
    
    return odd, even

In [58]:
odd, even = length_stats('Мама мыла раму')
print(odd)
print(even)

Series([], dtype: int64)
мама    4
мыла    4
раму    4
dtype: int64


In [59]:
odd, even = length_stats('Лес, опушка, странный домик. Лес, опушка и зверушка.')
print(odd)
print(even)

домик    5
и        1
лес      3
dtype: int64
зверушка    8
опушка      6
странный    8
dtype: int64


Чек - 2

In [60]:
import pandas as pd


def cheque(price_list, **purchases):
    data = []
    
    for product, number in purchases.items():
        price = price_list[product]
        cost = price * number
        data.append((product, price, number, cost))
    
    df = pd.DataFrame(data, columns=['product', 'price', 'number', 'cost'])
    
    df = df.sort_values(by='product').reset_index(drop=True)
    
    return df

In [61]:
products = ['bread', 'milk', 'soda', 'cream']
prices = [37, 58, 99, 72]
price_list = pd.Series(prices, index=products)
result = cheque(price_list, soda=3, milk=2, cream=1)
print(result)

  product  price  number  cost
0   cream     72       1    72
1    milk     58       2   116
2    soda     99       3   297


Акция

In [73]:
import pandas as pd


def cheque(price_list, **kwargs):
    my_products = sorted(kwargs)
    product_dict = {
        'product': my_products,
        'price': [price_list[i] for i in my_products],
        'number': [kwargs[i] for i in my_products]
    }
    product_dict = pd.DataFrame(product_dict)
    product_dict['cost'] = product_dict['price'] * product_dict['number']
    
    return product_dict


def discount(s):
    new_s = s.copy()
    for i in range(len(new_s.loc[:, 'cost'])):
        new_s.loc[i, 'cost'] /= 1 + (s.loc[:, 'number'][i] > 2)
    return new_s

In [74]:
products = ['bread', 'milk', 'soda', 'cream']
prices = [37, 58, 99, 72]
price_list = pd.Series(prices, index=products)
result = cheque(price_list, soda=3, milk=2, cream=1)
with_discount = discount(result)
print(result)
print(with_discount)

  product  price  number  cost
0   cream     72       1    72
1    milk     58       2   116
2    soda     99       3   297
  product  price  number   cost
0   cream     72       1   72.0
1    milk     58       2  116.0
2    soda     99       3  148.5


  new_s.loc[i, 'cost'] /= 1 + (s.loc[:, 'number'][i] > 2)


Длинные слова

In [66]:
def get_long(data, min_length=5):
    return data[data >= min_length]

In [67]:
data = pd.Series([3, 5, 6, 6], ['мир', 'питон', 'привет', 'яндекс'])
filtered = get_long(data)
print(data)
print(filtered)

мир       3
питон     5
привет    6
яндекс    6
dtype: int64
питон     5
привет    6
яндекс    6
dtype: int64


In [68]:
data = pd.Series([3, 5, 6, 6], ['мир', 'питон', 'привет', 'яндекс'])
filtered = get_long(data, min_length=6)
print(data)
print(filtered)

мир       3
питон     5
привет    6
яндекс    6
dtype: int64
привет    6
яндекс    6
dtype: int64


Отчёт успеваемости

In [35]:
import pandas as pd

def best(journal):
    return journal[(journal['maths'] > 3) & (journal['physics'] > 3) & (journal['computer science'] > 3)]

In [36]:
columns = ['name', 'maths', 'physics', 'computer science']
data = {
    'name': ['Иванов', 'Петров', 'Сидоров', 'Васечкин', 'Николаев'],
    'maths': [5, 4, 5, 2, 4],
    'physics': [4, 4, 4, 5, 5],
    'computer science': [5, 2, 5, 4, 3]
}
journal = pd.DataFrame(data, columns=columns)
filtered = best(journal)
print(journal)
print(filtered)

       name  maths  physics  computer science
0    Иванов      5        4                 5
1    Петров      4        4                 2
2   Сидоров      5        4                 5
3  Васечкин      2        5                 4
4  Николаев      4        5                 3
      name  maths  physics  computer science
0   Иванов      5        4                 5
2  Сидоров      5        4                 5


Отчёт неуспеваемости

In [37]:
import pandas as pd


def need_to_work_better(j):
    new_j = j.copy()
    return new_j[(new_j['maths'] < 3) | (new_j['physics'] < 3) | (new_j['computer science'] < 3)]

In [38]:
columns = ['name', 'maths', 'physics', 'computer science']
data = {
    'name': ['Иванов', 'Петров', 'Сидоров', 'Васечкин', 'Николаев'],
    'maths': [5, 4, 5, 2, 4],
    'physics': [4, 4, 4, 5, 5],
    'computer science': [5, 2, 5, 4, 3]
}
journal = pd.DataFrame(data, columns=columns)
filtered = need_to_work_better(journal)
print(journal)
print(filtered)

       name  maths  physics  computer science
0    Иванов      5        4                 5
1    Петров      4        4                 2
2   Сидоров      5        4                 5
3  Васечкин      2        5                 4
4  Николаев      4        5                 3
       name  maths  physics  computer science
1    Петров      4        4                 2
3  Васечкин      2        5                 4


Обновление журнала

In [39]:
import pandas as pd


def update(journal):
   
    journal_copy = journal.copy()
   
    journal_copy['average'] = journal_copy[['maths', 'physics', 'computer science']].mean(axis=1) # Добавляем столбец average с средним значением оценок
    
    journal_copy = journal_copy.sort_values(by=['average', 'name'], ascending=[False, True]) # Сортируем данные по убыванию average, а при равенстве средних — по имени лексикографически
    
    return journal_copy

In [41]:
columns = ['name', 'maths', 'physics', 'computer science']
data = {
    'name': ['Иванов', 'Петров', 'Сидоров', 'Васечкин', 'Николаев'],
    'maths': [5, 4, 5, 2, 4],
    'physics': [4, 4, 4, 5, 5],
    'computer science': [5, 2, 5, 4, 3]
}
journal = pd.DataFrame(data, columns=columns)
filtered = update(journal)
print(journal)
print(filtered)

       name  maths  physics  computer science
0    Иванов      5        4                 5
1    Петров      4        4                 2
2   Сидоров      5        4                 5
3  Васечкин      2        5                 4
4  Николаев      4        5                 3
       name  maths  physics  computer science   average
0    Иванов      5        4                 5  4.666667
2   Сидоров      5        4                 5  4.666667
4  Николаев      4        5                 3  4.000000
3  Васечкин      2        5                 4  3.666667
1    Петров      4        4                 2  3.333333


Бесконечный морской бой

In [25]:
import pandas as pd

a, b = map(int, input().split())
c, d = map(int, input().split())
data = pd.read_csv('data.csv')
print(data[(a <= data['x']) & (data['x'] <= c) & (d <= data['y']) & (data['y'] <= b)])

         x   y
6262     9   0
59060   10   4
69882   10   5
72739    0   0
120951   3   1
137931   9  10
183595   7   0
194157   0   9
219910   0   3
220920  10   0
242318   8   4
283651   1   8
292990   4   3
294474   6   3
352959  10  10
393223   3   5
423449   1   2


Экстремум функции

In [42]:
import numpy as np
import pandas as pd


def values(func, start, end, step):
    index = np.arange(start, end + step, step)
    return pd.Series(map(func, index), index=index, dtype='float64')


def min_extremum(data):
    return min(data[data == min(data)].index)
    
    
def max_extremum(data):
    return max(data[data == max(data)].index)

In [44]:
data = values(lambda x: x ** 2 + 2 * x + 1, -1.5, 1.7, 0.1)
print(data)
print(min_extremum(data))
print(max_extremum(data))

-1.500000e+00    0.25
-1.400000e+00    0.16
-1.300000e+00    0.09
-1.200000e+00    0.04
-1.100000e+00    0.01
-1.000000e+00    0.00
-9.000000e-01    0.01
-8.000000e-01    0.04
-7.000000e-01    0.09
-6.000000e-01    0.16
-5.000000e-01    0.25
-4.000000e-01    0.36
-3.000000e-01    0.49
-2.000000e-01    0.64
-1.000000e-01    0.81
 1.332268e-15    1.00
 1.000000e-01    1.21
 2.000000e-01    1.44
 3.000000e-01    1.69
 4.000000e-01    1.96
 5.000000e-01    2.25
 6.000000e-01    2.56
 7.000000e-01    2.89
 8.000000e-01    3.24
 9.000000e-01    3.61
 1.000000e+00    4.00
 1.100000e+00    4.41
 1.200000e+00    4.84
 1.300000e+00    5.29
 1.400000e+00    5.76
 1.500000e+00    6.25
 1.600000e+00    6.76
 1.700000e+00    7.29
dtype: float64
-0.9999999999999996
1.7000000000000028
