In [106]:
"""                     Vectorized String Operations                  """

'                     Vectorized String Operations                  '

In [107]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


In [108]:
# >>------------>>Introducing Pandas String Operations <<------------<<< #

In [109]:
# NumPy applies arithmetic element by element: multiplying the array by a
# scalar doubles every entry without an explicit Python loop.
x = np.array([2, 3, 5, 7, 11, 13])
x * 2

array([ 4,  6, 10, 14, 22, 26])

In [110]:
# Plain-Python baseline: a list comprehension calls the string method on each
# element in turn.
# NOTE(review): "pater" looks like a typo for "peter" (compare the commented-out
# list in the next cell); left unchanged because the saved outputs depend on it.
data = ["pater", "Paul", "MARY", "gUIDO"]
[s.capitalize() for s in data]

['Pater', 'Paul', 'Mary', 'Guido']

In [111]:
# data = ['peter', 'Paul', None, 'MARY', 'gUIDO']
# [s.capitalize() for s in data]

# AttributeError: 'NoneType' object has no attribute 'capitalize'

In [112]:
# Wrap the list in a Series so the vectorized `.str` accessor can be used on it.
names = pd.Series(data)
names 

0    pater
1     Paul
2     MARY
3    gUIDO
dtype: object

In [113]:
# Vectorized counterpart of the list comprehension: the string method is
# applied to every element through the `.str` accessor.
names.str.capitalize()

0    Pater
1     Paul
2     Mary
3    Guido
dtype: object

In [114]:
# >>>------------->> Tables of Pandas String Methods <<-------------<<< #

In [115]:
# Sample data reused by the cells below: six full names ("First Last"),
# stored with the default integer index.
monte = pd.Series(
    [
        "Graham Chapman",
        "John Cleese",
        "Terry Gilliam",
        "Eric Idle",
        "Terry Jones",
        "Michael Palin",
    ]
)


In [116]:
# Methods similar to Python string methods


In [117]:
# len()        lower()        translate()   islower()
# ljust()      upper()        startswith()  isupper()
# rjust()      find()         endswith()    isnumeric()
# center()     rfind()        isalnum()     isdecimal()
# zfill()      index()        isalpha()     split()
# strip()      rindex()       isdigit()     rsplit()
# rstrip()     capitalize()  isspace()     partition()
# lstrip()     swapcase()    istitle()     rpartition()


In [118]:
# Lower-case every name; the result is a Series of strings.
monte.str.lower()

0    graham chapman
1       john cleese
2     terry gilliam
3         eric idle
4       terry jones
5     michael palin
dtype: object

In [119]:
# Character count of each name; the result is a Series of integers.
monte.str.len()

0    14
1    11
2    13
3     9
4    11
5    13
dtype: int64

In [120]:
# Boolean mask: True for the names whose first character is "T".
monte.str.startswith("T")

0    False
1    False
2     True
3    False
4     True
5    False
dtype: bool

In [121]:
# Split each name into its words; every element becomes a list of strings.
monte.str.split()

0    [Graham, Chapman]
1       [John, Cleese]
2     [Terry, Gilliam]
3         [Eric, Idle]
4       [Terry, Jones]
5     [Michael, Palin]
dtype: object

In [122]:
# Methods using regular expressions


In [123]:
# | Metod        | Tavsif                                                                                             |
# | ------------ | -------------------------------------------------------------------------------------------------- |
# | `match()`    | Har bir element uchun `re.match()` chaqiradi va **Boolean** qiymat qaytaradi                       |
# | `extract()`  | Har bir element uchun `re.search()` chaqiradi va mos tushgan guruhlarni satr ko‘rinishida qaytaradi |
# | `findall()`  | Har bir element uchun `re.findall()` chaqiradi                                                     |
# | `replace()`  | Shablon (pattern) uchragan joylarni boshqa satr bilan almashtiradi                                 |
# | `contains()` | Har bir element uchun `re.search()` chaqiradi va **Boolean** qaytaradi                             |
# | `count()`    | Shablon nechta marta uchraganini hisoblaydi                                                        |
# | `split()`    | `str.split()` bilan bir xil, ammo **regex** qabul qiladi                                           |
# | `rsplit()`   | `str.rsplit()` bilan bir xil, ammo **regex** qabul qiladi                                          |




# # ENG
# |Method    | Description
# |----------|--------------------------------------------------------------------------------------------------------|
# |match()   | Call re.match() on each element, returning a Boolean.
# |extract() | Call re.search() on each element, returning the groups of the first match as strings.
# |findall() | Call re.findall() on each element.
# |replace() | Replace occurrences of pattern with some other string.
# |contains()| Call re.search() on each element, returning a Boolean.
# |count()   | Count occurrences of pattern.
# |split()   | Equivalent to str.split(), but accepts regexps.
# |rsplit()  | Equivalent to str.rsplit(), but accepts regexps.



In [124]:
# Pull the first run of ASCII letters out of each element (here, the first
# name). The result is a DataFrame with one column per capture group.
monte.str.extract("([A-Za-z]+)")

Unnamed: 0,0
0,Graham
1,John
2,Terry
3,Eric
4,Terry
5,Michael


In [125]:
# Keep only names whose first character is not an upper-case vowel and whose
# last character is not a lower-case vowel; non-matching rows yield an empty list.
monte.str.findall(r'^[^AEIOU].*[^aeiou]$')


0    [Graham Chapman]
1                  []
2     [Terry Gilliam]
3                  []
4       [Terry Jones]
5     [Michael Palin]
dtype: object

In [126]:
# Boshqa (Miscellaneous) Pandas satr metodlari

In [127]:
# | Metod             | Tavsif                                                             |
# | ----------------- | ------------------------------------------------------------------ |
# | `get()`           | Har bir elementdan indeks bo‘yicha qiymat olish                    |
# | `slice()`         | Har bir elementni kesib olish                                      |
# | `slice_replace()` | Elementning ma’lum qismini boshqa qiymat bilan almashtirish        |
# | `cat()`           | Satrlarni birlashtirish                                            |
# | `repeat()`        | Qiymatlarni takrorlash                                             |
# | `normalize()`     | Unicode formatiga o‘tkazish                                        |
# | `pad()`           | Satrning chap, o‘ng yoki ikkala tomoniga bo‘sh joy qo‘shish        |
# | `wrap()`          | Uzun satrlarni berilgan kenglikdan kichik qatorlarga bo‘lish       |
# | `join()`          | Series elementlaridagi satrlarni ajratuvchi bilan birlashtirish    |
# | `get_dummies()`   | Dummy (indikator) o‘zgaruvchilarni DataFrame ko‘rinishida ajratish |


In [128]:
# First three characters of every name.
monte.str.slice(0, 3)

0    Gra
1    Joh
2    Ter
3    Eri
4    Ter
5    Mic
dtype: object

In [129]:
# Same slice expressed with indexing syntax on the `.str` accessor.
monte.str[0:3]

0    Gra
1    Joh
2    Ter
3    Eri
4    Ter
5    Mic
dtype: object

In [130]:
# NOTE: this is Series.get (lookup by index label), not the element-wise
# `.str.get` accessor -- it returns the single entry stored at label 3.
# To take character 3 of every string, use monte.str.get(3) instead.
monte.get(3)

'Eric Idle'

In [131]:
# Chain two vectorized steps: split each name into words, then take the last
# word of each list (the surname).
monte.str.split().str.get(-1)

0    Chapman
1     Cleese
2    Gilliam
3       Idle
4      Jones
5      Palin
dtype: object

In [132]:
# Indikator (dummy) o‘zgaruvchilar

In [133]:
# Pair every name with an "info" string holding "|"-separated single-letter
# codes; this column is the input for get_dummies in the following cell.
full_monte = pd.DataFrame({
    "info":["B|C|D", "B|D", "A|C", "B|D", "B|C", "B|C|D"],
    "name":monte
})
full_monte

Unnamed: 0,info,name
0,B|C|D,Graham Chapman
1,B|D,John Cleese
2,A|C,Terry Gilliam
3,B|D,Eric Idle
4,B|C,Terry Jones
5,B|C|D,Michael Palin


In [134]:
# Split each "info" string on "|" and expand the codes into 0/1 indicator
# columns, one column per distinct code.
full_monte["info"].str.get_dummies("|")

Unnamed: 0,A,B,C,D
0,0,1,1,1
1,0,1,0,1
2,1,0,1,0
3,0,1,0,1
4,0,1,1,0
5,0,1,1,1


In [135]:
"""                        Example: Recipe Database                    """

'                        Example: Recipe Database                    '

In [136]:
# Download the gzipped recipe dump into the current working directory.
# NOTE(review): the transfer log below reports only 20 bytes received, so this
# URL does not appear to serve the real archive any more -- confirm and replace
# the source before relying on the file.
!curl -O http://openrecipes.s3.amazonaws.com/recipeitems-latest.json.gz

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100    20  100    20    0     0     33      0 --:--:-- --:--:-- --:--:--    34


In [137]:
# Decompress the downloaded archive with the external `gunzip` utility.
# NOTE(review): the output below is a shell "command not recognised" error, so
# `gunzip` is not available on this machine and this step did not run -- confirm.
!gunzip recipeitems-latest.json.gz

"gunzip" не является внутренней или внешней
командой, исполняемой программой или пакетным файлом.


In [140]:
# Load the recipe dump straight from the remote gzip archive.
# `recipes` is reset first so that a failed load leaves an explicit None
# instead of an unbound name or a stale frame from an earlier run.
# NOTE(review): if the dump is newline-delimited JSON (one object per line),
# read_json will also need lines=True -- confirm once a working copy is available.
recipes = None
try:
    recipes = pd.read_json('http://openrecipes.s3.amazonaws.com/recipeitems-latest.json.gz')
except ValueError as e:
    # Raised when the downloaded payload cannot be parsed as JSON; report it
    # without aborting the rest of the notebook run.
    print("ValueError:", e)


ValueError: Expected object or value
