In [None]:
import numpy as np
import pandas as pd

NaN values:

In [106]:
# Two float columns that differ only in how the missing entry is written:
# Python's ``None`` in one, NumPy's ``np.nan`` in the other.
df_float = pd.DataFrame(
    dict(
        column_none=[1.0, 2.0, 3.0, 4.0, 5.0, None],
        column_nan=[1.0, 2.0, 3.0, 4.0, 5.0, np.nan],
    )
)

In [107]:
df_float['column_none'] == df_float['column_nan']

0     True
1     True
2     True
3     True
4     True
5    False
dtype: bool

In [108]:
df_float

Unnamed: 0,column_none,column_nan
0,1.0,1.0
1,2.0,2.0
2,3.0,3.0
3,4.0,4.0
4,5.0,5.0
5,,


In [109]:
df_float.loc[5]

column_none   NaN
column_nan    NaN
Name: 5, dtype: float64

In [110]:
df_float['column_none'].dtype, df_float['column_nan'].dtype

(dtype('float64'), dtype('float64'))

`None` is converted to `np.nan`, so both columns end up as `float64`.

In [111]:
df_float.fillna("Not a number")

Unnamed: 0,column_none,column_nan
0,1.0,1.0
1,2.0,2.0
2,3.0,3.0
3,4.0,4.0
4,5.0,5.0
5,Not a number,Not a number


In [113]:
df_float.fillna("Not a number")["column_none"].dtype

dtype('O')

In [114]:
# Same experiment as above, but starting from Python integers: the five
# values 1..5 followed by a missing entry spelled either ``None`` or ``np.nan``.
df_int = pd.DataFrame(
    dict(
        column_none=[*range(1, 6), None],
        column_nan=[*range(1, 6), np.nan],
    )
)

In [115]:
df_int['column_none'] == df_int['column_nan']

0     True
1     True
2     True
3     True
4     True
5    False
dtype: bool

In [116]:
df_int

Unnamed: 0,column_none,column_nan
0,1.0,1.0
1,2.0,2.0
2,3.0,3.0
3,4.0,4.0
4,5.0,5.0
5,,


In [117]:
df_int.loc[5]

column_none   NaN
column_nan    NaN
Name: 5, dtype: float64

In [118]:
df_int['column_none'].dtype, df_int['column_nan'].dtype

(dtype('float64'), dtype('float64'))

The `int` values were cast to `float`, because a NumPy integer column cannot hold a missing value.

In [119]:
# Demonstration: a float column that contains NaN cannot be narrowed to a plain
# NumPy integer dtype. The error is caught and printed so that the notebook
# still survives "Restart Kernel -> Run All"; the assignment never happens.
try:
    df_int['column_nan'] = df_int['column_nan'].astype(np.int16)
except ValueError as err:  # pandas' IntCastingNaNError is a ValueError subclass
    print(f"{type(err).__name__}: {err}")

IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer

A column with `NaN` values cannot be cast to a NumPy `int` dtype.

[Nullable integer](https://pandas.pydata.org/docs/user_guide/integer_na.html)

In [120]:
df_int['column_nan'] = df_int['column_nan'].astype("Int16")

What's the difference?

In [121]:
df_int

Unnamed: 0,column_none,column_nan
0,1.0,1.0
1,2.0,2.0
2,3.0,3.0
3,4.0,4.0
4,5.0,5.0
5,,


In [122]:
df_int['column_none'].nbytes, df_int['column_nan'].nbytes

(48, 18)

In [123]:
df_int.column_none.loc[5]

nan

In [124]:
df_int.column_nan.loc[5]

<NA>

In [125]:
df_int.isna()

Unnamed: 0,column_none,column_nan
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
5,True,True


In [126]:
df_int['column_none'].loc[5].nbytes

8

In [127]:
df_int.memory_usage(index=False)

column_none    48
column_nan     18
dtype: int64

In [128]:
df_int.columns = ['column_1', 'column_2']

In [129]:
df_int

Unnamed: 0,column_1,column_2
0,1.0,1.0
1,2.0,2.0
2,3.0,3.0
3,4.0,4.0
4,5.0,5.0
5,,


In [130]:
df_int.loc[1, 'column_1'] = None
df_int.loc[1, 'column_2'] = None

In [131]:
df_int

Unnamed: 0,column_1,column_2
0,1.0,1.0
1,,
2,3.0,3.0
3,4.0,4.0
4,5.0,5.0
5,,


In [135]:
# Demonstration: unlike the float64 case shown earlier, a nullable Int16 column
# refuses a string fill value instead of silently becoming an object column.
# The error is caught and printed so the rest of the notebook keeps running.
try:
    df_int.fillna("Not a number")
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: Invalid value 'Not a number' for dtype Int16

In [None]:
nas = df_int.column_2[df_int.column_2.isna()]

In [None]:
nas

1    <NA>
5    <NA>
Name: column_2, dtype: Int16

In [None]:
nas.iloc[0] == nas.iloc[1]

<NA>

In [None]:
nas.iloc[0] is nas.iloc[1]

True

In [None]:
nans = df_int.column_1[df_int.column_1.isna()]

In [None]:
nans

1   NaN
5   NaN
Name: column_1, dtype: float64

In [None]:
a, b = nans.values

In [None]:
a, b

(nan, nan)

In [None]:
a == b

False

In [None]:
a is b

False

In [None]:
set([float('nan'), float('nan')])

{nan, nan}

In [None]:
set([np.float64('nan'), np.float64('nan')])

{nan, nan}

In [None]:
set([pd.NA, pd.NA])

{<NA>}

In [None]:
set([np.nan, np.nan])

{nan}

In [None]:
# Every spelling of NaN that NumPy exposes is the very same float object.
# `np.NaN` and `np.NAN` were removed in NumPy 2.0, so the aliases are looked up
# with a fallback instead of being referenced directly (which would raise
# AttributeError on NumPy >= 2.0).
all(getattr(np, alias, np.nan) is np.nan for alias in ('nan', 'NaN', 'NAN'))

True

In [None]:
pd.NA is pd.NA

True

In [None]:
type(1 + pd.NA)

pandas._libs.missing.NAType

In [None]:
id(1 + pd.NA), id(pd.NA)

(136279654204560, 136279654204560)

In [None]:
# `np.nan` is one module-level float object, so repeated lookups are identical.
# (The `np.NaN` / `np.NAN` aliases are not used here: they were removed in
# NumPy 2.0 and referencing them raises AttributeError there.)
np.nan is np.nan

True

In [None]:
type(1 + np.nan)

float

In [None]:
id(np.nan), id(np.nan + 1)

(136280243546320, 136279082784752)

## Summary Functions and Maps

In [None]:
!wget -O data.csv https://gist.githubusercontent.com/clairehq/79acab35be50eaf1c383948ed3fd1129/raw/407a02139ae1e134992b90b4b2b8c329b3d73a6a/winemag-data-130k-v2.csv

--2024-01-26 09:28:38--  https://gist.githubusercontent.com/clairehq/79acab35be50eaf1c383948ed3fd1129/raw/407a02139ae1e134992b90b4b2b8c329b3d73a6a/winemag-data-130k-v2.csv
Resolving gist.githubusercontent.com (gist.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to gist.githubusercontent.com (gist.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 26496255 (25M) [text/plain]
Saving to: ‘data.csv’


2024-01-26 09:28:38 (205 MB/s) - ‘data.csv’ saved [26496255/26496255]



In [None]:
df = pd.read_csv("data.csv", index_col=0)

In [None]:
df.sample(5)

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
13124,US,"The 12 Hanging Delta Vineyard is a plusher, mo...",Riesling 12 Hanging Delta Vineyard,89,30.0,New York,Finger Lakes,Finger Lakes,Anna Lee C. Iijima,,Fox Run 2012 Riesling 12 Hanging Delta Vineyar...,Riesling,Fox Run
43312,Chile,Major olive and charred aromas ride on top of ...,Reserva,84,13.0,Maule Valley,,,Michael Schachner,@wineschach,Viña Casas Patronales 2011 Reserva Carmenère (...,Carmenère,Viña Casas Patronales
34790,US,"This Pinot tastes so oaky, it's like chewing o...",The Flyer,85,59.0,California,Russian River Valley,Sonoma,,,MacPhail 2010 The Flyer Pinot Noir (Russian Ri...,Pinot Noir,MacPhail
36748,US,"This offers pleasant notes of apricot, dark ca...",,85,8.0,California,Clarksburg,Central Valley,Virginie Boone,@vboone,Baron Herzog 2011 Chenin Blanc (Clarksburg),Chenin Blanc,Baron Herzog
52712,Argentina,Trapiche has never bottled much Sauvignon Blan...,Pure,86,15.0,Mendoza Province,Uco Valley,,Michael Schachner,@wineschach,Trapiche 2016 Pure Sauvignon Blanc (Uco Valley),Sauvignon Blanc,Trapiche


Gather some statistics

In [None]:
df.describe()

Unnamed: 0,points,price
count,65499.0,60829.0
mean,88.434037,35.232932
std,3.03031,39.477858
min,80.0,4.0
25%,86.0,17.0
50%,88.0,25.0
75%,91.0,42.0
max,100.0,2500.0


In [None]:
df.taster_name.describe()

count          51856
unique            19
top       Roger Voss
freq           13045
Name: taster_name, dtype: object

In [None]:
df.points.mean()

88.43403716087269

In [None]:
df.taster_name.unique()

array(['Kerin O’Keefe', 'Roger Voss', 'Paul Gregutt',
       'Alexander Peartree', 'Michael Schachner', 'Anna Lee C. Iijima',
       'Virginie Boone', 'Matt Kettmann', nan, 'Sean P. Sullivan',
       'Jim Gordon', 'Joe Czerwinski', 'Anne Krebiehl\xa0MW',
       'Lauren Buzzeo', 'Mike DeSimone', 'Jeff Jenssen',
       'Susan Kostrzewa', 'Carrie Dykes', 'Fiona Adams',
       'Christina Pickard'], dtype=object)

In [None]:
df.taster_name.value_counts()

Roger Voss            13045
Michael Schachner      7752
Kerin O’Keefe          5313
Paul Gregutt           4851
Virginie Boone         4696
Matt Kettmann          3035
Joe Czerwinski         2605
Sean P. Sullivan       2358
Anna Lee C. Iijima     2134
Jim Gordon             2032
Anne Krebiehl MW       1769
Lauren Buzzeo           938
Susan Kostrzewa         593
Jeff Jenssen            234
Mike DeSimone           231
Alexander Peartree      210
Carrie Dykes             45
Fiona Adams              11
Christina Pickard         4
Name: taster_name, dtype: int64

In [None]:
# Centre the scores around their mean. The mean is computed once and
# subtracted element-wise; calling `df.points.mean()` inside a per-element
# lambda would recompute it for every single row.
df.points - df.points.mean()

0       -1.434037
1       -1.434037
2       -1.434037
3       -1.434037
4       -1.434037
           ...   
65494    1.565963
65495    1.565963
65496    1.565963
65497    1.565963
65498    1.565963
Name: points, Length: 65499, dtype: float64

In [None]:
mean = df.points.mean()

def apply_fn(row):
    """Return a copy of ``row`` whose ``points`` value is centred on ``mean``.

    ``row`` is one row of the dataframe, passed in by
    ``DataFrame.apply(..., axis='columns')``. The row is copied before it is
    changed because pandas does not support UDFs that mutate the object they
    receive.
    """
    centered = row.copy()
    centered['points'] = centered['points'] - mean
    return centered

df.apply(apply_fn, axis='columns')

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,-1.434037,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,-1.434037,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,US,"Tart and snappy, the flavors of lime flesh and...",,-1.434037,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,-1.434037,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,-1.434037,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
...,...,...,...,...,...,...,...,...,...,...,...,...,...
65494,France,Made from young vines from the Vaulorent porti...,Fourchaume Premier Cru,1.565963,45.0,Burgundy,Chablis,,Roger Voss,@vossroger,William Fèvre 2005 Fourchaume Premier Cru (Ch...,Chardonnay,William Fèvre
65495,Australia,"This is a big, fat, almost sweet-tasting Caber...",,1.565963,22.0,South Australia,McLaren Vale,,Joe Czerwinski,@JoeCz,Tapestry 2005 Cabernet Sauvignon (McLaren Vale),Cabernet Sauvignon,Tapestry
65496,US,"Much improved over the unripe 2005, Fritz's 20...",Estate,1.565963,20.0,California,Dry Creek Valley,Sonoma,,,Fritz 2006 Estate Sauvignon Blanc (Dry Creek V...,Sauvignon Blanc,Fritz
65497,US,This wine wears its 15.8% alcohol better than ...,Block 24,1.565963,31.0,California,Napa Valley,Napa,,,Hendry 2004 Block 24 Primitivo (Napa Valley),Primitivo,Hendry


## Grouping and Sorting

In [84]:
df.groupby('points')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7bf1e5866a70>

In [83]:
df.groupby('points').points.count()

points
80      155
81      305
82      923
83     1442
84     3490
85     5082
86     6179
87     8872
88     8423
89     5724
90     7697
91     6016
92     4917
93     3268
94     1905
95      678
96      262
97       99
98       39
99       15
100       8
Name: points, dtype: int64

In [85]:
df.groupby('points').price.min()

points
80       5.0
81       5.0
82       5.0
83       4.0
84       4.0
85       4.0
86       4.0
87       5.0
88       6.0
89       7.0
90       8.0
91       7.0
92      11.0
93      13.0
94      13.0
95      20.0
96      27.0
97      40.0
98      50.0
99      75.0
100    150.0
Name: price, dtype: float64

In [86]:
# For each (country, province) group, keep the row with the highest score.
# `idxmax` returns the index label of the first maximum in the group.
(
    df
    .groupby(['country', 'province'])
    .apply(lambda group: group.loc[group['points'].idxmax()])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
country,province,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Argentina,Mendoza Province,Argentina,If you love massive Argentine reds with purity...,Finca Pedregal Single Vineyard Barrancas Maipú...,95,74.0,Mendoza Province,Mendoza,,Michael Schachner,@wineschach,Pascual Toso 2014 Finca Pedregal Single Vineya...,Cabernet Sauvignon-Malbec,Pascual Toso
Argentina,Other,Argentina,This single-vineyard Malbec blend from vineyar...,Chañar Punco,94,68.0,Other,Calchaquí Valley,,Michael Schachner,@wineschach,El Esteco 2013 Chañar Punco Red (Calchaquí Val...,Red Blend,El Esteco
Armenia,Armenia,Armenia,"Medium straw in the glass, this wine has a nos...",Estate Bottled,87,14.0,Armenia,,,Mike DeSimone,@worldwineguys,Van Ardi 2015 Estate Bottled Kangoun (Armenia),Kangoun,Van Ardi
Australia,Australia Other,Australia,Writes the book on how to make a wine filled w...,Sarah's Blend,93,15.0,Australia Other,South Eastern Australia,,,,Marquis Philips 2000 Sarah's Blend Red (South ...,Red Blend,Marquis Philips
Australia,New South Wales,Australia,This is full and rich but not overly heavy or ...,Botrytis,91,19.0,New South Wales,Riverina,,Joe Czerwinski,@JoeCz,Three Bridges 2013 Botrytis Semillon (Riverina),Sémillon,Three Bridges
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Uruguay,Juanico,Uruguay,This mature Bordeaux-style blend is earthy on ...,Preludio Barrel Select Lote N 77,90,45.0,Juanico,,,Michael Schachner,@wineschach,Familia Deicas 2004 Preludio Barrel Select Lot...,Red Blend,Familia Deicas
Uruguay,Montevideo,Uruguay,"A rich, heady bouquet offers aromas of blackbe...",Monte Vide Eu Tannat-Merlot-Tempranillo,91,60.0,Montevideo,,,Michael Schachner,@wineschach,Bouza 2015 Monte Vide Eu Tannat-Merlot-Tempran...,Red Blend,Bouza
Uruguay,Progreso,Uruguay,RPF is regularly one of Uruguay's better Tanna...,RPF,88,20.0,Progreso,,,Michael Schachner,@wineschach,Pisano 2013 RPF Tannat (Progreso),Tannat,Pisano
Uruguay,San Jose,Uruguay,"Baked, sweet, heavy aromas turn earthy with ti...",El Preciado Gran Reserva,87,50.0,San Jose,,,Michael Schachner,@wineschach,Castillo Viejo 2005 El Preciado Gran Reserva R...,Red Blend,Castillo Viejo


In [87]:
# Per-country number of rows plus cheapest and most expensive price.
# 'min' / 'max' are given as strings: passing the Python builtins `min` / `max`
# to `agg` is deprecated since pandas 2.1 and emits a FutureWarning.
# `len` is kept as a callable so the resulting column is still labelled "len".
df.groupby(['country']).price.agg([len, 'min', 'max'])

Unnamed: 0_level_0,len,min,max
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Argentina,1907,4.0,230.0
Armenia,1,14.0,14.0
Australia,1177,6.0,850.0
Austria,1635,7.0,150.0
Bosnia and Herzegovina,1,13.0,13.0
Brazil,31,10.0,45.0
Bulgaria,68,8.0,55.0
Canada,108,12.0,120.0
Chile,2258,5.0,400.0
Croatia,44,13.0,65.0


In [88]:
# Multi-Index

countries_reviewed = df.groupby(['country', 'province']).description.agg([len])
countries_reviewed

Unnamed: 0_level_0,Unnamed: 1_level_0,len
country,province,Unnamed: 2_level_1
Argentina,Mendoza Province,1635
Argentina,Other,272
Armenia,Armenia,1
Australia,Australia Other,131
Australia,New South Wales,34
...,...,...
Uruguay,Juanico,8
Uruguay,Montevideo,10
Uruguay,Progreso,5
Uruguay,San Jose,3


In [89]:
countries_reviewed.index

MultiIndex([('Argentina',  'Mendoza Province'),
            ('Argentina',             'Other'),
            (  'Armenia',           'Armenia'),
            ('Australia',   'Australia Other'),
            ('Australia',   'New South Wales'),
            ('Australia',   'South Australia'),
            ('Australia',          'Tasmania'),
            ('Australia',          'Victoria'),
            ('Australia', 'Western Australia'),
            (  'Austria',           'Austria'),
            ...
            (       'US',        'Washington'),
            (       'US', 'Washington-Oregon'),
            (  'Ukraine',           'Ukraine'),
            (  'Uruguay',         'Atlantida'),
            (  'Uruguay',         'Canelones'),
            (  'Uruguay',           'Juanico'),
            (  'Uruguay',        'Montevideo'),
            (  'Uruguay',          'Progreso'),
            (  'Uruguay',          'San Jose'),
            (  'Uruguay',           'Uruguay')],
           names=['coun

In [98]:
countries_reviewed.loc["Uruguay", "Montevideo"]

len    10
Name: (Uruguay, Montevideo), dtype: int64

In [101]:
countries_reviewed.reset_index()

Unnamed: 0,country,province,len
0,Argentina,Mendoza Province,1635
1,Argentina,Other,272
2,Armenia,Armenia,1
3,Australia,Australia Other,131
4,Australia,New South Wales,34
...,...,...,...
380,Uruguay,Juanico,8
381,Uruguay,Montevideo,10
382,Uruguay,Progreso,5
383,Uruguay,San Jose,3


In [103]:
# Sorting

countries_reviewed.sort_values(by='len', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,len
country,province,Unnamed: 2_level_1
US,California,18122
US,Washington,4308
France,Bordeaux,3014
Italy,Tuscany,2985
US,Oregon,2691
...,...,...
Slovenia,Dolenjska,1
Slovenia,Kras,1
Slovenia,Slovenia,1
South Africa,Vlootenburg,1


In [104]:
countries_reviewed.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,len
country,province,Unnamed: 2_level_1
Argentina,Mendoza Province,1635
Argentina,Other,272
Armenia,Armenia,1
Australia,Australia Other,131
Australia,New South Wales,34
...,...,...
Uruguay,Juanico,8
Uruguay,Montevideo,10
Uruguay,Progreso,5
Uruguay,San Jose,3


In [105]:
countries_reviewed.sort_values(by=['country', 'len'])

Unnamed: 0_level_0,Unnamed: 1_level_0,len
country,province,Unnamed: 2_level_1
Argentina,Other,272
Argentina,Mendoza Province,1635
Armenia,Armenia,1
Australia,Tasmania,23
Australia,New South Wales,34
...,...,...
Uruguay,Progreso,5
Uruguay,Uruguay,7
Uruguay,Juanico,8
Uruguay,Montevideo,10


## Tasks

###  Given series A and series B

In [None]:
# Input data for the set-style exercises below; the two series share 3 and 4.
series_a = pd.Series(data=[1, 2, 4, 3])
series_b = pd.Series(data=[3, 4, 5, 6])

- Items in series A that are not present in series B

- Intersection of series

- Items present in only one of the series, not in both

### Merge by column pairs: fruit-pazham, weight-kilo

In [None]:
# A seeded local generator keeps the exercise data identical on every run,
# so answers can be compared between runs and between readers.
rng = np.random.default_rng(42)

# Left table: 9 rows, keyed by ('fruit', 'weight'), prices drawn from 0..14.
df1 = pd.DataFrame({'fruit': ['apple', 'banana', 'orange'] * 3,
                    'weight': ['high', 'medium', 'low'] * 3,
                    'price': rng.integers(0, 15, 9)})

# Right table: 6 rows, keyed by ('pazham', 'kilo'), prices drawn from 0..14.
df2 = pd.DataFrame({'pazham': ['apple', 'orange', 'pine'] * 2,
                    'kilo': ['high', 'low'] * 3,
                    'price': rng.integers(0, 15, 6)})

Let's explore dropping duplicate rows.

###  Reverse all rows (first row should become last etc.)

In [None]:
# Small 5x6 frame holding 0..29 row by row, with columns column_0 .. column_5.
# NOTE(review): this rebinds `df`, which held the wine reviews until now, and
# the frames displayed in the cells below are 10x5 with different values, so
# they were not produced by this cell -- confirm which frame the tasks intend.
column_names = [f'column_{i}' for i in range(6)]
df = pd.DataFrame(np.arange(30).reshape(5, 6), columns=column_names)

### Get the name of the column that most often contains the row-wise maximum of the dataframe

### Find the positions of numbers that are multiples of N

In [None]:
N = 5

In [None]:
df

Unnamed: 0,column_0,column_1,column_2,column_3,column_4
0,39,37,22,62,69
1,11,86,77,44,14
2,81,7,72,89,4
3,38,92,97,72,14
4,14,92,2,26,97
5,30,76,92,96,47
6,43,55,69,70,23
7,99,55,80,26,49
8,86,4,58,58,35
9,68,91,68,67,5


Now treat row and column positions as coordinates: return a list (or array) of `(row, column)` pairs for those elements, as a one-liner.

### Compute the min-by-max ratio (row minimum divided by row maximum) for every row of the dataframe

In [None]:
df

Unnamed: 0,column_0,column_1,column_2,column_3,column_4
0,39,37,22,62,69
1,11,86,77,44,14
2,81,7,72,89,4
3,38,92,97,72,14
4,14,92,2,26,97
5,30,76,92,96,47
6,43,55,69,70,23
7,99,55,80,26,49
8,86,4,58,58,35
9,68,91,68,67,5


In [None]:
# your code
# hint: use df.min, max or np.min, max

### Normalize all columns of df by subtracting the column mean and dividing by the column standard deviation

In [None]:
df

Unnamed: 0,column_0,column_1,column_2,column_3,column_4
0,39,37,22,62,69
1,11,86,77,44,14
2,81,7,72,89,4
3,38,92,97,72,14
4,14,92,2,26,97
5,30,76,92,96,47
6,43,55,69,70,23
7,99,55,80,26,49
8,86,4,58,58,35
9,68,91,68,67,5


In [None]:
# your code
# hint: use apply (for what axis?), np.mean and np.std (or similar pandas methods)

### Rescale all columns of df so that the minimum value in each column is 0 and the maximum is 1

In [None]:
df

Unnamed: 0,column_0,column_1,column_2,column_3,column_4
0,39,37,22,62,69
1,11,86,77,44,14
2,81,7,72,89,4
3,38,92,97,72,14
4,14,92,2,26,97
5,30,76,92,96,47
6,43,55,69,70,23
7,99,55,80,26,49
8,86,4,58,58,35
9,68,91,68,67,5


In [None]:
# your code
# same as prev task. google for the minmax normalization formula

### Create a column that contains the second-largest value in each row

In [None]:
df

Unnamed: 0,column_0,column_1,column_2,column_3,column_4
0,39,37,22,62,69
1,11,86,77,44,14
2,81,7,72,89,4
3,38,92,97,72,14
4,14,92,2,26,97
5,30,76,92,96,47
6,43,55,69,70,23
7,99,55,80,26,49
8,86,4,58,58,35
9,68,91,68,67,5


In [None]:
# your code
# hint: apply with sorting

### Split a text column into two separate columns

In [None]:
# One raw text column: "<id><TAB> <surname>, <name>" per entry, with the
# header line stored as the first data row. The separators are deliberately
# irregular (note the double space before "Novak").
df_text = pd.Series(
    [
        'id\t Name, Surname',
        '2\t Nadal, Raphael',
        '5\t Djokovic,  Novak',
        '1\t Federer, Roger',
    ],
    name='row',
).to_frame()

In [None]:
df_text

Unnamed: 0,row
0,"id\t Name, Surname"
1,"2\t Nadal, Raphael"
2,"5\t Djokovic, Novak"
3,"1\t Federer, Roger"


---

[Pandas exercises in Python](https://www.machinelearningplus.com/python/101-pandas-exercises-python/)