In [2]:
import pandas as pd
from decimal import Decimal
import io

"""
Apply a function to each row of a DataFrame to create a new column

"""

def create_price_label(row):
    """Build a display label such as ``"Banana $2.50"`` for one row.

    Args:
        row: Mapping-like object (for example a DataFrame row) exposing a
            ``'name'`` string and a numeric ``'price'``.

    Returns:
        The title-cased name, followed by ``" $"`` and the price rounded to
        two decimal places.
    """
    # Go through str() so Decimal sees the short decimal form of the number
    # (e.g. "2.675") instead of the exact binary expansion of the float
    # (2.67499999...), which would tip half-way cases the wrong way.
    price = Decimal(str(row['price']))
    price = round(price, 2)
    return f"{row['name'].title()} ${price}"

# Inline CSV that stands in for a file on disk.
fruit_price = r"""name,price
banana,2.5
grape,5.356
apple,4.99
"""

# axis=1 passes each row to the function; axis=0 would pass each column.
df = pd.read_csv(io.StringIO(fruit_price)).assign(
    label=lambda frame: frame.apply(create_price_label, axis=1)
)

print(df)

     name  price         label
0  banana  2.500  Banana $2.50
1   grape  5.356   Grape $5.36
2   apple  4.990   Apple $4.99


In [4]:
import pandas as pd
import io

# Inline CSV that stands in for a file on disk.
# NOTE(review): Perth is in Western Australia (WA), not SA; the sample value is
# left untouched so the recorded output of this cell stays in sync.
city = r"""city,state
sydney,nsw
brisbane,qld
perth,sa
"""

df = pd.read_csv(io.StringIO(city))

# Use the vectorised .str accessor rather than apply(lambda ...): the result is
# the same on this data, and missing values propagate as NaN instead of raising
# AttributeError when a str method is called on a float.
df['state'] = df['state'].str.upper()
df['city'] = df['city'].str.title()
print(df)

       city state
0    Sydney   NSW
1  Brisbane   QLD
2     Perth    SA


In [7]:
import pandas as pd
import io

# Inline CSV; the blank first line is skipped by read_csv's default settings,
# so the header is still read from the "date,weather" line.
str_data = r"""
date,weather
2018-03-04,sunny
2018-03-05,sunny
2018-03-06,sunny
"""
df = pd.read_csv(io.StringIO(str_data))

# Guard: every row of the weather column must be exactly 'sunny'.
all_sunny = df['weather'].eq('sunny').all()
assert all_sunny, "Values are not the same in the column"
print(df)

         date weather
0  2018-03-04   sunny
1  2018-03-05   sunny
2  2018-03-06   sunny


In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import io
from scipy.stats import norm, binom

"""
https://en.wikipedia.org/wiki/Quantile

"""

# Build the sample frame from inline CSV: indexed by date, x and y as floats.
str_data = r"""date,x,y
2016-12-31,0,2
2017-01-01,1,2
2017-01-02,2,2 
2017-01-03,3,2
2017-01-04,4,2
2017-01-05,5,2
2017-01-06,6,3
2017-01-07,7,3
2017-01-08,8,4
2017-01-09,9,4
"""
df = (
    pd.read_csv(io.StringIO(str_data))
    .set_index(['date'])
    .astype({'x': float, 'y': float})
)

# Independent copy, so the second technique starts from the untouched data.
df_copy = df.copy()

# replace - 1: boolean mask + .loc assignment
# Rows with 0 < x < 3 (both bounds exclusive) get x overwritten with -1.
print(df)
mask = df['x'].gt(0.0) & df['x'].lt(3.0)
df.loc[mask, 'x'] = -1
print(df)

# replace - 2: np.where
# Where y is 3 or 4, x becomes NaN; everywhere else x keeps its value.
print(df_copy)
y_is_3_or_4 = df_copy['y'].eq(3.0) | df_copy['y'].eq(4.0)
df_copy['x'] = np.where(y_is_3_or_4, np.nan, df_copy['x'])
print(df_copy)

              x    y
date                
2016-12-31  0.0  2.0
2017-01-01  1.0  2.0
2017-01-02  2.0  2.0
2017-01-03  3.0  2.0
2017-01-04  4.0  2.0
2017-01-05  5.0  2.0
2017-01-06  6.0  3.0
2017-01-07  7.0  3.0
2017-01-08  8.0  4.0
2017-01-09  9.0  4.0
              x    y
date                
2016-12-31  0.0  2.0
2017-01-01 -1.0  2.0
2017-01-02 -1.0  2.0
2017-01-03  3.0  2.0
2017-01-04  4.0  2.0
2017-01-05  5.0  2.0
2017-01-06  6.0  3.0
2017-01-07  7.0  3.0
2017-01-08  8.0  4.0
2017-01-09  9.0  4.0
              x    y
date                
2016-12-31  0.0  2.0
2017-01-01  1.0  2.0
2017-01-02  2.0  2.0
2017-01-03  3.0  2.0
2017-01-04  4.0  2.0
2017-01-05  5.0  2.0
2017-01-06  6.0  3.0
2017-01-07  7.0  3.0
2017-01-08  8.0  4.0
2017-01-09  9.0  4.0
              x    y
date                
2016-12-31  0.0  2.0
2017-01-01  1.0  2.0
2017-01-02  2.0  2.0
2017-01-03  3.0  2.0
2017-01-04  4.0  2.0
2017-01-05  5.0  2.0
2017-01-06  NaN  3.0
2017-01-07  NaN  3.0
2017-01-08  NaN  4.0
2017-01-09  N