In [1]:
import yfinance as yf
import pandas as pd
# Limit DataFrame/Series reprs to 5 rows so the recorded outputs stay short.
pd.options.display.max_rows = 5

In [2]:
# Fetch AAPL price bars at a 15-minute interval for the requested date window,
# then display the resulting frame.
# NOTE(review): this call needs network access, and intraday history is presumably
# only served for a limited recent window — confirm it still returns data on re-run.
df = yf.download(
    tickers="AAPL",
    start="2023-5-20",
    end="2023-6-20",
    interval="15m",
)
df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-05-22 09:30:00,173.979996,174.710007,173.649994,174.250000,174.250000,5363774
2023-05-22 09:45:00,174.225006,174.679993,173.985001,174.019897,174.019897,2494952
...,...,...,...,...,...,...
2023-06-16 15:30:00,184.919998,185.100006,184.710098,184.737900,184.737900,2358242
2023-06-16 15:45:00,184.735001,185.350006,184.270004,185.020004,185.020004,5864734


In [4]:
# Boolean mask with one True/False per row: True where 'Open' is 180 or higher.
# .ge(180) is the method spelling of `>= 180`.
filt = df['Open'].ge(180)
filt

Datetime
2023-05-22 09:30:00    False
2023-05-22 09:45:00    False
                       ...  
2023-06-16 15:30:00     True
2023-06-16 15:45:00     True
Name: Open, Length: 494, dtype: bool

In [11]:
# Option 1 for passing in the filter: plain brackets.
# (This is not the cell's last expression, so its result is not displayed.)
df[filt]
# Option 2: .loc makes it clearer that `filt` is a row filter; with Option 1 the
# same brackets are also used for other lookups (e.g. df['Open']), which can be confusing.
df.loc[filt]

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-06-02 09:30:00,181.029999,181.779999,180.559998,180.975006,180.975006,7800850
2023-06-02 09:45:00,180.979904,180.990005,179.690002,179.809998,179.809998,4316853
...,...,...,...,...,...,...
2023-06-16 15:30:00,184.919998,185.100006,184.710098,184.737900,184.737900,2358242
2023-06-16 15:45:00,184.735001,185.350006,184.270004,185.020004,185.020004,5864734


In [9]:
# Remember: .loc[ROWS, COLUMNS] — here the rows where filt is True, and only the 'Volume' column.
df.loc[filt, 'Volume']

Datetime
2023-06-02 09:30:00    7800850
2023-06-02 09:45:00    4316853
                        ...   
2023-06-16 15:30:00    2358242
2023-06-16 15:45:00    5864734
Name: Volume, Length: 207, dtype: int64

In [12]:
# ~ inverts the boolean mask, so this selects the rows where filt is False.
df.loc[~filt]

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-05-22 09:30:00,173.979996,174.710007,173.649994,174.250000,174.250000,5363774
2023-05-22 09:45:00,174.225006,174.679993,173.985001,174.019897,174.019897,2494952
...,...,...,...,...,...,...
2023-06-08 15:00:00,179.929993,180.019897,179.729996,179.889999,179.889999,1506989
2023-06-08 15:15:00,179.889999,180.289993,179.889999,180.255005,180.255005,1353446


In [20]:
# Small hand-written sample data: each key becomes a column and each list holds
# that column's values in row order.
people = dict(
    first=["Corey", "Jane", "John", "Mary"],
    last=["Schafer", "Doe", "Doe", "Jane"],
    email=[
        "CoreyMSchafer@gmail.com",
        "JaneDoe@email.com",
        "JohnDoe@email.com",
        "MaryJane@email.com",
    ],
)
df1 = pd.DataFrame(data=people)
df1

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Mary,Jane,MaryJane@email.com


In [26]:
# Two more ways to build a boolean mask:
#   isin          -> True where the value is one of the listed names
#   .str.contains -> True where the text contains the given pattern
possible_names = ['John', 'Mary']
filt1 = df1['first'].isin(values=possible_names)
filt2 = df1['email'].str.contains(pat='gmail')

In [24]:
# Rows selected by the isin mask (first name is one of possible_names).
df1.loc[filt1]

Unnamed: 0,first,last,email
2,John,Doe,JohnDoe@email.com
3,Mary,Jane,MaryJane@email.com


In [27]:
# Rows selected by the str.contains mask (email matches 'gmail').
df1.loc[filt2]

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com


In [36]:
# Rename all columns at once by assigning a full list of new labels
# (three labels for the frame's three columns).
df1.columns = ['first_name', 'last_name', 'email']

In [51]:
# Normalise the column labels in one chained expression on the column Index:
#   .str.upper()           -> every label in upper case
#   .str.replace(' ', '_') -> spaces swapped for underscores
# NOTE(review): an assignment displays nothing, so the `FIRST_NAME` fragment recorded
# under this cell looks like a leftover from an earlier run — confirm and clear it.
df1.columns = df1.columns.str.upper().str.replace(' ', '_')

Unnamed: 0,FIRST_NAME


In [64]:
# Undo the upper-casing, then restore the short 'first' / 'last' labels.
# rename() is first given a callable (applied to every column label) and then a
# mapping of old -> new labels. Assigning the result back to df1 replaces
# inplace=True, so the step reads as a single chained expression.
df1 = (
    df1
    .rename(columns=str.lower)
    .rename(columns={'first_name': 'first', 'last_name': 'last'})
)

In [65]:
# Display the frame to check the column labels after the rename.
df1

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Mary,Jane,MaryJane@email.com


In [70]:
# Changing all column values of one row: the row labelled 2 is overwritten with
# one value per column (first, last, email).
df1.loc[2] = ['Jon', 'Smith', 'JonSmith@email.com']
# Changing specific columns of one row: .loc[row_label, [column_labels]] = [values]
df1.loc[2, ['last', 'email']] = ['Doe', 'JonDoe@email.com']

In [67]:
# Display the frame to inspect row 2.
# NOTE(review): this cell's execution count (67) is lower than that of the assignment
# cell before it (70), and the recorded output shows row 2 as Jon / Smith — it appears
# to have been captured after only the full-row assignment; re-run in order to confirm.
df1

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,Jon,Smith,JonSmith@email.com
3,Mary,Jane,MaryJane@email.com


In [71]:
# Display the frame again; the recorded output shows row 2 with both the full-row
# assignment and the partial ('last' / 'email') update applied.
df1

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,Jon,Doe,JonDoe@email.com
3,Mary,Jane,MaryJane@email.com


In [75]:
# Lower-case every address with the .str accessor and store it back in the column.
# NOTE(review): an assignment displays nothing, so the empty `Series([], ...)` recorded
# under this cell looks like a leftover from an earlier run — confirm and clear it.
df1['email'] = df1['email'].str.lower()

Series([], Name: email, dtype: object)

In [81]:
"""
apply - DataFrames and Series
applymap - DataFrames
map - Series
replace - Series
"""
# eg applies len to every value in column email
df1['email'].apply(len)

0    23
1    17
2    16
3    18
Name: email, dtype: int64

In [86]:
# Passing a named function to apply
def update_email(email):
    """Return ``email`` converted to upper case."""
    uppercased = email.upper()
    return uppercased
# Run update_email on each value of the email column, store the result back in
# the column, then display the frame.
df1['email'] = df1['email'].apply(update_email)
df1

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,Jon,Doe,JONDOE@EMAIL.COM
3,Mary,Jane,MARYJANE@EMAIL.COM


In [89]:
# apply also accepts an anonymous function: here a lambda lower-cases each
# address and the result is written back to the email column.
df1['email'] = df1['email'].apply(lambda address: address.lower())

In [91]:
# When apply is run on a DataFrame rather than a Series, pandas passes each column
# (a Series) to the function, so len here reports the length of every column
# rather than the length of each string.
df1.apply(len)

first    4
last     4
email    4
dtype: int64

In [95]:
# pd.Series.min is called once per column, giving the smallest value of each column
# (for these string columns, the recorded output is the entry that sorts first).
df1.apply(pd.Series.min)

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [98]:
# applymap runs the function on every individual cell; it is a DataFrame method, not a Series one.
# NOTE(review): newer pandas releases reportedly deprecate applymap in favour of
# DataFrame.map — confirm against the pandas version in use.
df1.applymap(len)

Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,3,3,16
3,4,4,18


In [107]:
# Lower-case every cell with str.lower, rebind df1 to the result, then display it.
# assumes every cell holds a str — TODO confirm if other data is ever loaded here
df1 = df1.applymap(str.lower)
df1

Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,jon,doe,jondoe@email.com
3,mary,jane,maryjane@email.com


In [108]:
# map is used here as a Series method: each value is looked up in the dict, and
# values without a matching key come back as NaN (rows 0 and 1 in the output).
df1['first'].map({'mary': 'maryy', 'jon': 'john'})

0      NaN
1      NaN
2     john
3    maryy
Name: first, dtype: object

In [109]:
# replace performs the same substitutions as map, but values that are not keys
# of the dict are left unchanged instead of being turned into NaN.
df1['first'].replace({'mary': 'maryy', 'jon': 'john'})

0    corey
1     jane
2     john
3    maryy
Name: first, dtype: object