# Pandas Tricks: Real Python Course

## Video 2: Configure Options and Settings at Interpreter Startup

In this video we learn how to configure pandas options (mostly display settings) once, at interpreter startup, so that they apply to the whole session.

In [7]:
import pandas as pd
import numpy as np

def start():
    """Apply this notebook's preferred pandas options to the current session.

    Every setting is applied with ``pd.set_option`` under a dotted
    ``'<category>.<name>'`` key: the ``display`` options first, then the
    ``mode`` options. Takes no arguments and returns ``None``.
    """
    display_settings = {
        'max_columns': None,
        'max_colwidth': 25,
        'expand_frame_repr': False,
        'max_rows': 10,
        'max_seq_items': 50,
        'precision': 2,
        'show_dimensions': False,
    }
    mode_settings = {'chained_assignment': None}

    # Flatten both groups into (dotted key, value) pairs, keeping their order.
    dotted_pairs = [
        (f'{prefix}.{name}', setting)
        for prefix, group in (('display', display_settings), ('mode', mode_settings))
        for name, setting in group.items()
    ]
    for dotted_key, setting in dotted_pairs:
        pd.set_option(dotted_key, setting)
                
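# When this code is run as a script rather than imported (for example from a
# startup file), apply the options and then remove the helper from the namespace:
# if __name__ == '__main__':
#     start()
#     del start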

In [8]:
# Apply the display and mode options defined in start() to this session.
start()

In [9]:
# Seed NumPy's global generator first so that re-running the notebook rebuilds
# exactly the same 50x3 frame of standard-normal samples; without a seed every
# run produces different numbers.
np.random.seed(444)
data = pd.DataFrame(np.random.randn(50, 3))

In [10]:
# display.max_rows is 10, so the 50-row frame is rendered truncated, with a
# "..." row standing in for the rows that are left out.
data

Unnamed: 0,0,1,2
0,-0.51,-1.47,2.05
1,0.48,-0.05,-0.17
2,0.75,1.65,-0.99
3,1.48,-1.41,0.26
4,0.05,-0.04,-0.98
...,...,...,...
45,1.27,-0.29,-0.64
46,-2.16,-1.60,0.58
47,0.23,-0.54,-0.03
48,0.27,1.97,-2.74


## Video 3: Make Toy Data Structures with pandas' Testing Module

pandas ships with a testing module (`pandas._testing`) that lets you quickly create sample data structures.

In [17]:
import pandas._testing as tm
import numpy as np

# Override the testing module's N and K attributes (presumably the row and
# column counts of the generated samples).
# NOTE(review): the output below still shows four columns (A-D), so the K
# override appears to have no effect in the pandas version used here — confirm.
# NOTE(review): pandas._testing is a private module; its make* helpers may be
# missing in newer pandas releases — verify against the installed version.
tm.N, tm.K = 15, 3
# Fix NumPy's global seed so the random values are repeatable (assuming the
# helper draws from NumPy's global random state).
np.random.seed(444)
# Frame of random floats indexed by month-end dates; preview the first five rows.
tm.makeTimeDataFrame(freq="M").head()

Unnamed: 0,A,B,C,D
2000-01-31,0.36,0.27,0.35,-0.54
2000-02-29,0.38,-0.48,-0.43,-0.89
2000-03-31,1.38,0.3,-0.5,0.11
2000-04-30,1.18,-0.18,0.23,-0.74
2000-05-31,-0.94,1.18,-0.65,-0.08


In [18]:
# Default sample frame: the output shows random 10-character string labels as
# the index and float columns A-D; preview the first five rows.
tm.makeDataFrame().head()

Unnamed: 0,A,B,C,D
b8jgVbQbug,-0.75,-0.1,-0.06,0.04
OKCyyhkEvY,0.5,0.8,-0.17,-1.49
RtcTWq0AMT,-0.15,0.51,-0.09,-0.72
vtdamOujY0,-0.35,0.27,1.55,-0.05
tW49Zqe3lC,0.16,0.84,0.69,1.54


In [19]:
# List every attribute name of the testing module that starts with 'make',
# i.e. the sample-data helpers it offers.
[i for i in dir(tm) if i.startswith('make')]

['makeBoolIndex',
 'makeCategoricalIndex',
 'makeCustomDataframe',
 'makeCustomIndex',
 'makeDataFrame',
 'makeDateIndex',
 'makeFloatIndex',
 'makeFloatSeries',
 'makeIntIndex',
 'makeIntervalIndex',
 'makeMissingDataframe',
 'makeMixedDataFrame',
 'makeMultiIndex',
 'makeNumericIndex',
 'makeObjectSeries',
 'makePeriodFrame',
 'makePeriodIndex',
 'makePeriodSeries',
 'makeRangeIndex',
 'makeStringIndex',
 'makeStringSeries',
 'makeTimeDataFrame',
 'makeTimeSeries',
 'makeTimedeltaIndex',
 'makeUIntIndex',
 'make_rand_series']

## Video 4: Take Advantage of Accessor Methods

An accessor is an attribute on a pandas object (for example `.str` or `.dt`) that acts as an interface to additional, type-specific methods.

In [22]:
import pandas as pd

# The Series class keeps the names of its accessors in the private
# `_accessors` set; the output lists four of them: cat, dt, sparse and str.
pd.Series._accessors

{'cat', 'dt', 'sparse', 'str'}

In [23]:
# Four sample US mailing addresses, each ending in a ZIP code; used below to
# demonstrate the .str accessor.
addr = pd.Series(
    data=[
        'Washington DC 20003',
        'Brooklyn, NY 11211-1755',
        'Omaha, NE 68154',
        'Pittsburgh, PA 15211',
    ],
)

In [25]:
# Baseline for comparison: a plain Python string has its own upper() method.
s = 'hello'
s.upper()

'HELLO'

In [26]:
# upper() cannot be called on the Series itself; to upper-case every item,
# go through the .str accessor, which applies the string method to each element.
addr.str.upper()

0        WASHINGTON DC 20003
1    BROOKLYN, NY 11211-1755
2            OMAHA, NE 68154
3       PITTSBURGH, PA 15211
dtype: object

In [29]:
# Count regex matches per element: r'\d' matches a single digit, so this gives
# the number of digit characters in each address.
addr.str.count(r'\d')

0    5
1    9
2    5
3    5
dtype: int64

In [30]:
# Nine consecutive quarter-end dates starting in 2019, wrapped in a Series so
# the .dt accessor can be used in the cells that follow.
# NOTE(review): newer pandas releases may flag the 'Q' alias as deprecated in
# favour of 'QE' — confirm against the installed version.
daterng = pd.Series(pd.date_range('2019', periods=9, freq='Q'))

In [31]:
# Show the nine generated dates; each one is the last day of a calendar quarter.
daterng

0   2019-03-31
1   2019-06-30
2   2019-09-30
3   2019-12-31
4   2020-03-31
5   2020-06-30
6   2020-09-30
7   2020-12-31
8   2021-03-31
dtype: datetime64[ns]

In [32]:
# .dt exposes datetime methods element-wise; day_name() gives the weekday
# name of each date.
daterng.dt.day_name()

0       Sunday
1       Sunday
2       Monday
3      Tuesday
4      Tuesday
5      Tuesday
6    Wednesday
7     Thursday
8    Wednesday
dtype: object

In [34]:
# Boolean-mask selection: keep only the dates whose calendar quarter is 3 or 4.
daterng.loc[daterng.dt.quarter > 2]

2   2019-09-30
3   2019-12-31
6   2020-09-30
7   2020-12-31
dtype: datetime64[ns]

In [35]:
# is_year_end yields a boolean mask; indexing with it keeps only the dates
# that fall on 31 December.
daterng[daterng.dt.is_year_end]

3   2019-12-31
7   2020-12-31
dtype: datetime64[ns]