## Pandas

In [6]:
import pandas as pd, numpy as np, time
import random
import names
import warnings
warnings.filterwarnings('ignore')

In [7]:
# Build 10,000 synthetic "person" records as a list of dicts (one dict per row).
mock_data = []
for x in range(10000):
    person_id = x
    first_name = names.get_first_name()
    last_name = names.get_last_name()
    # Phone format: "+1-<digit 2..9>-<4 digits>-<4 digits>", zero-padded.
    phone_number = '+1-{}-{:04d}-{:04d}'.format(
        random.randint(2, 9),
        random.randint(1, 9999),
        random.randint(1, 9999)
    )
    some_val_1 = person_id + 1
    some_val_2 = first_name[::-1]  # first name reversed via slice
    some_val_3 = last_name + first_name
    # Roughly one record in five gets missing values for the last two attributes.
    if random.randint(1, 5) == 1:
        # Use np.nan: the np.NaN alias was removed in NumPy 2.0 and raises
        # AttributeError there, while np.nan works on every NumPy version.
        some_val_4 = np.nan
        some_val_5 = np.nan
    else:
        some_val_4 = person_id * random.randint(1, 29)
        some_val_5 = random.randint(-99, 9999999)
    person_record = {
        'person_id': person_id, 'first_name': first_name, 'last_name': last_name,
        'phone_number': phone_number, 'some_val_1': some_val_1, 'some_val_2': some_val_2,
        'some_val_3': some_val_3, 'some_val_4': some_val_4, 'some_val_5': some_val_5
    }
    mock_data.append(person_record)

In [8]:
# Turn the list of per-person dicts into a DataFrame (one row per dict,
# one column per key).
df = pd.DataFrame(mock_data)

### Pandas Profiling

In [10]:
import pandas_profiling as pp

# Build the profiling report for the whole frame, then leave it as the cell's
# last expression so the notebook renders it.
profile_report = pp.ProfileReport(df)
profile_report



#### Multiple filter criteria

In [13]:
# Three independent boolean masks over df's rows; they are combined with `&`
# in the next cell.
crti1 = df['some_val_4'].gt(5.0)          # some_val_4 strictly above 5.0
crti2 = df['some_val_5'].gt(99)           # some_val_5 strictly above 99
crti3 = df['last_name'].str[:1].eq("A")   # last name begins with "A"

In [14]:
# Keep only rows that satisfy all three criteria, then preview the first few.
all_criteria = crti1 & crti2 & crti3
df.loc[all_criteria].head()

Unnamed: 0,first_name,last_name,person_id,phone_number,some_val_1,some_val_2,some_val_3,some_val_4,some_val_5
15,Hortense,Allard,15,+1-5-3426-2359,16,esnetroH,AllardHortense,330.0,4430471.0
28,Arturo,Anyan,28,+1-7-1646-3653,29,orutrA,AnyanArturo,532.0,6193686.0
125,Alfred,Aromin,125,+1-6-7455-5394,126,derflA,ArominAlfred,2625.0,7597053.0
174,Mike,Armijo,174,+1-2-4202-8530,175,ekiM,ArmijoMike,3132.0,1606305.0
226,Harold,Adkins,226,+1-3-6285-3527,227,dloraH,AdkinsHarold,5424.0,545176.0


#### Difference between each row and the previous row

In [15]:
# 'change' = each row's some_val_5 minus the previous row's some_val_5.
df['change'] = df['some_val_5'].diff(periods=1)

#### Groupby

In [17]:
# Total 'change' per first name; preview the first few groups.
change_by_first_name = df.groupby("first_name")['change']
change_by_first_name.sum().head()

first_name
Aaron     -29597772.0
Abby        1643325.0
Abe        -1199110.0
Abel              0.0
Abigail           0.0
Name: change, dtype: float64

In [18]:
# Per-first-name summary: mean and max of 'change', plus mean of 'some_val_4'.
first_name_aggs = {
    'change': ['mean', 'max'],
    'some_val_4': 'mean',
}
df.groupby("first_name").agg(first_name_aggs).head()

Unnamed: 0_level_0,change,change,some_val_4
Unnamed: 0_level_1,mean,max,mean
first_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Aaron,-2276752.0,4908450.0,83480.6875
Abby,1643325.0,1643325.0,11480.0
Abe,-1199110.0,-1199110.0,65666.0
Abel,,,
Abigail,,,


In [19]:
# Per-last-name summary: mean and max of 'change', plus max of 'some_val_4'.
last_name_aggs = {
    'change': ['mean', 'max'],
    'some_val_4': ['max'],
}
df.groupby("last_name").agg(last_name_aggs).head()

Unnamed: 0_level_0,change,change,some_val_4
Unnamed: 0_level_1,mean,max,max
last_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Abad,1376415.0,1376415.0,10154.0
Abe,7913334.0,7913334.0,10269.0
Abercrombie,,,227214.0
Abernathy,6205321.0,6205321.0,208300.0
Ables,-803040.0,-803040.0,80353.0


In [20]:
# For every row, how many rows share its last name. transform() keeps the
# result aligned with df's original index rather than one row per group.
last_names_grouped = df.groupby('last_name')['last_name']
last_names_grouped.transform('count').head()

0    1
1    6
2    3
3    1
4    1
Name: last_name, dtype: int64

#### Concat

In [21]:
# Element-wise string concatenation: "<first_name>,<last_name>" per row.
df['first_name'].str.cat(df['last_name'], sep=',').head()

0     Samuel,Lettieri
1         Donnie,Dunn
2    Patricia,Cameron
3         Curtis,Sule
4      Vickie,Antoine
Name: first_name, dtype: object

#### Rank

In [22]:
# Rank rows by 'change', smallest value first (rank 1 = lowest change).
df['score_rank'] = df['change'].rank(ascending=True)

In [23]:
# Rows ranked above 100 that also have a positive change, ordered by rank.
rank_above_100 = df['score_rank'] > 100
positive_change = df['change'] > 0
ranked_gainers = df[rank_above_100 & positive_change]
ranked_gainers.sort_values(by='score_rank').head()

Unnamed: 0,first_name,last_name,person_id,phone_number,some_val_1,some_val_2,some_val_3,some_val_4,some_val_5,change,score_rank
8514,Cody,Garrett,8514,+1-8-9735-4990,8515,ydoC,GarrettCody,161766.0,1474841.0,295.0,3120.0
3955,Mary,Walker,3955,+1-7-9029-0281,3956,yraM,WalkerMary,35595.0,1743371.0,911.0,3121.0
6040,Jason,Davis,6040,+1-9-0296-4471,6041,nosaJ,DavisJason,169120.0,8778295.0,1544.0,3122.0
7201,Dawn,Snipes,7201,+1-9-9706-1702,7202,nwaD,SnipesDawn,86412.0,9490901.0,1756.0,3123.0
9724,Robert,Depaul,9724,+1-7-6525-5854,9725,treboR,DepaulRobert,252824.0,4322289.0,2223.0,3124.0
