#### basics

1. apply() is used to apply a function along an axis of the DataFrame or on values of Series.

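2. applymap() is used to apply a function to a DataFrame elementwise. Since pandas 2.1 this method is named DataFrame.map() (applymap() is deprecated), which is the form used in the example below.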

3. map() is used to substitute each value in a Series with another value.

In [1]:
import pandas as pd
# Demo frame for the examples below: one inner list per row, columns A/B/C,
# rows labelled 'Row 1' .. 'Row 4'.
df = pd.DataFrame(
    [
        [1, 10, 20],
        [2, 20, 40],
        [3, 30, 60],
        [4, 40, 80],
    ],
    columns=['A', 'B', 'C'],
    index=['Row 1', 'Row 2', 'Row 3', 'Row 4'],
)

In [2]:
df

Unnamed: 0,A,B,C
Row 1,1,10,20
Row 2,2,20,40
Row 3,3,30,60
Row 4,4,40,80


#### how to use apply


In [3]:
def custom_sum(row):
    """Return the total of ``row`` by delegating to its own ``sum()`` method."""
    total = row.sum()
    return total

In [6]:
# axis=1 hands each row to custom_sum as a Series; the row totals become column 'D'.
df['D'] = df.apply(custom_sum, axis = 1)

In [7]:
df

Unnamed: 0,A,B,C,D
Row 1,1,10,20,31.0
Row 2,2,20,40,62.0
Row 3,3,30,60,93.0
Row 4,4,40,80,124.0


In [8]:
# axis=0 hands each column to custom_sum; the per-column totals are appended as 'Row 5'.
df.loc['Row 5'] = df.apply(custom_sum, axis = 0)

In [9]:
df

Unnamed: 0,A,B,C,D
Row 1,1.0,10.0,20.0,31.0
Row 2,2.0,20.0,40.0,62.0
Row 3,3.0,30.0,60.0,93.0
Row 4,4.0,40.0,80.0,124.0
Row 5,10.0,100.0,200.0,310.0


In [10]:
def mul_2(val):
    """Return ``val`` multiplied by 2."""
    doubled = val * 2
    return doubled

In [11]:
# Series.apply calls mul_2 on every value of column 'C'; the results become column 'E'.
df['E'] = df['C'].apply(mul_2)

In [12]:
df

Unnamed: 0,A,B,C,D,E
Row 1,1.0,10.0,20.0,31.0,40.0
Row 2,2.0,20.0,40.0,62.0,80.0
Row 3,3.0,30.0,60.0,93.0,120.0
Row 4,4.0,40.0,80.0,124.0,160.0
Row 5,10.0,100.0,200.0,310.0,400.0


In [14]:
# NOTE: the row sum spans every column present at this point (A, B, C, the old D and E),
# so 'D' is overwritten with a larger total than before (e.g. 102 instead of 31 for 'Row 1').
df['D'] = df.apply(lambda x: sum(x), axis = 1)

In [15]:
df

Unnamed: 0,A,B,C,D,E
Row 1,1.0,10.0,20.0,102.0,40.0
Row 2,2.0,20.0,40.0,204.0,80.0
Row 3,3.0,30.0,60.0,306.0,120.0
Row 4,4.0,40.0,80.0,408.0,160.0
Row 5,10.0,100.0,200.0,1020.0,400.0


In [16]:
df.loc['Row 6'] = df.apply(lambda x: x.sum(), axis = 0)

In [17]:
df

Unnamed: 0,A,B,C,D,E
Row 1,1.0,10.0,20.0,102.0,40.0
Row 2,2.0,20.0,40.0,204.0,80.0
Row 3,3.0,30.0,60.0,306.0,120.0
Row 4,4.0,40.0,80.0,408.0,160.0
Row 5,10.0,100.0,200.0,1020.0,400.0
Row 6,20.0,200.0,400.0,2040.0,800.0


In [18]:
df['F'] = df['C'].apply(lambda x:x*2)

In [19]:
df

Unnamed: 0,A,B,C,D,E,F
Row 1,1.0,10.0,20.0,102.0,40.0,40.0
Row 2,2.0,20.0,40.0,204.0,80.0,80.0
Row 3,3.0,30.0,60.0,306.0,120.0,120.0
Row 4,4.0,40.0,80.0,408.0,160.0,160.0
Row 5,10.0,100.0,200.0,1020.0,400.0,400.0
Row 6,20.0,200.0,400.0,2040.0,800.0,800.0


In [21]:
# result_type='broadcast' keeps the original shape: each row's sum is repeated across all columns.
df.apply(lambda x: x.sum(), axis = 1, result_type = 'broadcast')

Unnamed: 0,A,B,C,D,E,F
Row 1,213.0,213.0,213.0,213.0,213.0,213.0
Row 2,426.0,426.0,426.0,426.0,426.0,426.0
Row 3,639.0,639.0,639.0,639.0,639.0,639.0
Row 4,852.0,852.0,852.0,852.0,852.0,852.0
Row 5,2130.0,2130.0,2130.0,2130.0,2130.0,2130.0
Row 6,4260.0,4260.0,4260.0,4260.0,4260.0,4260.0


In [22]:
def cal_mul_col(row):
    """Return ``[row['A'] * 2, row['B'] * 3]`` as a two-element list."""
    doubled_a = row['A'] * 2
    tripled_b = row['B'] * 3
    return [doubled_a, tripled_b]

In [23]:
# result_type='expand' spreads the list returned for each row into separate columns (labelled 0 and 1).
df.apply(cal_mul_col, axis = 1, result_type = 'expand')

Unnamed: 0,0,1
Row 1,2.0,30.0
Row 2,4.0,60.0
Row 3,6.0,90.0
Row 4,8.0,120.0
Row 5,20.0,300.0
Row 6,40.0,600.0


In [24]:
add_col = df.apply(cal_mul_col, axis = 1, result_type = 'expand')

In [25]:
df[add_col.columns] = add_col

In [26]:
df

Unnamed: 0,A,B,C,D,E,F,0,1
Row 1,1.0,10.0,20.0,102.0,40.0,40.0,2.0,30.0
Row 2,2.0,20.0,40.0,204.0,80.0,80.0,4.0,60.0
Row 3,3.0,30.0,60.0,306.0,120.0,120.0,6.0,90.0
Row 4,4.0,40.0,80.0,408.0,160.0,160.0,8.0,120.0
Row 5,10.0,100.0,200.0,1020.0,400.0,400.0,20.0,300.0
Row 6,20.0,200.0,400.0,2040.0,800.0,800.0,40.0,600.0


In [29]:
# result_type='reduce' keeps each returned list as a single cell, so column 'new' holds lists.
df['new'] = df.apply(cal_mul_col, axis = 1, result_type = 'reduce')

In [30]:
df

Unnamed: 0,A,B,C,D,E,F,0,1,new
Row 1,1.0,10.0,20.0,102.0,40.0,40.0,2.0,30.0,"[2.0, 30.0]"
Row 2,2.0,20.0,40.0,204.0,80.0,80.0,4.0,60.0,"[4.0, 60.0]"
Row 3,3.0,30.0,60.0,306.0,120.0,120.0,6.0,90.0,"[6.0, 90.0]"
Row 4,4.0,40.0,80.0,408.0,160.0,160.0,8.0,120.0,"[8.0, 120.0]"
Row 5,10.0,100.0,200.0,1020.0,400.0,400.0,20.0,300.0,"[20.0, 300.0]"
Row 6,20.0,200.0,400.0,2040.0,800.0,800.0,40.0,600.0,"[40.0, 600.0]"


In [32]:
import numpy as np

In [35]:
# DataFrame.map applies np.square to every cell (requires pandas >= 2.1; older versions call this applymap).
df.map(np.square)

Unnamed: 0,A,B,C,D,E,F,0,1,new
Row 1,1.0,100.0,400.0,10404.0,1600.0,1600.0,4.0,900.0,"[4.0, 900.0]"
Row 2,4.0,400.0,1600.0,41616.0,6400.0,6400.0,16.0,3600.0,"[16.0, 3600.0]"
Row 3,9.0,900.0,3600.0,93636.0,14400.0,14400.0,36.0,8100.0,"[36.0, 8100.0]"
Row 4,16.0,1600.0,6400.0,166464.0,25600.0,25600.0,64.0,14400.0,"[64.0, 14400.0]"
Row 5,100.0,10000.0,40000.0,1040400.0,160000.0,160000.0,400.0,90000.0,"[400.0, 90000.0]"
Row 6,400.0,40000.0,160000.0,4161600.0,640000.0,640000.0,1600.0,360000.0,"[1600.0, 360000.0]"


In [36]:
s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])

In [37]:
s

0       cat
1       dog
2       NaN
3    rabbit
dtype: object

In [38]:
# Dict-based map: values without a matching key ('rabbit') and NaN both come back as NaN.
s.map({'cat':'kitten', 'dog': 'puppy'})

0    kitten
1     puppy
2       NaN
3       NaN
dtype: object

In [39]:
# Callable-based map: the formatter also runs on the NaN entry, producing 'i am a nan'.
s.map('i am a {}'.format)

0       i am a cat
1       i am a dog
2       i am a nan
3    i am a rabbit
dtype: object

In [40]:
# na_action='ignore' skips NaN entries, so they stay NaN instead of being formatted.
s.map('i am a {}'.format, na_action = 'ignore')

0       i am a cat
1       i am a dog
2              NaN
3    i am a rabbit
dtype: object

In [2]:
import pandas as pd
import numpy as np

In [3]:
pd.options.display.max_columns = 500
pd.options.display.max_rows = 500

In [5]:
# NOTE: despite the name, this is an absolute local file path specific to one machine;
# point it at your own copy of IMDB-Movie-Data.csv before running.
url = '/Users/jiashu/Downloads/IMDB-Movie-Data.csv'

In [6]:
movies = pd.read_csv(url)

In [7]:
movies.head()

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0
4,5,Suicide Squad,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0


In [8]:
movies.rename(columns = {'Runtime (Minutes)': 'run_m', 'Revenue (Millions)': 'rev_m'}, inplace = True)

In [13]:
def custom_rating(genre, rating):
    """Adjust ``rating`` according to the genres named in ``genre``.

    A genre containing 'Thriller' gets +1 (capped at 10). Otherwise a genre
    containing 'Comedy' gets -1 (floored at 0). Anything else is returned
    unchanged. 'Thriller' is tested first, so it wins when both are present.
    """
    if 'Thriller' in genre:
        return min(10, rating + 1)
    if 'Comedy' not in genre:
        return rating
    return max(0, rating - 1)

In [15]:
# Row-wise apply: pass each movie's Genre and Rating to custom_rating and store the result.
movies['custom_rating'] = movies.apply(lambda x: custom_rating(x['Genre'], x['Rating']), axis = 1)

In [17]:
df_gt_8 = movies[movies['Rating'] > 8]

In [19]:
and_df = movies[(movies['Rating'] > 8) & (movies['Votes'] > 100000)]

In [24]:
# Keep only movies whose Title splits on single spaces into at least 4 parts.
new_df = movies[movies.apply(lambda x: len(x['Title'].split(' ')) >= 4, axis = 1)]

In [25]:
new_df.head()

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,run_m,Rating,Votes,rev_m,Metascore,custom_rating
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0,8.1
8,9,The Lost City of Z,"Action,Adventure,Biography","A true-life drama, centering on British explor...",James Gray,"Charlie Hunnam, Robert Pattinson, Sienna Mille...",2016,141,7.1,7188,8.01,78.0,7.1
10,11,Fantastic Beasts and Where to Find Them,"Adventure,Family,Fantasy",The adventures of writer Newt Scamander in New...,David Yates,"Eddie Redmayne, Katherine Waterston, Alison Su...",2016,133,7.5,232072,234.02,66.0,7.5
15,16,The Secret Life of Pets,"Animation,Adventure,Comedy",The quiet life of a terrier named Max is upend...,Chris Renaud,"Louis C.K., Eric Stonestreet, Kevin Hart, Lake...",2016,87,6.6,120259,368.31,61.0,5.6
21,22,Manchester by the Sea,Drama,A depressed uncle is asked to take care of his...,Kenneth Lonergan,"Casey Affleck, Michelle Williams, Kyle Chandle...",2016,137,7.9,134213,47.7,96.0,7.9


In [31]:
# Mean revenue per release year as {year: mean of rev_m}.
# Uses the built-in groupby mean rather than passing np.mean to agg(): recent
# pandas versions emit a FutureWarning for the np.mean form. The resulting
# dict is the same.
year_revenue_dict = movies.groupby('Year')['rev_m'].mean().to_dict()

  year_revenue_dict = movies.groupby('Year').agg({'rev_m': np.mean}).to_dict()['rev_m']


In [32]:
year_revenue_dict

{2006: 86.29666666666667,
 2007: 87.88224489795917,
 2008: 99.08274509803923,
 2009: 112.60127659574468,
 2010: 105.08157894736841,
 2011: 87.61225806451613,
 2012: 107.97328125,
 2013: 87.12181818181818,
 2014: 85.07872340425531,
 2015: 78.3550442477876,
 2016: 54.690975609756094}

In [37]:
def bool_provider(revenue, year):
    """Return whether ``revenue`` is below the ``year_revenue_dict`` entry for ``year``.

    Raises KeyError if ``year`` is not a key of ``year_revenue_dict``.
    """
    yearly_mean = year_revenue_dict[year]
    return revenue < yearly_mean

In [40]:
# Keep movies whose revenue is below the mean for their year; rows where the comparison
# is False are dropped (that includes any missing rev_m, since NaN < x is False).
new_df = movies[movies.apply(lambda x: bool_provider(x['rev_m'], x['Year']), axis = 1)]

In [41]:
new_df

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,run_m,Rating,Votes,rev_m,Metascore,custom_rating
5,6,The Great Wall,"Action,Adventure,Fantasy",European mercenaries searching for black powde...,Yimou Zhang,"Matt Damon, Tian Jing, Willem Dafoe, Andy Lau",2016,103,6.1,56036,45.13,42.0,6.1
8,9,The Lost City of Z,"Action,Adventure,Biography","A true-life drama, centering on British explor...",James Gray,"Charlie Hunnam, Robert Pattinson, Sienna Mille...",2016,141,7.1,7188,8.01,78.0,7.1
14,15,Colossal,"Action,Comedy,Drama",Gloria is an out-of-work party girl forced to ...,Nacho Vigalondo,"Anne Hathaway, Jason Sudeikis, Austin Stowell,...",2016,109,6.4,8612,2.87,70.0,5.4
18,19,Lion,"Biography,Drama",A five-year-old Indian boy gets lost on the st...,Garth Davis,"Dev Patel, Nicole Kidman, Rooney Mara, Sunny P...",2016,118,8.1,102061,51.69,69.0,8.1
20,21,Gold,"Adventure,Drama,Thriller","Kenny Wells, a prospector desperate for a luck...",Stephen Gaghan,"Matthew McConaughey, Edgar Ramírez, Bryce Dall...",2016,120,6.7,19053,7.22,49.0,7.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...
993,994,Resident Evil: Afterlife,"Action,Adventure,Horror",While still out to destroy the evil Umbrella C...,Paul W.S. Anderson,"Milla Jovovich, Ali Larter, Wentworth Miller,K...",2010,97,5.9,140900,60.13,37.0,5.9
994,995,Project X,Comedy,3 high school seniors throw a birthday party t...,Nima Nourizadeh,"Thomas Mann, Oliver Cooper, Jonathan Daniel Br...",2012,88,6.7,164088,54.72,48.0,5.7
996,997,Hostel: Part II,Horror,Three American college students studying abroa...,Eli Roth,"Lauren German, Heather Matarazzo, Bijou Philli...",2007,94,5.5,73152,17.54,46.0,5.5
997,998,Step Up 2: The Streets,"Drama,Music,Romance",Romantic sparks occur between two dance studen...,Jon M. Chu,"Robert Hoffman, Briana Evigan, Cassie Ventura,...",2008,98,6.2,70699,58.01,50.0,6.2
