```python
np.where(condition,
         value_if_true,
         value_if_false)

```

In [1]:
# %load command.py

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity='all'

# Render inline matplotlib figures as SVG and set the default figure resolution to 120 dpi.
%config InlineBackend.figure_format='svg'
plt.rcParams['figure.dpi']=120

# Show floats with a thousands separator and two decimals; never truncate long cell text.
pd.options.display.float_format='{:,.2f}'.format
pd.set_option('display.max_colwidth', None)


In [2]:
# Toy purchase ledger: one row per (fruit, supplier) pair, written row-wise.
df = pd.DataFrame(
    data=[
        ('Apple', 'T & C Bro', 1000, 4.8),
        ('Banana', 'T & C Bro', 2000, 3.2),
        ('Apple', 'JM Wholesales', 3000, 4.2),
        ('Banana', 'JM Wholesales', 4000, 4.3),
    ],
    columns=['fruit', 'supplier', 'weight (kg)', 'customer_rating'],
)
df

Unnamed: 0,fruit,supplier,weight (kg),customer_rating
0,Apple,T & C Bro,1000,4.8
1,Banana,T & C Bro,2000,3.2
2,Apple,JM Wholesales,3000,4.2
3,Banana,JM Wholesales,4000,4.3


In [3]:
# Price list for supplier "T & C Bro": one row per fruit, price per kg.
tc_price = pd.DataFrame(
    data=[
        ['Apple', 1.1],
        ['Banana', 2.0],
        ['Orange', 2.9],
        ['Pineapple', 3.1],
    ],
    columns=['fruit', 'price (kg)'],
)

tc_price


Unnamed: 0,fruit,price (kg)
0,Apple,1.1
1,Banana,2.0
2,Orange,2.9
3,Pineapple,3.1


In [4]:
# Price list for supplier "JM Wholesales", built from a fruit -> price mapping
# and then flattened back into a two-column frame with a default integer index.
jm_price = (
    pd.Series({'Apple': 1.2, 'Banana': 1.8, 'Orange': 4.0, 'Pineapple': 6.0})
    .rename_axis('fruit')
    .reset_index(name='price (kg)')
)

jm_price


Unnamed: 0,fruit,price (kg)
0,Apple,1.2
1,Banana,1.8
2,Orange,4.0
3,Pineapple,6.0


In [5]:
# Key the ledger by fruit so price lists can be looked up with df.index.
df = df.set_index(keys='fruit', drop=True)
df

Unnamed: 0_level_0,supplier,weight (kg),customer_rating
fruit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Apple,T & C Bro,1000,4.8
Banana,T & C Bro,2000,3.2
Apple,JM Wholesales,3000,4.2
Banana,JM Wholesales,4000,4.3


In [6]:
# Key the T & C Bro price list by fruit so it can be queried with .loc[fruit].
tc_price = tc_price.set_index(keys='fruit', drop=True)
tc_price

Unnamed: 0_level_0,price (kg)
fruit,Unnamed: 1_level_1
Apple,1.1
Banana,2.0
Orange,2.9
Pineapple,3.1


In [7]:
# Key the JM Wholesales price list by fruit so it can be queried with .loc[fruit].
jm_price = jm_price.set_index(keys='fruit', drop=True)
jm_price

Unnamed: 0_level_0,price (kg)
fruit,Unnamed: 1_level_1
Apple,1.2
Banana,1.8
Orange,4.0
Pineapple,6.0


In [8]:
# Look up every row's fruit in both price lists, then let np.where pick
# the T & C Bro price where the supplier matches and the JM price otherwise.
is_tc_row = df['supplier'].eq('T & C Bro')
tc_lookup = tc_price.loc[df.index, 'price (kg)']
jm_lookup = jm_price.loc[df.index, 'price (kg)']
df['price (kg)'] = np.where(is_tc_row, tc_lookup, jm_lookup)

df

Unnamed: 0_level_0,supplier,weight (kg),customer_rating,price (kg)
fruit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Apple,T & C Bro,1000,4.8,1.1
Banana,T & C Bro,2000,3.2,2.0
Apple,JM Wholesales,3000,4.2,1.2
Banana,JM Wholesales,4000,4.3,1.8


In [9]:
# Creating a conditional column from 3 datasets
# Ledger with three suppliers: one row per (fruit, supplier) pair, written row-wise.

df = pd.DataFrame(
    data=[
        ('Apple', 'T & C Bro', 1000, 4.8),
        ('Banana', 'T & C Bro', 2000, 3.2),
        ('Apple', 'JM Wholesales', 3000, 4.2),
        ('Banana', 'JM Wholesales', 4000, 4.3),
        ('Apple', 'Star Ltd.', 2000, 5.0),
        ('Banana', 'Star Ltd.', 1500, 4.8),
    ],
    columns=['fruit', 'supplier', 'buy (kg)', 'customer_rating'],
)

df

Unnamed: 0,fruit,supplier,buy (kg),customer_rating
0,Apple,T & C Bro,1000,4.8
1,Banana,T & C Bro,2000,3.2
2,Apple,JM Wholesales,3000,4.2
3,Banana,JM Wholesales,4000,4.3
4,Apple,Star Ltd.,2000,5.0
5,Banana,Star Ltd.,1500,4.8


In [10]:
# Combined price table: one row per fruit, one price-per-kg column per supplier.
df_price = pd.DataFrame(
    data=[
        ('Apple', 1.1, 1.2, 0.9),
        ('Banana', 2.0, 1.8, 1.5),
        ('Orange', 2.9, 4.0, 3.8),
        ('Pineapple', 3.1, 6.0, 5.5),
    ],
    columns=['fruit', 'T & C Bro', 'JM Wholesales', 'Star Ltd.'],
)

df_price


Unnamed: 0,fruit,T & C Bro,JM Wholesales,Star Ltd.
0,Apple,1.1,1.2,0.9
1,Banana,2.0,1.8,1.5
2,Orange,2.9,4.0,3.8
3,Pineapple,3.1,6.0,5.5


In [11]:
# Key the three-supplier ledger by fruit so the price table can be aligned via df.index.
df = df.set_index(keys='fruit', drop=True)
df

Unnamed: 0_level_0,supplier,buy (kg),customer_rating
fruit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Apple,T & C Bro,1000,4.8
Banana,T & C Bro,2000,3.2
Apple,JM Wholesales,3000,4.2
Banana,JM Wholesales,4000,4.3
Apple,Star Ltd.,2000,5.0
Banana,Star Ltd.,1500,4.8


In [12]:
# Key the combined price table by fruit so rows can be selected with .loc[fruit].
df_price = df_price.set_index(keys='fruit', drop=True)
df_price

Unnamed: 0_level_0,T & C Bro,JM Wholesales,Star Ltd.
fruit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Apple,1.1,1.2,0.9
Banana,2.0,1.8,1.5
Orange,2.9,4.0,3.8
Pineapple,3.1,6.0,5.5


In [13]:
# Repeat the price-table row of each ledger row's fruit, so `args` lines up
# row-for-row with `df` (all supplier columns are kept).
args = df_price.loc[df.index, :]
args

Unnamed: 0_level_0,T & C Bro,JM Wholesales,Star Ltd.
fruit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Apple,1.1,1.2,0.9
Banana,2.0,1.8,1.5
Apple,1.1,1.2,0.9
Banana,2.0,1.8,1.5
Apple,1.1,1.2,0.9
Banana,2.0,1.8,1.5


In [14]:
# Build one boolean mask and one candidate price column per supplier from a
# single list, so conditions and choices always stay in matching order.
suppliers = ['T & C Bro', 'JM Wholesales', 'Star Ltd.']

conds = [df['supplier'] == supplier_name for supplier_name in suppliers]

choices = [args[supplier_name] for supplier_name in suppliers]

# default=np.nan: a row whose supplier matches none of the masks gets a missing
# price rather than np.select's implicit default of 0, which would read as "free".
df['price (kg)'] = np.select(conds, choices, default=np.nan)
df

Unnamed: 0_level_0,supplier,buy (kg),customer_rating,price (kg)
fruit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Apple,T & C Bro,1000,4.8,1.1
Banana,T & C Bro,2000,3.2,2.0
Apple,JM Wholesales,3000,4.2,1.2
Banana,JM Wholesales,4000,4.3,1.8
Apple,Star Ltd.,2000,5.0,0.9
Banana,Star Ltd.,1500,4.8,1.5


### Tutorial: Add a Column to a Pandas DataFrame Based on an If-Else Condition
[Source: Dataquest tutorial](https://www.dataquest.io/blog/tutorial-add-column-pandas-dataframe-based-on-if-else-condition/)

In [15]:
# Load the tweet export, then preview it: first rows, dimensions, and column labels.
df = pd.read_csv(filepath_or_buffer='dataquest_tweets_csv_1.csv')
df.head(n=5)
df.shape
df.columns

Unnamed: 0,date,time,tweet,mentions,photos,replies_count,retweets_count,likes_count
0,2020-06-29,09:36:13,"""Going from 15k to 170k in salary...There's no comparison!"" - Dataquest learner in the Netherlands SALE ENDS SOON: http://dataquest.io/subscribe pic.twitter.com/AiepYsrpFF",[],['https://pbs.twimg.com/media/EbrvY0SWoAcEQWn.jpg'],0,0,0
1,2020-06-26,14:14:44,"Hi Tom, sorry about that! We try to respond to all paid subscriber tickets within 2 business days so you should get a response on that by the end of the day today. If you don't, please reach out!",['beingtomiwa'],[],0,0,0
2,2020-06-26,12:00:14,Become an @rstudio power user: https://bit.ly/2Vd5g6X,['rstudio'],[],0,2,7
3,2020-06-26,11:23:21,Please get in touch with our support team by emailing hello@dataquest.io,['jimohkassim'],[],0,0,0
4,2020-06-26,02:00:05,Learn to master R markdown in this free tutorial: #rstats https://bit.ly/2VgANEW,[],[],0,2,7


(205, 8)

Index(['date', 'time', 'tweet', 'mentions', 'photos', 'replies_count',
       'retweets_count', 'likes_count '],
      dtype='object')

In [16]:
# The CSV header 'likes_count ' carries a trailing space. Strip surrounding
# whitespace from every column label (not just that one) and rebind `df`
# instead of mutating it with inplace=True.
df = df.rename(columns=str.strip)

#### Adding a Pandas Column with a True/False Condition Using np.where()

In [17]:
# A tweet counts as having an image when its 'photos' cell is anything other
# than the literal string '[]'; np.where maps that test to True / False.
df['hasimage'] = np.where(df['photos'].ne('[]'), True, False)
df

Unnamed: 0,date,time,tweet,mentions,photos,replies_count,retweets_count,likes_count,hasimage
0,2020-06-29,09:36:13,"""Going from 15k to 170k in salary...There's no comparison!"" - Dataquest learner in the Netherlands SALE ENDS SOON: http://dataquest.io/subscribe pic.twitter.com/AiepYsrpFF",[],['https://pbs.twimg.com/media/EbrvY0SWoAcEQWn.jpg'],0,0,0,True
1,2020-06-26,14:14:44,"Hi Tom, sorry about that! We try to respond to all paid subscriber tickets within 2 business days so you should get a response on that by the end of the day today. If you don't, please reach out!",['beingtomiwa'],[],0,0,0,False
2,2020-06-26,12:00:14,Become an @rstudio power user: https://bit.ly/2Vd5g6X,['rstudio'],[],0,2,7,False
3,2020-06-26,11:23:21,Please get in touch with our support team by emailing hello@dataquest.io,['jimohkassim'],[],0,0,0,False
4,2020-06-26,02:00:05,Learn to master R markdown in this free tutorial: #rstats https://bit.ly/2VgANEW,[],[],0,2,7,False
...,...,...,...,...,...,...,...,...,...
200,2020-05-02,01:00:35,This is worth a read: https://bit.ly/3f8X3J8,[],[],1,1,7,False
201,2020-05-01,19:21:55,"If you're logged in, you can share a project or ask a question by clicking the + New Topic button pic.twitter.com/I8h2uLpAxG",['okapi_code'],['https://pbs.twimg.com/media/EW9_tkXXYAQ3ULg.jpg'],1,0,0,True
202,2020-05-01,19:01:19,Is there a specific screen you're having an issue with? I just ran through every screen in those practice problems but didn't encounter any issues. If you keep seeing issues I'd suggest asking about it on http://community.dataquest.io or contacting support: hello@dataquest.io,['okapi_code'],[],1,0,0,False
203,2020-05-01,17:00:15,"We ❤️ both Python and R, so this is cool: https://bit.ly/2SptbP3",[],[],0,2,16,False


In [18]:
# 'hasimage' is already a boolean column, so it is used directly as the row
# mask (and negated with ~) instead of being compared to True / False.
image_tweets = df[df['hasimage']]
image_tweets.head()
no_image_tweets = df[~df['hasimage']]
no_image_tweets.head()

Unnamed: 0,date,time,tweet,mentions,photos,replies_count,retweets_count,likes_count,hasimage
0,2020-06-29,09:36:13,"""Going from 15k to 170k in salary...There's no comparison!"" - Dataquest learner in the Netherlands SALE ENDS SOON: http://dataquest.io/subscribe pic.twitter.com/AiepYsrpFF",[],['https://pbs.twimg.com/media/EbrvY0SWoAcEQWn.jpg'],0,0,0,True
7,2020-06-25,10:03:04,"""I'm really learning more at Dataquest than from a master's degree in big data."" - Dataquest Learner in Spain On sale now, view plans: http://dataquest.io/subscribe pic.twitter.com/JbwiJ5sxTv",[],['https://pbs.twimg.com/media/EbXPOjqXgAIgE2f.jpg'],1,0,4,True
10,2020-06-24,09:38:54,"""Dataquest really helped me to switch my career smoothly from teaching to data science."" - Dataquest Learner in the United States On sale now, view plans: http://dataquest.io/subscribe pic.twitter.com/gPySEEqbko",[],['https://pbs.twimg.com/media/EbR_-LoXsAEoCB3.jpg'],1,0,0,True
12,2020-06-23,18:00:28,"We've shared this @lizandmollie image before, but we're doing it again because it's 100% correct. Struggling with a bit of code you've been staring at for hours? Take a break, go for a run, get some food, sleep. It works! Taking care of yourself is key. pic.twitter.com/Zqespjy0UG",['lizandmollie'],['https://pbs.twimg.com/media/EbOpVg6WAAAJcIH.jpg'],0,16,41,True
21,2020-06-22,12:51:55,"""I feel more comfortable using data, which allows me to make better decisions at work."" - Dataquest Learner in the United States On sale now, view plans: http://dataquest.io/subscribe pic.twitter.com/KGz09sSeyI",[],['https://pbs.twimg.com/media/EbIZHbMX0AIXy1p.jpg'],1,0,1,True


Unnamed: 0,date,time,tweet,mentions,photos,replies_count,retweets_count,likes_count,hasimage
1,2020-06-26,14:14:44,"Hi Tom, sorry about that! We try to respond to all paid subscriber tickets within 2 business days so you should get a response on that by the end of the day today. If you don't, please reach out!",['beingtomiwa'],[],0,0,0,False
2,2020-06-26,12:00:14,Become an @rstudio power user: https://bit.ly/2Vd5g6X,['rstudio'],[],0,2,7,False
3,2020-06-26,11:23:21,Please get in touch with our support team by emailing hello@dataquest.io,['jimohkassim'],[],0,0,0,False
4,2020-06-26,02:00:05,Learn to master R markdown in this free tutorial: #rstats https://bit.ly/2VgANEW,[],[],0,2,7,False
5,2020-06-25,21:00:05,This is an interesting story about what was some early and very successful data science — although it happened well before that term became popular: https://bit.ly/2A3Ozn9,[],[],0,1,2,False


In [19]:
# List the column labels again: 'likes_count' should no longer have a trailing
# space, and the new 'hasimage' column should be present.
df.columns

Index(['date', 'time', 'tweet', 'mentions', 'photos', 'replies_count',
       'retweets_count', 'likes_count', 'hasimage'],
      dtype='object')

In [20]:
# Mean likes for all tweets, tweets with an image, and tweets without one.
print(f'Average likes, all tweets: {df["likes_count"].mean():.2f}')
print(f'Average likes, image tweets: {image_tweets["likes_count"].mean():.2f}')
print(f'Average likes, no_image_tweets: {no_image_tweets["likes_count"].mean():.2f}')

print()
# Same comparison for retweets_count; labelled as retweets so the two groups
# of numbers cannot be mistaken for each other.
print(f'Average retweets, all tweets: {df["retweets_count"].mean():.2f}')
print(f'Average retweets, image tweets: {image_tweets["retweets_count"].mean():.2f}')
print(f'Average retweets, no_image_tweets: {no_image_tweets["retweets_count"].mean():.2f}')

Average likes, all tweets:  8.44
Average likes, image tweets: 6.75
Average likes, no_image_tweets: 8.58

Average likes, all tweets:  2.32
Average likes, image tweets: 1.44
Average likes, no_image_tweets: 2.39


### Adding a Pandas Column with More Complicated Conditions

For example, to dig deeper into this question, we might want to create a few interactivity “tiers” and assess what percentage of tweets that reached each tier contained images. For simplicity’s sake, let’s use likes to measure interactivity, and separate tweets into four tiers:

- tier_4 — 2 or fewer likes
- tier_3 — 3-9 likes
- tier_2 — 10-15 likes
- tier_1 — 16+ likes

To accomplish this, we can use a function called np.select(). We’ll give it two arguments: a list of our conditions, and a corresponding list of the values we’d like to assign to each row in our new column.

This means that the order matters: if the first condition in our conditions list is met, the first value in our values list will be assigned to our new column for that row. If the second condition is met, the second value will be assigned, et cetera.

In [21]:
# One condition per tier, in the same order as `choices`; np.select assigns
# the label of the first condition that is True for each row.
conditions = [
    df['likes_count'] <= 2,                                  # tier_4: 2 or fewer likes
    (df['likes_count'] > 2) & (df['likes_count'] <= 9),      # tier_3: 3-9 likes
    (df['likes_count'] >= 10) & (df['likes_count'] <= 15),   # tier_2: 10-15 likes
    df['likes_count'] > 15,                                  # tier_1: 16+ likes
]

choices = ['tier_4', 'tier_3', 'tier_2', 'tier_1']

# A string default keeps every possible output a string; np.select's implicit
# default of 0 is an integer and would otherwise be mixed with the tier labels.
df['tier'] = np.select(conditions, choices, default='untiered')

df.head()

Unnamed: 0,date,time,tweet,mentions,photos,replies_count,retweets_count,likes_count,hasimage,tier
0,2020-06-29,09:36:13,"""Going from 15k to 170k in salary...There's no comparison!"" - Dataquest learner in the Netherlands SALE ENDS SOON: http://dataquest.io/subscribe pic.twitter.com/AiepYsrpFF",[],['https://pbs.twimg.com/media/EbrvY0SWoAcEQWn.jpg'],0,0,0,True,tier_4
1,2020-06-26,14:14:44,"Hi Tom, sorry about that! We try to respond to all paid subscriber tickets within 2 business days so you should get a response on that by the end of the day today. If you don't, please reach out!",['beingtomiwa'],[],0,0,0,False,tier_4
2,2020-06-26,12:00:14,Become an @rstudio power user: https://bit.ly/2Vd5g6X,['rstudio'],[],0,2,7,False,tier_3
3,2020-06-26,11:23:21,Please get in touch with our support team by emailing hello@dataquest.io,['jimohkassim'],[],0,0,0,False,tier_4
4,2020-06-26,02:00:05,Learn to master R markdown in this free tutorial: #rstats https://bit.ly/2VgANEW,[],[],0,2,7,False,tier_3


In [22]:
# Share of tier_4 tweets with / without an image; rows and column are selected
# in a single .loc call rather than by chained indexing.
df.loc[df['tier'] == 'tier_4', 'hasimage'].value_counts(normalize=True)

False   0.85
True    0.15
Name: hasimage, dtype: float64

In [23]:
# Share of tier_1 tweets with / without an image; rows and column are selected
# in a single .loc call rather than by chained indexing.
df.loc[df['tier'] == 'tier_1', 'hasimage'].value_counts(normalize=True)

False   0.91
True    0.09
Name: hasimage, dtype: float64