In [1]:
import numpy as np
import pandas as pd

In [2]:
# Plain dict: number of sales made by each employee, keyed by employee name.
sales = dict(Tony=103, Sally=202, Randy=380, Ellen=101, Fred=82)

# Plain dict: region each employee works in, keyed by employee name.
# It lists more employees than `sales` does, and uses np.nan where no
# region is given (Fred, HanWei).
region = dict(
    Tony='West',
    Sally='South',
    Carl='West',
    Archie='North',
    Randy='East',
    Ellen='South',
    Fred=np.nan,
    Mo='East',
    HanWei=np.nan,
)

In [3]:
# Turn each dict into a one-column DataFrame. orient='index' makes the dict
# keys (employee names) the row labels; `columns` names the single column.
sales_df = pd.DataFrame.from_dict(data=sales, columns=['sales'], orient='index')
region_df = pd.DataFrame.from_dict(data=region, columns=['region'], orient='index')

In [4]:
# Label-based lookup of one employee's row (all columns), returned as a Series.
sales_df.loc['Tony', :]

sales    103
Name: Tony, dtype: int64

In [5]:
 sales_df.index

Index(['Tony', 'Sally', 'Randy', 'Ellen', 'Fred'], dtype='object')

In [6]:
# Left join on the index: every employee in region_df is kept, and employees
# without a sales record get NaN (which is why `sales` prints as float).
# Note that `joined_df` is reassigned by the right join in the next cell.
joined_df = region_df.join(other=sales_df, how='left')
print(joined_df)

       region  sales
Tony     West  103.0
Sally   South  202.0
Carl     West    NaN
Archie  North    NaN
Randy    East  380.0
Ellen   South  101.0
Fred      NaN   82.0
Mo       East    NaN
HanWei    NaN    NaN


In [7]:
# Right join on the index: only employees present in sales_df are kept, so
# no sales value is missing; `region` may still be NaN (Fred).
# This rebinds `joined_df`, replacing the left-join result from the cell above.
joined_df = region_df.join(other=sales_df, how='right')
print(joined_df)

      region  sales
Tony    West    103
Sally  South    202
Randy   East    380
Ellen  South    101
Fred     NaN     82


In [8]:
# Total sales per region.
# groupby() leaves out rows whose region is NaN (Fred), so only East, South
# and West appear in the result.
# reset_index() is chained instead of being applied with inplace=True, so
# `region` comes back as an ordinary column in a single expression and no
# frame is mutated after it has been bound to a name.
grouped_df = joined_df.groupby(by='region').sum().reset_index()
print(grouped_df)

  region  sales
0   East    380
1  South    303
2   West    103


In [9]:
# Attach each region's total to every employee row.
# Both frames have a `sales` column: the left one keeps its name (suffix '')
# and the regional total becomes `sales_region`.
# The merge returns a fresh 0..n-1 integer index, so the employee names that
# formed joined_df's index are not present in the result.
employee_contrib = pd.merge(
    joined_df,
    grouped_df,
    on='region',
    how='left',
    suffixes=('', '_region'),
)
print(employee_contrib)

  region  sales  sales_region
0   West    103         103.0
1  South    202         303.0
2   East    380         380.0
3  South    101         303.0
4    NaN     82           NaN


In [10]:
# Put the employee names back as the index. The labels are matched purely by
# position, so this relies on employee_contrib having the same row order and
# row count as joined_df.
employee_contrib = employee_contrib.set_index(keys=joined_df.index)
print(employee_contrib)

      region  sales  sales_region
Tony    West    103         103.0
Sally  South    202         303.0
Randy   East    380         380.0
Ellen  South    101         303.0
Fred     NaN     82           NaN


In [11]:
# Remove employees with no region (rows where `region` is NaN); other
# columns are not considered when deciding which rows to drop.
employee_contrib = employee_contrib.dropna(axis='index', subset=['region'])
print(employee_contrib)

      region  sales  sales_region
Tony    West    103         103.0
Sally  South    202         303.0
Randy   East    380         380.0
Ellen  South    101         303.0


In [12]:
# Replace missing values in the `sales` column only with 0; the dict form of
# fillna leaves every other column untouched.
employee_contrib = employee_contrib.fillna(value={'sales': 0})
print(employee_contrib)

      region  sales  sales_region
Tony    West    103         103.0
Sally  South    202         303.0
Randy   East    380         380.0
Ellen  South    101         303.0


In [14]:
# Each employee's share of their region's total sales.
# NOTE: despite the '%' in the column name, the stored values are fractions
# (sales / sales_region, e.g. 0.666667), not values scaled to 0-100.
employee_contrib['%_of_sales'] = employee_contrib['sales'].div(employee_contrib['sales_region'])
employee_contrib['%_of_sales']

Tony     1.000000
Sally    0.666667
Randy    1.000000
Ellen    0.333333
Name: %_of_sales, dtype: float64

In [16]:
# First sample table: one row per city with its temperature and humidity.
df1 = pd.DataFrame(
    [
        ("new york", 21, 65),
        ("chicago", 14, 68),
        ("orlando", 35, 71),
        ("baltimore", 38, 75),
    ],
    columns=["city", "temperature", "humidity"],
)
df1


Unnamed: 0,city,temperature,humidity
0,new york,21,65
1,chicago,14,68
2,orlando,35,71
3,baltimore,38,75


In [17]:
# Second sample table with the same columns as df1. It shares "chicago" and
# "new york" with df1 and adds "san diego".
df2 = pd.DataFrame(
    [
        ("chicago", 21, 65),
        ("new york", 14, 68),
        ("san diego", 35, 71),
    ],
    columns=["city", "temperature", "humidity"],
)
df2

Unnamed: 0,city,temperature,humidity
0,chicago,21,65
1,new york,14,68
2,san diego,35,71


In [21]:
# Outer merge on `city`: cities from either table are kept, and the columns
# of the table that lacks a city are filled with NaN for that row.
# The overlapping temperature/humidity columns get the suffixes '_first'
# (from df1) and '_second' (from df2).
df3 = df1.merge(
    df2,
    how='outer',
    on="city",
    suffixes=('_first', '_second'),
)
df3

Unnamed: 0,city,temperature_first,humidity_first,temperature_second,humidity_second
0,new york,21.0,65.0,14.0,68.0
1,chicago,14.0,68.0,21.0,65.0
2,orlando,35.0,71.0,,
3,baltimore,38.0,75.0,,
4,san diego,,,35.0,71.0
