# 1. DATAFRAME CONCATENATION

In [1]:
# Import Pandas
import pandas as pd

In [2]:
# Creating a dataframe from a dictionary
# Let's define a dataframe with a list of bank clients with IDs = 1, 2, 3, 4, 5 
# Check this out: https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html
raw_data = {'Bank Client Id' : ['1','2','3','4','5'],
            'First Name' : ['Nancy','Alex','Shep','Max','Allen'],
            'Last Name' : ['Rob','Ali','George','Mitch','Steve']}


bank1_df = pd.DataFrame(raw_data)
bank1_df

Unnamed: 0,Bank Client Id,First Name,Last Name
0,1,Nancy,Rob
1,2,Alex,Ali
2,3,Shep,George
3,4,Max,Mitch
4,5,Allen,Steve


In [3]:
# Let's define another dataframe for a separate list of clients (IDs = 6, 7, 8, 9, 10)
raw_data = {'Bank Client Id' : ['6','7','8','9','10'],
            'First Name' : ['Nancy','Alex','Shep','Max','Allen'],
            'Last Name' : ['Rob','Ali','George','Mitch','Steve']}

bank2_df = pd.DataFrame(raw_data)
bank2_df

Unnamed: 0,Bank Client Id,First Name,Last Name
0,6,Nancy,Rob
1,7,Alex,Ali
2,8,Shep,George
3,9,Max,Mitch
4,10,Allen,Steve


In [4]:
# Let's concatenate both dataframes #1 and #2
# Note that we now have client IDs from 1 to 10
# Note that by default ignore_index has been set to False meaning indexes from both dataframes are kept unchanged
bank_all_df = pd.concat([bank1_df, bank2_df])
bank_all_df

Unnamed: 0,Bank Client Id,First Name,Last Name
0,1,Nancy,Rob
1,2,Alex,Ali
2,3,Shep,George
3,4,Max,Mitch
4,5,Allen,Steve
0,6,Nancy,Rob
1,7,Alex,Ali
2,8,Shep,George
3,9,Max,Mitch
4,10,Allen,Steve


In [5]:
# Let's concatenate both dataframes #1 and #2
# Note that by setting ignore_index = True, the index has been automatically set to numeric and now ranges from 1 to 9
bank_all_df = pd.concat([bank1_df, bank2_df],ignore_index=True)
bank_all_df

Unnamed: 0,Bank Client Id,First Name,Last Name
0,1,Nancy,Rob
1,2,Alex,Ali
2,3,Shep,George
3,4,Max,Mitch
4,5,Allen,Steve
5,6,Nancy,Rob
6,7,Alex,Ali
7,8,Shep,George
8,9,Max,Mitch
9,10,Allen,Steve


In [6]:
len(bank_all_df)

10

In [8]:
# You can also use the append method to perform similar task
# Note that order matters!
bank2_df.append(bank1_df)
# 더이상 사용되지 않음

AttributeError: 'DataFrame' object has no attribute 'append'

In [None]:
# You can also use the append method to perform similar task 


**MINI CHALLENGE #1:**
- **Assume that you and your significant other become a new client at the bank and would like to add your first names, last names and unique client IDs. Define a new DataFrame and add it to the master list "bank_all_df"** 

# 2. DATAFRAME CONCATENATION WITH MULTI-INDEXING

In [9]:
# We can perform concatenation and also use multi-indexing dataframe as follows:
bank_all_df = pd.concat([bank1_df,bank2_df],keys= ['Customers Group1','Customers Group2'])
bank_all_df

Unnamed: 0,Unnamed: 1,Bank Client Id,First Name,Last Name
Customers Group1,0,1,Nancy,Rob
Customers Group1,1,2,Alex,Ali
Customers Group1,2,3,Shep,George
Customers Group1,3,4,Max,Mitch
Customers Group1,4,5,Allen,Steve
Customers Group2,0,6,Nancy,Rob
Customers Group2,1,7,Alex,Ali
Customers Group2,2,8,Shep,George
Customers Group2,3,9,Max,Mitch
Customers Group2,4,10,Allen,Steve


In [11]:
# You can access elements using multi-indexing as follows
bank_all_df.loc[('Customers Group1'),:]

Unnamed: 0,Bank Client Id,First Name,Last Name
0,1,Nancy,Rob
1,2,Alex,Ali
2,3,Shep,George
3,4,Max,Mitch
4,5,Allen,Steve


In [12]:
# You can access elements using multi-indexing as follows
bank_all_df.loc[('Customers Group1'),0]

Bank Client Id        1
First Name        Nancy
Last Name           Rob
Name: (Customers Group1, 0), dtype: object

In [14]:
# You can access elements using multi-indexing as follows
bank_all_df.loc[('Customers Group2'),'First Name']

0    Nancy
1     Alex
2     Shep
3      Max
4    Allen
Name: First Name, dtype: object

**MINI CHALLENGE #2:**
- **Assume that you and your significant other belong to Customers Group #3. Use multindexing to add both names to the master list. Write a line of code to access Group #3 only.**

# 3. DATA MERGING

In [15]:
# Let's concatenate both dataframes #1 and #2
# Note that we now have client IDs from 1 to 10
# Note that by default ignore_index has been set to False meaning indexes from both dataframes are kept unchanged
bank_all_df = pd.concat([bank1_df,bank2_df],ignore_index=True)
bank_all_df

Unnamed: 0,Bank Client Id,First Name,Last Name
0,1,Nancy,Rob
1,2,Alex,Ali
2,3,Shep,George
3,4,Max,Mitch
4,5,Allen,Steve
5,6,Nancy,Rob
6,7,Alex,Ali
7,8,Shep,George
8,9,Max,Mitch
9,10,Allen,Steve


In [16]:
# Let's assume we obtained additional information (Annual Salary) about our bank customers 
# Note that data obtained is for all clients with IDs 1 to 10
raw_data = {
    'Bank Client Id' : ['1','2','3','4','5','6','7','8','9','10'],
    'Annual Salarty [$/year]' : [1000,2000,3000,4000,5000,6000,7000,8000,9000,10000]
}
raw_data

{'Bank Client Id': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
 'Annual Salarty [$/year]': [1000,
  2000,
  3000,
  4000,
  5000,
  6000,
  7000,
  8000,
  9000,
  10000]}

In [18]:
# Let's merge all data on 'Bank Client ID'
bank_salary_df = pd.DataFrame(raw_data)
bank_all_df

Unnamed: 0,Bank Client Id,First Name,Last Name
0,1,Nancy,Rob
1,2,Alex,Ali
2,3,Shep,George
3,4,Max,Mitch
4,5,Allen,Steve
5,6,Nancy,Rob
6,7,Alex,Ali
7,8,Shep,George
8,9,Max,Mitch
9,10,Allen,Steve


In [19]:
bank_all_df = pd.merge(bank_all_df,bank_salary_df,on= 'Bank Client Id')
bank_all_df

Unnamed: 0,Bank Client Id,First Name,Last Name,Annual Salarty [$/year]
0,1,Nancy,Rob,1000
1,2,Alex,Ali,2000
2,3,Shep,George,3000
3,4,Max,Mitch,4000
4,5,Allen,Steve,5000
5,6,Nancy,Rob,6000
6,7,Alex,Ali,7000
7,8,Shep,George,8000
8,9,Max,Mitch,9000
9,10,Allen,Steve,10000


**MINI CHALLENGE #3:**
- **Let's assume that you were able to obtain two new pieces of information about the bank clients such as: (1) credit card debt, (2) age**
- **Define a new DataFrame that contains this new information**
- **Merge this new information to the DataFrame "bank_all_df".** 

# MINI CHALLENGES SOLUTIONS

**MINI CHALLENGE #1 SOLUTION:**
- **Assume that you and your significant other become a new client at the bank and would like to add your first names, last names and unique client IDs. Define a new DataFrame and add it to the master list "bank_all_df"** 


In [None]:
new_data = {'Bank Client ID': ['11', '12'],
            'First Name': ['Justin', 'Sophie'], 
            'Last Name': ['Trudeau', 'Trudeau']}

bank3_df = pd.DataFrame(new_data, columns = ['Bank Client ID', 'First Name', 'Last Name'])
bank3_df

In [None]:
# Let's concatenate both dataframes #1 and #2
# Note that by setting ignore_index = True, the index has been automatically set to numeric and now ranges from 1 to 9
bank_all_df = pd.concat([bank_all_df, bank3_df], ignore_index = True)
bank_all_df

**MINI CHALLENGE #2 SOLUTION:**
- **Assume that you and your significant other belong to Customers Group #3. Use multindexing to add both names to the master list. Write a line of code to access Group #3 only.**

In [None]:
bank1_df

In [None]:
bank2_df

In [None]:
new_data = {'Bank Client ID': ['11', '12'],
            'First Name': ['Justin', 'Sophie'], 
            'Last Name': ['Trudeau', 'Trudeau']}

bank3_df = pd.DataFrame(new_data, columns = ['Bank Client ID', 'First Name', 'Last Name'])
bank3_df

In [None]:
# We can perform concatenation and also use multi-indexing dataframe as follows:
bank_all_df = pd.concat([bank1_df, bank2_df, bank3_df], keys = ["Customers Group 1", "Customers Group 2",  "Customers Group 3"])
bank_all_df

In [None]:
# You can access elements using multi-indexing as follows
bank_all_df.loc[("Customers Group 3"), :]

**MINI CHALLENGE #3 SOLUTION:**
- **Let's assume that you were able to obtain two new pieces of information about the bank clients such as: (1) credit card debt, (2) age**
- **Define a new DataFrame that contains this new information**
- **Merge this new information to the DataFrame "bank_all_df".** 

In [None]:
# Let's assume we obtained additional information (Annual Salary) about our bank customers 
# Note that data obtained is for all clients with IDs 1 to 10
 
raw_data = {
        'Bank Client ID': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
        'Credit Card Debt': [1000, 100, 500, 600, 0, 20, 360, 127, 3000, 2200],
        'Age': [44, 35, 67, 19, 22, 45, 48, 33, 34, 36]}
bank_credit_age_df = pd.DataFrame(raw_data, columns = ['Bank Client ID','Credit Card Debt', 'Age'])
bank_credit_age_df

In [None]:
# Let's merge all data on 'Bank Client ID'
bank_all_df = pd.merge(bank_all_df, bank_credit_age_df, on = 'Bank Client ID')
bank_all_df
