# 6-Renaming and Combining

In [2]:
import pandas as pd

def currency_to_int(curr):
    """Convert a formatted price string such as "$1,495,000" to an int.

    Args:
        curr: Price text. Surrounding whitespace, one leading non-digit
            character (the currency symbol) and "," thousands separators
            are ignored.

    Returns:
        The price as an int.

    Raises:
        TypeError: If curr is not a string.
        ValueError: If the remaining text is not a whole number.
    """
    if not isinstance(curr, str):
        raise TypeError(f"expected a price string, got {type(curr).__name__}")
    text = curr.strip()
    # Only drop the first character when it really is a symbol; slicing
    # unconditionally would silently turn "225,000" into 25000.
    if text and not text[0].isdigit():
        text = text[1:]
    return int(text.replace(",", ""))

def trans_rooms_to_int(entry):
    """Return the leading integer of a room-count string.

    The text before the first space is parsed as an int; anything after
    that space is ignored. A string without a space is parsed whole.
    """
    count_text, _, _ = entry.partition(" ")
    return int(count_text)

# transform prices and room counts
# Each converter is applied to its whole column with Series.map instead of a
# row-by-row .loc loop: this does not depend on the index being 0..n-1 and
# builds new integer columns rather than writing ints into text columns one
# cell at a time.
atl_addr = pd.read_csv("./atl-address-1.csv")
atl_addr = atl_addr.assign(
    Price=atl_addr["Price"].map(currency_to_int),
    Beds=atl_addr["Beds"].map(trans_rooms_to_int),
    Baths=atl_addr["Baths"].map(trans_rooms_to_int),
)

atl_addr.head()

Unnamed: 0,Title,Price,Beds,Baths,Area
0,"34 The Prado NE, Atlanta, GA 30309",1495000,4,4,"3,644 sqft"
1,"2060 Shirley St SW, Atlanta, GA 30311",225000,3,2,"1,300 sqft"
2,"300 Peachtree St NE APT 11G, Atlanta, GA 30308",259000,2,2,890 sqft
3,"1690 Memorial Dr SE, Atlanta, GA 30317",320000,2,1,"1,163 sqft"
4,"6253 Old Kingston Dr, South Fulton, GA",349275,5,3,-- sqft


## Renaming

In [4]:
# rename() returns a copy with selected index labels and/or column names changed
# Here a single column is renamed: "Title" -> "title"
atl_addr.rename(columns={"Title": "title"}).head()

Unnamed: 0,title,Price,Beds,Baths,Area
0,"34 The Prado NE, Atlanta, GA 30309",1495000,4,4,"3,644 sqft"
1,"2060 Shirley St SW, Atlanta, GA 30311",225000,3,2,"1,300 sqft"
2,"300 Peachtree St NE APT 11G, Atlanta, GA 30308",259000,2,2,890 sqft
3,"1690 Memorial Dr SE, Atlanta, GA 30317",320000,2,1,"1,163 sqft"
4,"6253 Old Kingston Dr, South Fulton, GA",349275,5,3,-- sqft


In [5]:
# rename() also accepts an index mapping: only the listed row labels change
# (0 -> "First", 1 -> "Second"); all other labels are kept
atl_addr.rename(index={0: "First", 1: "Second"}).head()

Unnamed: 0,Title,Price,Beds,Baths,Area
First,"34 The Prado NE, Atlanta, GA 30309",1495000,4,4,"3,644 sqft"
Second,"2060 Shirley St SW, Atlanta, GA 30311",225000,3,2,"1,300 sqft"
2,"300 Peachtree St NE APT 11G, Atlanta, GA 30308",259000,2,2,890 sqft
3,"1690 Memorial Dr SE, Atlanta, GA 30317",320000,2,1,"1,163 sqft"
4,"6253 Old Kingston Dr, South Fulton, GA",349275,5,3,-- sqft


In [6]:
# The row index and the column index each carry their own name attribute;
# rename_axis() sets it for the axis given by `axis`
(
    atl_addr
    .rename_axis("Houses", axis="rows")
    .rename_axis("Info", axis="columns")
    .head()
)

Info,Title,Price,Beds,Baths,Area
Houses,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,"34 The Prado NE, Atlanta, GA 30309",1495000,4,4,"3,644 sqft"
1,"2060 Shirley St SW, Atlanta, GA 30311",225000,3,2,"1,300 sqft"
2,"300 Peachtree St NE APT 11G, Atlanta, GA 30308",259000,2,2,890 sqft
3,"1690 Memorial Dr SE, Atlanta, GA 30317",320000,2,1,"1,163 sqft"
4,"6253 Old Kingston Dr, South Fulton, GA",349275,5,3,-- sqft


## Combining
Pandas has three core functions for combining DataFrames and Series:<br>
**concat(), join(), and merge().**<br>
Most of what **merge()** can do can also be done more simply with **join()**.<br>

**# Concatenate ("smush" together) DataFrames that have the same columns**<br>
canadian_youtube = pd.read_csv("../input/youtube-new/CAvideos.csv")<br>
british_youtube = pd.read_csv("../input/youtube-new/GBvideos.csv")

pd.concat([canadian_youtube, british_youtube])

**# Combine different DataFrame objects that have an index in common**<br>
left = canadian_youtube.set_index(['title', 'trending_date'])<br>
right = british_youtube.set_index(['title', 'trending_date'])<br>

left.join(right, lsuffix='_CAN', rsuffix='_UK')<br>
**# The lsuffix and rsuffix parameters are necessary here because the data has the same column names in both British and Canadian datasets. If this wasn't true (because, say, we'd renamed them beforehand) we wouldn't need them.**