### Basic Processing Examples
1. Use `apply` on specific columns and generate new columns.
2. Use `pd.cut` to categorize columns with continuous values and generate new columns.

In [1]:
import pandas as pd

In [4]:
# Read the Titanic training data behind the `kaggletrain` short link (needs network access).
df_titanic = pd.read_csv('http://bit.ly/kaggletrain')

# Keep the frame as loaded; the examples below add their new columns to this copy only.
df_titanic_copy = df_titanic.copy(deep=True)
df_titanic_copy.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


### Use `apply` on specific columns and generate new columns

***Example 1***

In [5]:
# Names of the first four columns, used as the starting point for a display subset.
columns = list(df_titanic_copy.columns[:4])
columns

['PassengerId', 'Survived', 'Pclass', 'Name']

In [6]:
new_col = '存活'
# Put the new column name right after the first entry to control the display order.
# The membership check keeps this cell idempotent: re-running it does not insert the
# name a second time, which would otherwise show the column twice in the selection.
if new_col not in columns:
    columns.insert(1, new_col)
columns

['PassengerId', '存活', 'Survived', 'Pclass', 'Name']

In [7]:
# Map the `Survived` flag to a Chinese label: truthy -> '倖存', falsy -> '死亡'.
df_titanic_copy[new_col] = df_titanic_copy['Survived'].apply(
    lambda survived: '倖存' if survived else '死亡'
)
# Show the rows labelled 0 through 5, restricted to the reordered column subset.
df_titanic_copy.loc[:5, columns]

Unnamed: 0,PassengerId,存活,Survived,Pclass,Name
0,1,死亡,0,3,"Braund, Mr. Owen Harris"
1,2,倖存,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th..."
2,3,倖存,1,3,"Heikkinen, Miss. Laina"
3,4,倖存,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)"
4,5,死亡,0,3,"Allen, Mr. William Henry"
5,6,死亡,0,3,"Moran, Mr. James"


***Example 2***

In [8]:
# Lookup from the values of the `Sex` column to the Chinese word used in the description.
d = {'male': '男性', 'female': '女性'}


def generate_desc(row):
    """Return a short Chinese description built from one passenger row.

    Args:
        row: A mapping-like object (for example a DataFrame row) that provides
            the keys 'Age' and 'Sex'.

    Returns:
        A string of the form "一名 <Age> 歲的<男性|女性>". The age is formatted
        as-is, so a missing (NaN) age is rendered as "nan".

    Raises:
        KeyError: If row['Sex'] is not a key of the lookup table `d`.
    """
    age = row['Age']
    gender_word = d[row['Sex']]
    return f"一名 {age} 歲的{gender_word}"

# Call generate_desc once per row (axis='columns' hands each row to the function)
# and store the resulting text in a new column.
df_titanic_copy['描述'] = df_titanic_copy.apply(generate_desc, axis='columns')
# Show the rows labelled 0 through 5, from the `Sex` column through the last column.
df_titanic_copy.loc[:5, 'Sex':]

Unnamed: 0,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,存活,描述
0,male,22.0,1,0,A/5 21171,7.25,,S,死亡,一名 22.0 歲的男性
1,female,38.0,1,0,PC 17599,71.2833,C85,C,倖存,一名 38.0 歲的女性
2,female,26.0,0,0,STON/O2. 3101282,7.925,,S,倖存,一名 26.0 歲的女性
3,female,35.0,1,0,113803,53.1,C123,S,倖存,一名 35.0 歲的女性
4,male,35.0,0,0,373450,8.05,,S,死亡,一名 35.0 歲的男性
5,male,,0,0,330877,8.4583,,Q,死亡,一名 nan 歲的男性


---
---

### Use `pd.cut` to categorize columns with continuous values and generate new columns

In [12]:
new_col = '年齡區間'

# Convert the numerical `Age` column into a categorical column.
# One label is supplied per bin, so the bin count is taken from the label list.
labels = [f'族群 {i}' for i in range(1, 11)]
df_titanic_copy[new_col] = pd.cut(
    df_titanic_copy['Age'],
    bins=len(labels),
    labels=labels,
)

# The categorical column produced by the cut can be sorted on; show the first six rows.
(
    df_titanic_copy
    .sort_values(by=new_col, ascending=False)
    .reset_index()
    .head(6)
)

Unnamed: 0,index,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,存活,描述,年齡區間
0,630,631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0,0,0,27042,30.0,A23,S,倖存,一名 80.0 歲的男性,族群 10
1,851,852,0,3,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.775,,S,死亡,一名 74.0 歲的男性,族群 10
2,456,457,0,1,"Millet, Mr. Francis Davis",male,65.0,0,0,13509,26.55,E38,S,死亡,一名 65.0 歲的男性,族群 9
3,672,673,0,2,"Mitchell, Mr. Henry Michael",male,70.0,0,0,C.A. 24580,10.5,,S,死亡,一名 70.0 歲的男性,族群 9
4,116,117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q,死亡,一名 70.5 歲的男性,族群 9
5,280,281,0,3,"Duane, Mr. Frank",male,65.0,0,0,336439,7.75,,Q,死亡,一名 65.0 歲的男性,族群 9
