## Making a Data Frame

In [2]:
import pandas as pd
import numpy as np

df = pd.DataFrame({'A': [1, 2, 3, 4, 5], 'B': [6, 7, 8, 9, 10]})
df

Unnamed: 0,A,B
0,1,6
1,2,7
2,3,8
3,4,9
4,5,10


In [3]:
# Using numpy to create Data Frame
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
df_np = pd.DataFrame(data=arr)
df_np

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


In [4]:
pd.DataFrame(np.random.randn(3, 4), columns=list('ABCD'))

Unnamed: 0,A,B,C,D
0,-0.227901,-1.074001,0.37051,-0.360633
1,-0.640006,-1.241787,0.302799,-0.715065
2,-0.332755,-0.322999,0.71842,-0.732727


In [6]:
# renaming the columns
df_np.rename(columns={0:'A', 1: 'B', 2: 'C'}, inplace=True)
print(df_np)

   A  B  C
0  1  2  3
1  4  5  6
2  7  8  9


In [8]:
# adding prefix to columns
df_np = df_np.add_prefix('col_')
df_np

Unnamed: 0,col_A,col_B,col_C
0,1,2,3
1,4,5,6
2,7,8,9


## Reversing a row order

In [9]:
import seaborn as sns
kashti = sns.load_dataset('titanic')
kashti.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [10]:
kashti.loc[::-1].head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
890,0,3,male,32.0,0,0,7.75,Q,Third,man,True,,Queenstown,no,True
889,1,1,male,26.0,0,0,30.0,C,First,man,True,C,Cherbourg,yes,True
888,0,3,female,,1,2,23.45,S,Third,woman,False,,Southampton,no,False
887,1,1,female,19.0,0,0,30.0,S,First,woman,False,B,Southampton,yes,True
886,0,2,male,27.0,0,0,13.0,S,Second,man,True,,Southampton,no,True


In [11]:
kashti.reset_index(drop=True).head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [12]:
kashti.reset_index(drop=True).head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


## Reversing the column order

In [14]:
kashti.loc[:, ::-1].head()

Unnamed: 0,alone,alive,embark_town,deck,adult_male,who,class,embarked,fare,parch,sibsp,age,sex,pclass,survived
0,False,no,Southampton,,True,man,Third,S,7.25,0,1,22.0,male,3,0
1,False,yes,Cherbourg,C,False,woman,First,C,71.2833,0,1,38.0,female,1,1
2,True,yes,Southampton,,False,woman,Third,S,7.925,0,0,26.0,female,3,1
3,False,yes,Southampton,C,False,woman,First,S,53.1,0,1,35.0,female,1,1
4,True,no,Southampton,,True,man,Third,S,8.05,0,0,35.0,male,3,0


## Selecting only numerical values

In [18]:
kashti.select_dtypes(include=['category']).head()

Unnamed: 0,class,deck
0,Third,
1,First,C
2,Third,
3,First,C
4,Third,


In [19]:
kashti.select_dtypes(include=['category']).head()

Unnamed: 0,class,deck
0,Third,
1,First,C
2,Third,
3,First,C
4,Third,


In [20]:
# sampling from dataset
kashti.sample(frac=0.5).shape

(446, 15)

In [22]:
from random import random
kashti1 = kashti.sample(frac=0.5, random_state=1)
kashti1.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
862,1,1,female,48.0,0,0,25.9292,S,First,woman,False,D,Southampton,yes,True
223,0,3,male,,0,0,7.8958,S,Third,man,True,,Southampton,no,True
84,1,2,female,17.0,0,0,10.5,S,Second,woman,False,,Southampton,yes,True
680,0,3,female,,0,0,8.1375,Q,Third,woman,False,,Queenstown,no,True
535,1,2,female,7.0,0,2,26.25,S,Second,child,False,,Southampton,yes,False


In [23]:
kashti2 = kashti.drop(kashti1.index)
kashti2.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
7,0,3,male,2.0,3,1,21.075,S,Third,child,False,,Southampton,no,False
10,1,3,female,4.0,1,1,16.7,S,Third,child,False,G,Southampton,yes,False
15,1,2,female,55.0,0,0,16.0,S,Second,woman,False,,Southampton,yes,True
18,0,3,female,31.0,1,0,18.0,S,Third,woman,False,,Southampton,no,False


In [24]:
# joining two data sets
kashti3 = kashti1.append(kashti2)
kashti3.shape

  kashti3 = kashti1.append(kashti2)


(891, 15)

In [25]:
kashti.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [26]:
kashti.sex.unique()

array(['male', 'female'], dtype=object)

In [27]:
kashti[kashti.embark_town.isin(['Queenstown'])]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
5,0,3,male,,0,0,8.4583,Q,Third,man,True,,Queenstown,no,True
16,0,3,male,2.0,4,1,29.1250,Q,Third,child,False,,Queenstown,no,False
22,1,3,female,15.0,0,0,8.0292,Q,Third,child,False,,Queenstown,yes,True
28,1,3,female,,0,0,7.8792,Q,Third,woman,False,,Queenstown,yes,True
32,1,3,female,,0,0,7.7500,Q,Third,woman,False,,Queenstown,yes,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
790,0,3,male,,0,0,7.7500,Q,Third,man,True,,Queenstown,no,True
825,0,3,male,,0,0,6.9500,Q,Third,man,True,,Queenstown,no,True
828,1,3,male,,0,0,7.7500,Q,Third,man,True,,Queenstown,yes,True
885,0,3,female,39.0,0,5,29.1250,Q,Third,woman,False,,Queenstown,no,False


In [31]:
kashti[kashti.embark_town.isin(['Queenstown', 'Southampton'])].head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True
5,0,3,male,,0,0,8.4583,Q,Third,man,True,,Queenstown,no,True


In [32]:
kashti.embark_town.value_counts()

Southampton    644
Cherbourg      168
Queenstown      77
Name: embark_town, dtype: int64

In [33]:
kashti.age.value_counts().nlargest(3)

24.0    30
22.0    27
18.0    26
Name: age, dtype: int64

In [34]:
kashti.groupby('who').count()

Unnamed: 0_level_0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,adult_male,deck,embark_town,alive,alone
who,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
child,83,83,83,83,83,83,83,83,83,83,13,83,83,83
man,537,537,537,413,537,537,537,537,537,537,99,537,537,537
woman,271,271,271,218,271,271,271,269,271,271,91,269,271,271


In [35]:
kashti.groupby(['who', 'pclass']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,sex,age,sibsp,parch,fare,embarked,class,adult_male,deck,embark_town,alive,alone
who,pclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
child,1,6,6,6,6,6,6,6,6,6,6,6,6,6
child,2,19,19,19,19,19,19,19,19,19,4,19,19,19
child,3,58,58,58,58,58,58,58,58,58,3,58,58,58
man,1,119,119,98,119,119,119,119,119,119,91,119,119,119
man,2,99,99,90,99,99,99,99,99,99,3,99,99,99
man,3,319,319,225,319,319,319,319,319,319,5,319,319,319
woman,1,91,91,82,91,91,91,89,91,91,78,89,91,91
woman,2,66,66,64,66,66,66,66,66,66,9,66,66,66
woman,3,114,114,72,114,114,114,114,114,114,4,114,114,114


In [36]:
kashti['new_age'] = pd.cut(kashti.age, bins=[0, 18, 25, 99], labels=['child', 'young', 'adult'])

In [37]:
kashti.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,new_age
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False,young
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,adult
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True,adult
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False,adult
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True,adult
