## Reading from a CSV or Excel file

In [106]:
import pandas as pd
import numpy as np

# Load the Titanic passenger data from a CSV file in the working directory.
df = pd.read_csv("titanic.csv")
# For an Excel workbook (.xlsx) use pd.read_excel with the workbook's path instead, e.g.:
# df = pd.read_excel("titanic.xlsx")
df.head()  # preview the first five rows

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## Extracting columns from dataset
  
We can extract specific columns from the dataset and, at the same time, reorder them by listing them in the order we want.

In [107]:
# Columns to keep, listed in the order they should appear in the result.
selected_columns = ['Name', 'Survived', 'Age', 'Ticket']
df1 = df[selected_columns]
df1.head()

Unnamed: 0,Name,Survived,Age,Ticket
0,"Braund, Mr. Owen Harris",0,22.0,A/5 21171
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,38.0,PC 17599
2,"Heikkinen, Miss. Laina",1,26.0,STON/O2. 3101282
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,35.0,113803
4,"Allen, Mr. William Henry",0,35.0,373450


## Indexing in Pandas (Iterating through each column)

size of dataset  

----
```
df.shape # returns the size of the dataset (rows, columns)
```

----
1. Indexing with position
    - iteration example
    
2. Indexing with Labels
    - iteration example

In [108]:
# Positional indexing: df.iloc[row_position, column_position], both 0-based.
# Only the last expression of a cell is displayed, so these two lookups are
# evaluated purely as syntax examples and produce no output.
df.iloc[0, 2]  # first row, third column
df.iloc[7, 2]  # eighth row, third column

print(df.shape)

# Print the first 10 rows cell by cell, addressing every cell by position.
# The loop is bounded up front so no row is built only to be thrown away.
n_rows, n_cols = df.shape
for i in range(min(10, n_rows)):
    cells = "".join(" " + str(df.iloc[i, j]) for j in range(n_cols))
    print(f"{i + 1} : {cells}")

(156, 12)
1 :  1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.25 nan S
2 :  2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female 38.0 1 0 PC 17599 71.2833 C85 C
3 :  3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.925 nan S
4 :  4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1 C123 S
5 :  5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.05 nan S
6 :  6 0 3 Moran, Mr. James male nan 0 0 330877 8.4583 nan Q
7 :  7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S
8 :  8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.075 nan S
9 :  9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 2 347742 11.1333 nan S
10 :  10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 0 237736 30.0708 nan C


In [109]:
print(list(df.columns))

# Print the first 10 rows cell by cell, addressing every cell by label.
# Row labels are taken from df.index instead of being assumed to be 0, 1, 2, ...,
# so the lookup keeps working when the index is not the default RangeIndex.
for row_number, label in enumerate(df.index[:10], start=1):
    cells = "".join(" " + str(df.loc[label, col]) for col in df.columns)
    print(f"{row_number} : {cells}")

['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']
1 :  1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.25 nan S
2 :  2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female 38.0 1 0 PC 17599 71.2833 C85 C
3 :  3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.925 nan S
4 :  4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1 C123 S
5 :  5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.05 nan S
6 :  6 0 3 Moran, Mr. James male nan 0 0 330877 8.4583 nan Q
7 :  7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S
8 :  8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.075 nan S
9 :  9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 2 347742 11.1333 nan S
10 :  10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 0 237736 30.0708 nan C


## Slicing with loc and iloc

In [110]:
# Positional slice: rows 0-9 and columns 0-3 (the end position is excluded).
df1 = df.iloc[:10, :4]
df1.head(2)

Unnamed: 0,PassengerId,Survived,Pclass,Name
0,1,0,3,"Braund, Mr. Owen Harris"
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th..."


In [111]:

# Label slice: unlike iloc, .loc includes BOTH endpoints, so this selects
# row labels 0 through 10 and every column from 'Name' through 'Cabin'.
df2 = df.loc[0:10, 'Name':'Cabin']
df2.head(2)

Unnamed: 0,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin
0,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85


## Finding unique values in a column

Example: find all the distinct ages of the passengers.

In [115]:
# Distinct values of the Age column, in order of first appearance;
# missing ages show up as a single nan entry in the result.
df['Age'].unique()

array([22.  , 38.  , 26.  , 35.  ,   nan, 54.  ,  2.  , 27.  , 14.  ,
        4.  , 58.  , 20.  , 39.  , 55.  , 31.  , 34.  , 15.  , 28.  ,
        8.  , 19.  , 40.  , 66.  , 42.  , 21.  , 18.  ,  3.  ,  7.  ,
       49.  , 29.  , 65.  , 28.5 ,  5.  , 11.  , 45.  , 17.  , 32.  ,
       16.  , 25.  ,  0.83, 30.  , 33.  , 23.  , 24.  , 46.  , 59.  ,
       71.  , 37.  , 47.  , 14.5 , 70.5 , 32.5 , 12.  ,  9.  , 36.5 ,
       51.  , 55.5 , 40.5 ])

## Querying in pandas (Conditions)

In [120]:
## Returns a boolean Series with one True/False per row, showing whether the row satisfies the condition.
## Rows with a missing Age evaluate to False. To count the matching rows, use (df['Age'] > 20).sum().

df['Age'] > 20

0       True
1       True
2       True
3       True
4       True
       ...  
151     True
152     True
153     True
154    False
155     True
Name: Age, Length: 156, dtype: bool

In [121]:
## Boolean-mask filtering: keeps only the rows for which the condition is True
## (all passengers older than 20)

is_over_20 = df['Age'] > 20
df[is_over_20]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
150,151,0,2,"Bateman, Rev. Robert James",male,51.0,0,0,S.O.P. 1166,12.5250,,S
151,152,1,1,"Pears, Mrs. Thomas (Edith Wearne)",female,22.0,1,0,113776,66.6000,C2,S
152,153,0,3,"Meo, Mr. Alfonzo",male,55.5,0,0,A.5. 11206,8.0500,,S
153,154,0,3,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5000,,S


## Saving the new dataframe

1. Extract required data
2. Make some changes
3. Save to a new file

In [124]:
# Extract the required rows: passengers older than 20.
df_new = df[df['Age'] > 20]

# Save the result to a new file. index=False keeps the (now gapped) row index
# out of the CSV, so the file has the same columns as the data that was loaded
# and does not gain an extra unnamed column when it is read back.
df_new.to_csv("titanic_passenger_above_20.csv", index=False)