In [1]:
import numpy as np
import pandas as pd 

**Creating a DataFrame**

In [2]:
# Column-oriented input: each key becomes a column label and each list
# supplies that column's values, one entry per row.
data = dict(
    Name=['John', 'Anna', 'Peter', 'Linda'],
    Age=[28, 34, 29, 42],
    City=['New York', 'Paris', 'Berlin', 'London'],
    Salary=[65000, 70000, 62000, 85000],
)

# No index is given, so pandas assigns the default 0..3 row labels.
df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [3]:
# Row-oriented input: each inner list is one record
# (name, age, city, salary).
data_list = [
    ['John',  28, 'New York', 65000],
    ['Anna',  34, 'Paris',    70000],
    ['Peter', 29, 'Berlin',   62000],
    ['Linda', 42, 'London',   85000],
]

# No `columns=` is passed, so the columns are labelled 0..3.
df3 = pd.DataFrame(data=data_list)
df3


Unnamed: 0,0,1,2,3
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [4]:
# Same row-oriented records as before, this time with explicit column labels.
column_labels = ['Name', 'Age', 'City', 'Salary']
df2 = pd.DataFrame(data=data_list, columns=column_labels)
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


**Selection and Indexing of Columns**

In [5]:
# Inspect the row labels of df; it was built without an explicit index,
# so this is the default RangeIndex.
df.index

RangeIndex(start=0, stop=4, step=1)

In [6]:
# Passing a list of labels inside the brackets selects several columns and
# returns a DataFrame (a single label would return a Series).
df2[["Name","City"]]


Unnamed: 0,Name,City
0,John,New York
1,Anna,Paris
2,Peter,Berlin
3,Linda,London


**Creating a new column**

In [7]:
# Assigning to a new column label appends that column at the end of df2.
# The list must supply one value per row (4 here).
df2["Designation"] = ["Doctor","Eng.","Doctor","Eng."]
df2

Unnamed: 0,Name,Age,City,Salary,Designation
0,John,28,New York,65000,Doctor
1,Anna,34,Paris,70000,Eng.
2,Peter,29,Berlin,62000,Doctor
3,Linda,42,London,85000,Eng.


In [8]:
# Insert 'Gender' at column position 1 (directly after 'Name').
# DataFrame.insert mutates df2 in place and raises ValueError when the column
# already exists, so the guard keeps this cell safe to re-run.
if 'Gender' not in df2.columns:
    df2.insert(1, 'Gender', ['M', 'F', 'M', 'F'])
df2

Unnamed: 0,Name,Gender,Age,City,Salary,Designation
0,John,M,28,New York,65000,Doctor
1,Anna,F,34,Paris,70000,Eng.
2,Peter,M,29,Berlin,62000,Doctor
3,Linda,F,42,London,85000,Eng.


**Removing Columns**

In [9]:
# Drop several columns at once. The result is a new DataFrame that is only
# displayed here; df itself keeps 'City' and 'Age'.
df.drop(columns=['City', 'Age'])

Unnamed: 0,Name,Salary
0,John,65000
1,Anna,70000
2,Peter,62000
3,Linda,85000


In [10]:
# Delete the column from the actual DataFrame: inplace=True mutates df2
# instead of returning a modified copy.
# errors='ignore' makes the cell safe to re-run; without it a second
# execution raises KeyError because 'City' has already been removed.
df2.drop(columns=['City'], inplace=True, errors='ignore')
df2

Unnamed: 0,Name,Gender,Age,Salary,Designation
0,John,M,28,65000,Doctor
1,Anna,F,34,70000,Eng.
2,Peter,M,29,62000,Doctor
3,Linda,F,42,85000,Eng.




**Selecting Rows**

In [11]:
# Display df2 as it stands before the row-selection examples.
df2

Unnamed: 0,Name,Gender,Age,Salary,Designation
0,John,M,28,65000,Doctor
1,Anna,F,34,70000,Eng.
2,Peter,M,29,62000,Doctor
3,Linda,F,42,85000,Eng.


In [12]:
# iloc selects by integer position: position 0 is the first row, returned
# as a Series whose index is the column labels.
df2.iloc[0]

Name             John
Gender              M
Age                28
Salary          65000
Designation    Doctor
Name: 0, dtype: object

In [13]:
# loc selects by index label. With the default 0..3 index the label 0
# happens to coincide with position 0, so this matches df2.iloc[0].
df2.loc[0]

Name             John
Gender              M
Age                28
Salary          65000
Designation    Doctor
Name: 0, dtype: object

In [14]:
# Label-based slicing with loc includes BOTH endpoints, so this returns
# the rows labelled 0, 1 and 2.
df2.loc[0:2]

Unnamed: 0,Name,Gender,Age,Salary,Designation
0,John,M,28,65000,Doctor
1,Anna,F,34,70000,Eng.
2,Peter,M,29,62000,Doctor


In [15]:
# Replace the default integer row labels with custom string labels, then
# look a row up by its new label.
# NOTE: from here on df2 no longer has integer labels, so the earlier cells
# that call df2.loc[0] / df2.loc[0:2] must run before this one.
df2.index=['J','A','P','L']
df2.loc['P']

Name            Peter
Gender              M
Age                29
Salary          62000
Designation    Doctor
Name: P, dtype: object

In [16]:
# Positional access is unaffected by the new string labels: position 1 is
# the second row (label 'A').
df2.iloc[1]

Name            Anna
Gender             F
Age               34
Salary         70000
Designation     Eng.
Name: A, dtype: object

In [17]:
# Position-based slicing with iloc excludes the end position, so this
# returns positions 0 and 1 only.
df2.iloc[0:2]

Unnamed: 0,Name,Gender,Age,Salary,Designation
J,John,M,28,65000,Doctor
A,Anna,F,34,70000,Eng.


**Selecting Subsets of Rows and Columns**

In [20]:
# df still has the default integer index and all four original columns.
df

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [23]:
# loc[row_labels, column_labels]: rows come back in the order requested
# (0, 3, 2), restricted to the listed columns.
df.loc[[0,3,2],["Name","Age"]]

Unnamed: 0,Name,Age
0,John,28
3,Linda,42
2,Peter,29


In [28]:
# iloc[row_positions, column_positions]: first two rows and first two
# columns, selected purely by position.
df2.iloc[[0,1],[0,1]]

Unnamed: 0,Name,Gender
J,John,M
A,Anna,F


In [29]:
# Same idea with labels: rows 'J' and 'P' of df2, columns 'Name' and 'Age'.
df2.loc[["J","P"],["Name","Age"]]

Unnamed: 0,Name,Age
J,John,28
P,Peter,29


In [39]:
# head() with no argument returns up to the first 5 rows, so for this 4-row
# frame `firstRow` holds every row, not just the first one.
# NOTE(review): if a single row is intended, use df2.head(1) — confirm.
firstRow=df2.head()
firstRow

Unnamed: 0,Name,Gender,Age,Salary,Designation
J,John,M,28,65000,Doctor
A,Anna,F,34,70000,Eng.
P,Peter,M,29,62000,Doctor
L,Linda,F,42,85000,Eng.


In [40]:
# tail(n) returns the last n rows; here the last three of df2.
df2.tail(3)

Unnamed: 0,Name,Gender,Age,Salary,Designation
A,Anna,F,34,70000,Eng.
P,Peter,M,29,62000,Doctor
L,Linda,F,42,85000,Eng.





**Conditional Selection**


In [35]:
# Comparing a column with a scalar is evaluated element-wise and yields a
# boolean Series (a mask) aligned with df2's index.
df2["Age"]>30

J    False
A     True
P    False
L     True
Name: Age, dtype: bool

In [32]:
# Indexing with the boolean mask keeps only the rows where it is True.
df2[df2["Age"]>30]

Unnamed: 0,Name,Gender,Age,Salary,Designation
A,Anna,F,34,70000,Eng.
L,Linda,F,42,85000,Eng.


In [33]:
# Filtering returned a new frame; df2 itself still contains all four rows.
df2

Unnamed: 0,Name,Gender,Age,Salary,Designation
J,John,M,28,65000,Doctor
A,Anna,F,34,70000,Eng.
P,Peter,M,29,62000,Doctor
L,Linda,F,42,85000,Eng.


In [41]:
# Combine conditions with & (element-wise AND). Each comparison needs its
# own parentheses because & binds more tightly than == and >.
df2[(df2["Gender"]=='F') & (df2['Salary']>60000)]

Unnamed: 0,Name,Gender,Age,Salary,Designation
A,Anna,F,34,70000,Eng.
L,Linda,F,42,85000,Eng.
