In [66]:
import pandas as pd
import numpy as np

In [67]:
# Sample employee records, one row per person: [name, age, salary].
data = [
    ['Raju', 21, 30000],
    ['Vijay', 22, 25000],
    ['Mahesh', 23, 36000],
    ['Vivek', 23, 12000],
    ['Sanjana', 22, 28000],
]
column_names = ['Name', 'Age', 'Salary']
df = pd.DataFrame(data, columns=column_names)
df.head()

Unnamed: 0,Name,Age,Salary
0,Raju,21,30000
1,Vijay,22,25000
2,Mahesh,23,36000
3,Vivek,23,12000
4,Sanjana,22,28000


- SLICING 

slicing rows in a dataframe

In [68]:
# Positional row slice: keep rows 0, 1 and 2 (the end position 3 is excluded),
# which leaves out the last two of the five rows.
newdf1 = df.iloc[:3]
newdf1.head()

Unnamed: 0,Name,Age,Salary
0,Raju,21,30000
1,Vijay,22,25000
2,Mahesh,23,36000


slicing columns in a dataframe

In [69]:
# Positional column slice: all rows, first two columns (Name and Age);
# the Salary column is left out.
newdf2 = df.iloc[:, :2]
newdf2.head()

Unnamed: 0,Name,Age
0,Raju,21
1,Vijay,22
2,Mahesh,23
3,Vivek,23
4,Sanjana,22


- MANIPULATING DATA

In [70]:
# Give everyone a 5000 raise by overwriting the Salary column.
# NOTE: this updates df itself, so running the cell again adds another 5000.
df['Salary'] = df['Salary'].add(5000)
df.head()

Unnamed: 0,Name,Age,Salary
0,Raju,21,35000
1,Vijay,22,30000
2,Mahesh,23,41000
3,Vivek,23,17000
4,Sanjana,22,33000


- SORTING DATA

In [71]:
# Sort by Age, smallest first (ascending is the default). sort_values returns
# a new frame and leaves df untouched.
# NOTE: this rebinds newdf2, replacing the column slice assigned to it earlier.
newdf2 = df.sort_values('Age')
newdf2.head()

Unnamed: 0,Name,Age,Salary
0,Raju,21,35000
1,Vijay,22,30000
4,Sanjana,22,33000
2,Mahesh,23,41000
3,Vivek,23,17000


In [72]:
# Same sort key, largest Age first; the result is again a new frame.
newdf3 = df.sort_values('Age', ascending=False)
newdf3.head()

Unnamed: 0,Name,Age,Salary
2,Mahesh,23,41000
3,Vivek,23,17000
1,Vijay,22,30000
4,Sanjana,22,33000
0,Raju,21,35000


In [73]:
# Show df again: the two sorts above returned new frames, so df keeps its row order.
df.head(n=5)

Unnamed: 0,Name,Age,Salary
0,Raju,21,35000
1,Vijay,22,30000
2,Mahesh,23,41000
3,Vivek,23,17000
4,Sanjana,22,33000


In [74]:
# Sorting can also be applied to df directly instead of storing the result in
# another dataframe: inplace=True reorders df itself (ascending is the default).
df.sort_values('Age', inplace=True)
df.head()

Unnamed: 0,Name,Age,Salary
0,Raju,21,35000
1,Vijay,22,30000
4,Sanjana,22,33000
2,Mahesh,23,41000
3,Vivek,23,17000


- CHECKING FOR MISSING VALUES

In [75]:
# Sample records with deliberately missing entries: [name, experience, salary].
# Missing values are written explicitly as np.nan:
#   * np.nan is the canonical spelling; the legacy upper-case alias np.NAN is
#     not part of the NumPy 2 namespace.
#   * every row lists all three fields, rather than relying on pandas to pad
#     a short row with NaN.
data = [
    ['Priya', 2, 30000],
    ['Swaroop', np.nan, 25000],   # Experience missing
    ['Suresh', 5, 36000],
    ['Vivek', 6, np.nan],         # Salary missing
    ['Sanjana', 10, 50000],
]
df2 = pd.DataFrame(data, columns=['Name', 'Experience', 'Salary'])
df2.head()

Unnamed: 0,Name,Experience,Salary
0,Priya,2.0,30000.0
1,Swaroop,,25000.0
2,Suresh,5.0,36000.0
3,Vivek,6.0,
4,Sanjana,10.0,50000.0


The dataframe above contains missing values. To count the missing values in each column:

In [76]:
# Per-column count of missing entries (isna is the same check as isnull).
df2.isna().sum()

Name          0
Experience    1
Salary        1
dtype: int64

In [77]:
# Option 1: discard every row that has at least one missing value.
# dropna returns a new frame; df2 still holds the incomplete rows.
newdf4 = df2.dropna(axis=0, how='any')
newdf4.head()

Unnamed: 0,Name,Experience,Salary
0,Priya,2.0,30000.0
2,Suresh,5.0,36000.0
4,Sanjana,10.0,50000.0


In [78]:
# Option 2: keep all rows and replace each missing value with a constant (0 here).
newdf5 = df2.fillna(value=0)
newdf5.head()

Unnamed: 0,Name,Experience,Salary
0,Priya,2.0,30000.0
1,Swaroop,0.0,25000.0
2,Suresh,5.0,36000.0
3,Vivek,6.0,0.0
4,Sanjana,10.0,50000.0


In [79]:
# Option 3: fill with the column's median.
# The missing Experience entry is replaced by the median of the Experience
# values that are present, and the column is written back into df2.
df2['Experience'] = df2['Experience'].fillna(
    value=df2['Experience'].median()
)
df2.head()

Unnamed: 0,Name,Experience,Salary
0,Priya,2.0,30000.0
1,Swaroop,5.5,25000.0
2,Suresh,5.0,36000.0
3,Vivek,6.0,
4,Sanjana,10.0,50000.0


In [83]:
# Same median fill for Salary: the missing entry is replaced by the median of
# the Salary values that are present.
df2['Salary'] = df2['Salary'].fillna(
    value=df2['Salary'].median()
)
df2.head()

Unnamed: 0,Name,Experience,Salary
0,Priya,2.0,30000.0
1,Swaroop,5.5,25000.0
2,Suresh,5.0,36000.0
3,Vivek,6.0,33000.0
4,Sanjana,10.0,50000.0


- DROPPING A COLUMN

In [85]:
# Drop multiple columns at once; the result is a new frame and df2 keeps all
# of its columns.
newdf6 = df2.drop(columns=['Experience', 'Salary'])
newdf6.head()

Unnamed: 0,Name
0,Priya
1,Swaroop
2,Suresh
3,Vivek
4,Sanjana


- CONCATENATING DATAFRAMES

In [87]:
# First frame to combine: df as it currently stands.
df.head(n=5)

Unnamed: 0,Name,Age,Salary
0,Raju,21,35000
1,Vijay,22,30000
4,Sanjana,22,33000
2,Mahesh,23,41000
3,Vivek,23,17000


In [89]:
# Strip Name and Salary from df2 itself so that only Experience is left to
# attach to df.
# NOTE: this modifies df2 in place; re-running the cell fails because the
# columns are already gone.
df2.drop(columns=['Name', 'Salary'], inplace=True)
df2.head()

Unnamed: 0,Experience
0,2.0
1,5.5
2,5.0
3,6.0
4,10.0


In [91]:
# In this way we can join two dataframes column-wise (axis=1).
# Rows are matched on their index labels, and join="inner" keeps only the
# labels that appear in both frames.
frames_side_by_side = [df, df2]
combined_df = pd.concat(frames_side_by_side, axis=1, join="inner")
combined_df.head()

Unnamed: 0,Name,Age,Salary,Experience
0,Raju,21,35000,2.0
1,Vijay,22,30000,5.5
4,Sanjana,22,33000,10.0
2,Mahesh,23,41000,5.0
3,Vivek,23,17000,6.0


In [92]:
# A second batch of employee records with the same layout: [name, age, salary].
data = [
    ['Ragha', 26, 30000],
    ['Swathi', 25, 27000],
    ['Mani', 24, 45000],
    ['Durga', 23, 18000],
    ['Keerthi', 22, 48000],
]
df3 = pd.DataFrame(data, columns=['Name', 'Age', 'Salary'])

In [100]:
# Preview the second batch of records.
df3.head(n=5)

Unnamed: 0,Name,Age,Salary
0,Ragha,26,30000
1,Swathi,25,27000
2,Mani,24,45000
3,Durga,23,18000
4,Keerthi,22,48000


In [106]:
# Preview df before stacking the two frames.
df.head(n=5)

Unnamed: 0,Name,Age,Salary
0,Raju,21,35000
1,Vijay,22,30000
4,Sanjana,22,33000
2,Mahesh,23,41000
3,Vivek,23,17000


In [110]:
# Stack df and df3 row-wise (axis=0 is the default for concat).
# Each frame keeps its own index labels, so labels repeat in the result.
combined_df2 = pd.concat([df, df3], axis=0)
combined_df2

Unnamed: 0,Name,Age,Salary
0,Raju,21,35000
1,Vijay,22,30000
4,Sanjana,22,33000
2,Mahesh,23,41000
3,Vivek,23,17000
0,Ragha,26,30000
1,Swathi,25,27000
2,Mani,24,45000
3,Durga,23,18000
4,Keerthi,22,48000


- RESETTING INDEX VALUES AFTER MANIPULATING DATA 

In [112]:
# Renumber the rows 0..n-1 directly on combined_df2.
# drop=True discards the old labels instead of keeping them as a new column.
combined_df2.reset_index(inplace=True, drop=True)
combined_df2

Unnamed: 0,Name,Age,Salary
0,Raju,21,35000
1,Vijay,22,30000
2,Sanjana,22,33000
3,Mahesh,23,41000
4,Vivek,23,17000
5,Ragha,26,30000
6,Swathi,25,27000
7,Mani,24,45000
8,Durga,23,18000
9,Keerthi,22,48000


- ADDING A ROW TO combined_df2

In [118]:
# A one-row dataframe holding the record to append; its single row is labelled 0.
new_row = pd.DataFrame(
    {'Name': ['Sudheer'], 'Age': [27], 'Salary': [38000]},
    index=[0],
)
new_row

Unnamed: 0,Name,Age,Salary
0,Sudheer,27,38000


In [120]:
# Append the new row to the existing dataframe.
# ignore_index=True renumbers the result 0..n-1, so the appended row does not
# keep its original label 0.
combined_df3 = pd.concat([combined_df2, new_row], ignore_index=True)
combined_df3

Unnamed: 0,Name,Age,Salary
0,Raju,21,35000
1,Vijay,22,30000
2,Sanjana,22,33000
3,Mahesh,23,41000
4,Vivek,23,17000
5,Ragha,26,30000
6,Swathi,25,27000
7,Mani,24,45000
8,Durga,23,18000
9,Keerthi,22,48000
