In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [28]:
# Small demo frame built in one step from a column -> values mapping.
# The None in 'Age' is stored as NaN, which is why that column shows up as float.
df = pd.DataFrame({
    'Names': ['Niraj', 'Kalyan', 'Python'],
    'Age': [20, None, 24],
    'City': ['Hyd', 'Blr', 'Chennai'],
    'Phone': ['234', '867', '234'],  # phone numbers are kept as strings
})
df

Unnamed: 0,Names,Age,City,Phone
0,Niraj,20.0,Hyd,234
1,Kalyan,,Blr,867
2,Python,24.0,Chennai,234


# Shape

In [4]:
df.shape # (rows, columns) tuple -- 3 rows and 4 columns for this frame

(3, 4)

# Size

In [6]:
df.size # total number of cells, i.e. rows * columns (3 * 4 = 12 here)

12

# Columns

In [8]:
# Column labels converted to a plain Python list.
df.columns.tolist()

['Names', 'Age', 'City', 'Phone']

# isnull

In [32]:
# Boolean frame of the same shape: True marks a missing value
# (here only the Age of row 1, which was given as None).
df.isnull()

Unnamed: 0,Names,Age,City,Phone
0,False,False,False,False
1,False,True,False,False
2,False,False,False,False


# length

In [12]:
len(df) # number of rows (the first entry of df.shape)

3

# info

In [30]:
# Prints a summary: index range, non-null count and dtype per column, memory usage.
# Age reports 2 non-null because one of its three values is missing.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Names   3 non-null      object 
 1   Age     2 non-null      float64
 2   City    3 non-null      object 
 3   Phone   3 non-null      object 
dtypes: float64(1), object(3)
memory usage: 228.0+ bytes


# Drop Duplicates

**df.drop_duplicates()**

- subset: **Based on specific Columns**
- keep: 'first', 'last', or False (the strings are lowercase; False is the boolean and drops every duplicated row)
- inplace: 'bool' = False,

In [37]:
# Demo frame for drop_duplicates: rows 0 and 1 are identical in every column,
# and each brand appears more than once.
df = pd.DataFrame({
    'Brand': ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'],
    'Style': ['Cup', 'Cup', 'Cup', 'Pack', 'Pack'],
    'Rating': [4, 4, 3.5, 15, 5],  # the 3.5 makes the whole column float
})
df

Unnamed: 0,Brand,Style,Rating
0,Yum Yum,Cup,4.0
1,Yum Yum,Cup,4.0
2,Indomie,Cup,3.5
3,Indomie,Pack,15.0
4,Indomie,Pack,5.0


In [39]:
# With no arguments, rows are compared across all columns and the first of each
# duplicate set is kept, so row 1 (a copy of row 0) is removed from the result.
df.drop_duplicates()

Unnamed: 0,Brand,Style,Rating
0,Yum Yum,Cup,4.0
2,Indomie,Cup,3.5
3,Indomie,Pack,15.0
4,Indomie,Pack,5.0


In [41]:
# Only 'Brand' decides what counts as a duplicate; the first row of each brand is kept.
df.drop_duplicates(subset = ['Brand'])

Unnamed: 0,Brand,Style,Rating
0,Yum Yum,Cup,4.0
2,Indomie,Cup,3.5


In [43]:
# keep='last' retains the final row of each brand (rows 1 and 4) instead of the first.
df.drop_duplicates(subset = ['Brand'], keep = 'last')

Unnamed: 0,Brand,Style,Rating
1,Yum Yum,Cup,4.0
4,Indomie,Pack,5.0


# df.rename

**df.rename()**

- mapper: 'Renamer | None' = None,
- index: 'Renamer | None' = None,
- columns: 'Renamer | None' = None,
- axis: 'Axis | None' = None,
- copy: 'bool | None' = None,
- inplace: 'bool' = False,

In [48]:
# Demo frame reused by the rename, loc, iloc and CSV examples.
df = pd.DataFrame({
    'Names': ['Ramesh', 'Suresh', 'Sathish'],
    'Age': [20, 22, 24],
    'City': ['Hyd', 'Blr', 'Chennai'],
})
df

Unnamed: 0,Names,Age,City
0,Ramesh,20,Hyd
1,Suresh,22,Blr
2,Sathish,24,Chennai


In [50]:
# Column labels, returned as a pandas Index object.
df.columns

Index(['Names', 'Age', 'City'], dtype='object')

In [54]:
# axis=1 applies the mapping to column labels. The renamed frame is only returned:
# nothing is assigned back, so df itself keeps the 'City' column.
df.rename({'City' : 'city'}, axis = 1)

Unnamed: 0,Names,Age,city
0,Ramesh,20,Hyd
1,Suresh,22,Blr
2,Sathish,24,Chennai


In [56]:
# Re-display df to show that the rename call did not modify it.
df

Unnamed: 0,Names,Age,City
0,Ramesh,20,Hyd
1,Suresh,22,Blr
2,Sathish,24,Chennai


In [58]:
# index= maps existing row labels to new ones: 0/1/2 become 'A'/'B'/'C' in the returned frame.
df.rename(index = {0:'A' , 1:'B', 2:'C'})

Unnamed: 0,Names,Age,City
A,Ramesh,20,Hyd
B,Suresh,22,Blr
C,Sathish,24,Chennai


In [60]:
# Build the old-label -> new-label mapping once ({0: 'A', 1: 'B', 2: 'C'})
# and hand it to rename through a variable instead of an inline literal.
inx = dict(enumerate(['A', 'B', 'C']))
df.rename(index=inx)

Unnamed: 0,Names,Age,City
A,Ramesh,20,Hyd
B,Suresh,22,Blr
C,Sathish,24,Chennai


In [66]:
# columns= takes an old-name -> new-name mapping; all three columns are renamed in the returned frame.
df.rename(columns = {'Names':'A', 'Age':'B', 'City':'C'})

Unnamed: 0,A,B,C
0,Ramesh,20,Hyd
1,Suresh,22,Blr
2,Sathish,24,Chennai


# Step-8 : Append the Rows

- **loc** selects rows and columns by label; assigning to a label that does not exist yet (e.g. `df.loc[3] = [...]`) appends a new row

- **Syntax** : df.loc[row_label , column_name]

In [71]:
# Show the frame that the .loc examples operate on.
df

Unnamed: 0,Names,Age,City
0,Ramesh,20,Hyd
1,Suresh,22,Blr
2,Sathish,24,Chennai


In [78]:
# Scalar lookup: the value at row label 2 in column 'City'.
df.loc[2,'City']

'Chennai'

- Multiple Rows from multiple columns

    - Example : df.loc[[1,2] , ['Names','City']]

In [81]:
# Lists on both axes pick several rows and several columns; the result is a DataFrame.
df.loc[[0,2] , ['Names','City']]

Unnamed: 0,Names,City
0,Ramesh,Hyd
2,Sathish,Chennai


- df.loc[Start:Stop:Step] — slices by label; unlike ordinary Python slicing, the Stop label is included

    - df.loc[0:3,'City']

In [84]:
# Row-label slice for one column. A .loc slice includes its stop label, but label 3
# does not exist in this frame, so the rows returned are 0 through 2.
df.loc[0:3,'City']

0        Hyd
1        Blr
2    Chennai
Name: City, dtype: object

In [86]:
# A single row label returns that row as a Series indexed by the column names.
df.loc[2]

Names    Sathish
Age           24
City     Chennai
Name: 2, dtype: object

In [88]:
# A list of labels, even a one-element list, returns a DataFrame.
df.loc[[2]]

Unnamed: 0,Names,Age,City
2,Sathish,24,Chennai


## Difference between df.loc[2] and df.loc[[2]]

In [95]:
# Show the frame used for the Series-vs-DataFrame comparison.
df

Unnamed: 0,Names,Age,City
0,Ramesh,20,Hyd
1,Suresh,22,Blr
2,Sathish,24,Chennai


In [91]:
# Print the result type of each selection in turn:
# a scalar label gives a Series, a list of labels gives a DataFrame.
for selection in (df.loc[2], df.loc[[2]]):
    print(type(selection))

<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>


In [93]:
# Rows labelled 1 and 2, with every column.
df.loc[[1,2]]

Unnamed: 0,Names,Age,City
1,Suresh,22,Blr
2,Sathish,24,Chennai


In [99]:
# The trailing comma makes the key a one-element tuple (a row selector only);
# the result is the same as df.loc[[1,2]].
df.loc[[1,2],]

Unnamed: 0,Names,Age,City
1,Suresh,22,Blr
2,Sathish,24,Chennai


In [101]:
# Row-label slice with no column selector: every column for rows 0 through 2
# (label 3 is not present in the index).
df.loc[0:3]

Unnamed: 0,Names,Age,City
0,Ramesh,20,Hyd
1,Suresh,22,Blr
2,Sathish,24,Chennai


In [103]:
# A bare ':' selects every row.
df.loc[:]

Unnamed: 0,Names,Age,City
0,Ramesh,20,Hyd
1,Suresh,22,Blr
2,Sathish,24,Chennai


In [105]:
# Scalar selectors on both axes return the plain cell value.
df.loc[2,'City']

'Chennai'

In [107]:
# List selectors on both axes keep the result a DataFrame, here with one row and one column.
df.loc[[2],['City']]

Unnamed: 0,City
2,Chennai


# df.iloc[]

- df.loc[row_label, column_name] — selection by label

- df.iloc[row_position, column_position] — selection by 0-based integer position

In [112]:
# Show the frame that the .iloc examples operate on.
df

Unnamed: 0,Names,Age,City
0,Ramesh,20,Hyd
1,Suresh,22,Blr
2,Sathish,24,Chennai


In [114]:
# Position-based lookup (0-based): third row, third column.
df.iloc[2,2]

'Chennai'

In [116]:
# A single integer position returns that row as a Series.
df.iloc[2]

Names    Sathish
Age           24
City     Chennai
Name: 2, dtype: object

In [118]:
# A list of positions, even a one-element list, returns a DataFrame.
df.iloc[[2]]

Unnamed: 0,Names,Age,City
2,Sathish,24,Chennai


In [120]:
# Rows at positions 1 and 2, with every column.
df.iloc[[1,2]]

Unnamed: 0,Names,Age,City
1,Suresh,22,Blr
2,Sathish,24,Chennai


In [122]:
# Positional slice: the stop is exclusive, so this covers positions 0, 1 and 2.
df.iloc[0:3]

Unnamed: 0,Names,Age,City
0,Ramesh,20,Hyd
1,Suresh,22,Blr
2,Sathish,24,Chennai


In [124]:
# A bare ':' selects every row.
df.iloc[:]

Unnamed: 0,Names,Age,City
0,Ramesh,20,Hyd
1,Suresh,22,Blr
2,Sathish,24,Chennai


In [126]:
# Integer selectors on both axes return the plain cell value.
df.iloc[2,2]

'Chennai'

In [128]:
# List selectors on both axes keep the result a DataFrame, here with one row and one column.
df.iloc[[2],[2]]

Unnamed: 0,City
2,Chennai


In [130]:
# A list of row positions with a single integer column returns a Series
# (the 'City' values for rows 1 and 2).
df.iloc[[1,2] , 2]

1        Blr
2    Chennai
Name: City, dtype: object

In [134]:
# Every row of the column at position 2, returned as a Series.
df.iloc[:,2]

0        Hyd
1        Blr
2    Chennai
Name: City, dtype: object

## Save the file

In [138]:
# Show the frame that is about to be written to disk.
df

Unnamed: 0,Names,Age,City
0,Ramesh,20,Hyd
1,Suresh,22,Blr
2,Sathish,24,Chennai


In [144]:
# Write the frame to 'Employees.csv' (a relative path, resolved against the working directory).
# index=False leaves the row labels out of the file, so only the data columns are written.
df.to_csv('Employees.csv', index = False)

In [146]:
# Read the file back to check the round trip; the loaded frame gets a fresh default integer index.
pd.read_csv('Employees.csv')

Unnamed: 0,Names,Age,City
0,Ramesh,20,Hyd
1,Suresh,22,Blr
2,Sathish,24,Chennai
