In [2]:
import pandas as pd
import numpy as np

In [3]:
# Sample records for five people. Age, Gender and Score each contain one
# deliberately missing entry (NaN) so later cells have gaps to work with.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    Age=[25, np.nan, 30, 35, 40],
    Gender=['F', 'M', np.nan, 'M', 'F'],
    Score=[85, 95, np.nan, 88, 92],
    City=['New York', 'Los Angeles', 'New York', 'Chicago', 'Los Angeles'],
)

# Build the working DataFrame; column order follows the key order above.
df = pd.DataFrame(data=data)

In [4]:
# Show the freshly built frame; Age, Gender and Score each have one missing value.
df

Unnamed: 0,Name,Age,Gender,Score,City
0,Alice,25.0,F,85.0,New York
1,Bob,,M,95.0,Los Angeles
2,Charlie,30.0,,,New York
3,David,35.0,M,88.0,Chicago
4,Eve,40.0,F,92.0,Los Angeles


# Column operations

### 1] columns

In [5]:
# Column labels of df, returned as a pandas Index.
df.columns

Index(['Name', 'Age', 'Gender', 'Score', 'City'], dtype='object')

### 2] insert()

In [8]:
# insert() modifies df in place, putting the new column at position 2
# (between 'Age' and 'Gender'). It refuses to add a label that already
# exists, so the guard keeps this cell safe to run more than once.
if 'NewAdded' not in df.columns:
    df.insert(loc=2, column='NewAdded', value=[1, 2, 3, 4, 5])

In [9]:
# 'NewAdded' now appears as the third column.
df

Unnamed: 0,Name,Age,NewAdded,Gender,Score,City
0,Alice,25.0,1,F,85.0,New York
1,Bob,,2,M,95.0,Los Angeles
2,Charlie,30.0,3,,,New York
3,David,35.0,4,M,88.0,Chicago
4,Eve,40.0,5,F,92.0,Los Angeles


### 3] rename()

In [12]:
# Rename the inserted column. rename() returns a new frame, so rebind df
# instead of passing inplace=True; the call stays chainable and re-runnable.
df = df.rename(columns={'NewAdded': 'Inserted_Column'})

In [13]:
# The column formerly called 'NewAdded' is now 'Inserted_Column'.
df

Unnamed: 0,Name,Age,Inserted_Column,Gender,Score,City
0,Alice,25.0,1,F,85.0,New York
1,Bob,,2,M,95.0,Los Angeles
2,Charlie,30.0,3,,,New York
3,David,35.0,4,M,88.0,Chicago
4,Eve,40.0,5,F,92.0,Los Angeles


### 4] reset_index()

In [14]:
# reset_index() moves the current row labels into a regular column named
# 'index' and installs a fresh default index. Rebind df rather than mutating
# it with inplace=True.
df = df.reset_index()

In [15]:
# A new 'index' column now holds the previous row labels (0-4).
df

Unnamed: 0,index,Name,Age,Inserted_Column,Gender,Score,City
0,0,Alice,25.0,1,F,85.0,New York
1,1,Bob,,2,M,95.0,Los Angeles
2,2,Charlie,30.0,3,,,New York
3,3,David,35.0,4,M,88.0,Chicago
4,4,Eve,40.0,5,F,92.0,Los Angeles


### 5] set_index()

In [16]:
# Promote the 'Name' column to the row index; it stops being an ordinary
# column. Rebind df rather than mutating it with inplace=True.
df = df.set_index('Name')

In [17]:
# Rows are now labelled by Name.
df

Unnamed: 0_level_0,index,Age,Inserted_Column,Gender,Score,City
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alice,0,25.0,1,F,85.0,New York
Bob,1,,2,M,95.0,Los Angeles
Charlie,2,30.0,3,,,New York
David,3,35.0,4,M,88.0,Chicago
Eve,4,40.0,5,F,92.0,Los Angeles


# drop(): removing columns and rows

### 1] Dropping a column

In [19]:
# Remove the helper 'index' column left behind by reset_index().
# drop() returns a new frame, so rebind df instead of passing inplace=True.
df = df.drop(columns=['index'])

In [20]:
# 'index' is gone; the remaining columns are unchanged.
df

Unnamed: 0_level_0,Age,Inserted_Column,Gender,Score,City
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alice,25.0,1,F,85.0,New York
Bob,,2,M,95.0,Los Angeles
Charlie,30.0,3,,,New York
David,35.0,4,M,88.0,Chicago
Eve,40.0,5,F,92.0,Los Angeles


### 2] Dropping a row

In [22]:
# drop() hands back a new frame without the 'Eve' row; df itself is not modified.
df_row_dropped = df.drop(index=['Eve'])

In [23]:
# Same rows as df except that 'Eve' has been removed.
df_row_dropped

Unnamed: 0_level_0,Age,Inserted_Column,Gender,Score,City
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alice,25.0,1,F,85.0,New York
Bob,,2,M,95.0,Los Angeles
Charlie,30.0,3,,,New York
David,35.0,4,M,88.0,Chicago


# Mathematical operations

In [32]:
# Keep only the two numeric columns; the arithmetic examples operate on this subset.
df1 = df[['Age', 'Score']]

In [39]:
# Left-hand operand for the arithmetic examples; note the NaN in Bob's Age and Charlie's Score.
df1

Unnamed: 0_level_0,Age,Score
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,25.0,85.0
Bob,,95.0
Charlie,30.0,
David,35.0,88.0
Eve,40.0,92.0


In [33]:
# Right-hand operand for the arithmetic examples: same column names as df1
# and the same five row labels, with no missing values.
data2 = dict(
    Age=[5, 10, 15, 20, 25],
    Score=[15, 5, 10, 12, 8],
)
df2 = pd.DataFrame(
    data=data2,
    index=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
)

#### Addition

In [35]:
# Element-wise df1 + df2, matched on row and column labels.
# fill_value=None (the default) lets a NaN in either operand carry through.
df_add = df1.add(df2, fill_value=None)

In [36]:
# Sums are NaN wherever df1 had a missing value (Bob's Age, Charlie's Score).
df_add

Unnamed: 0_level_0,Age,Score
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,30.0,100.0
Bob,,100.0
Charlie,45.0,
David,55.0,100.0
Eve,65.0,100.0


#### Subtraction

In [37]:
# Element-wise df1 - df2, matched on row and column labels.
# fill_value=None (the default) lets a NaN in either operand carry through.
df_sub = df1.sub(df2, fill_value=None)

In [38]:
# Differences are NaN wherever df1 had a missing value.
df_sub

Unnamed: 0_level_0,Age,Score
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,20.0,70.0
Bob,,90.0
Charlie,15.0,
David,15.0,76.0
Eve,15.0,84.0


#### Multiplication

In [40]:
# Element-wise df1 * df2, matched on row and column labels.
# fill_value=None (the default) lets a NaN in either operand carry through.
df_mul = df1.mul(df2, fill_value=None)

In [41]:
# Products are NaN wherever df1 had a missing value.
df_mul

Unnamed: 0_level_0,Age,Score
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,125.0,1275.0
Bob,,475.0
Charlie,450.0,
David,700.0,1056.0
Eve,1000.0,736.0


#### Division

In [42]:
# Element-wise df1 / df2 (true division), matched on row and column labels.
# fill_value=None (the default) lets a NaN in either operand carry through.
df_div = df1.div(df2, fill_value=None)

In [43]:
# Quotients are NaN wherever df1 had a missing value.
df_div

Unnamed: 0_level_0,Age,Score
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,5.0,5.666667
Bob,,19.0
Charlie,2.0,
David,1.75,7.333333
Eve,1.6,11.5


# Extracting and Filtering data

### index

In [53]:
# Row labels of df (the index named 'Name').
df.index

Index(['Alice', 'Bob', 'Charlie', 'David', 'Eve'], dtype='object', name='Name')

### unique() - returns the distinct values of a Series or Index

In [45]:
# Distinct values of the City column, returned as a NumPy array.
df['City'].unique()

array(['New York', 'Los Angeles', 'Chicago'], dtype=object)

In [50]:
# unique() is also available on the index; every name here is already distinct.
df.index.unique()

Index(['Alice', 'Bob', 'Charlie', 'David', 'Eve'], dtype='object', name='Name')

### nunique() - returns the number of distinct values in a Series or Index

In [51]:
# Number of distinct cities.
df['City'].nunique()

3

In [52]:
# Number of distinct row labels.
df.index.nunique()

5

### value_counts() - counts how many times each unique value occurs within the Series

In [55]:
# How many rows each city accounts for.
df['City'].value_counts()

New York       2
Los Angeles    2
Chicago        1
Name: City, dtype: int64

### between() - returns a boolean mask marking values that fall within a given range (both endpoints included); use the mask to extract the matching rows

In [56]:
# Current state of df, shown for reference before filtering.
df

Unnamed: 0_level_0,Age,Inserted_Column,Gender,Score,City
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alice,25.0,1,F,85.0,New York
Bob,,2,M,95.0,Los Angeles
Charlie,30.0,3,,,New York
David,35.0,4,M,88.0,Chicago
Eve,40.0,5,F,92.0,Los Angeles


In [58]:
# between() yields a boolean mask; both endpoints (30 and 35) count as matches.
# Indexing df with the mask keeps only the rows where it is True.
age_in_range = df['Age'].between(30, 35)
df[age_in_range]

Unnamed: 0_level_0,Age,Inserted_Column,Gender,Score,City
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charlie,30.0,3,,,New York
David,35.0,4,M,88.0,Chicago


### isin() - returns a boolean mask marking values that appear in a predefined collection; use the mask to extract the matching rows from a DataFrame

In [61]:
# isin() yields a boolean mask that is True where City is one of the listed values.
# Indexing df with the mask keeps only those rows.
in_listed_cities = df['City'].isin(['New York', 'Chicago'])
df[in_listed_cities]

Unnamed: 0_level_0,Age,Inserted_Column,Gender,Score,City
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alice,25.0,1,F,85.0,New York
Charlie,30.0,3,,,New York
David,35.0,4,M,88.0,Chicago


# Data type conversion

### dtypes

In [63]:
# Data type of each column.
df.dtypes

Age                float64
Inserted_Column      int64
Gender              object
Score              float64
City                object
dtype: object

### astype

In [68]:
# Down-cast Age from float64 to float32. errors='ignore' was removed: it makes
# astype() silently hand back the unconverted column if the cast ever fails,
# which would hide the problem instead of reporting it.
df['Age'] = df['Age'].astype('float32')

In [69]:
# Age now reports float32; the other columns are unchanged.
df.dtypes

Age                float32
Inserted_Column      int64
Gender              object
Score              float64
City                object
dtype: object

# Sorting

In [71]:
# Ascending sort on one column (ascending=True is the default).
# Returns a sorted copy; df keeps its original order.
df.sort_values(by='Inserted_Column', ascending=True)

Unnamed: 0_level_0,Age,Inserted_Column,Gender,Score,City
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alice,25.0,1,F,85.0,New York
Bob,,2,M,95.0,Los Angeles
Charlie,30.0,3,,,New York
David,35.0,4,M,88.0,Chicago
Eve,40.0,5,F,92.0,Los Angeles


In [72]:
# Descending sort on one column: largest Inserted_Column value first.
df.sort_values(by='Inserted_Column', ascending=False)

Unnamed: 0_level_0,Age,Inserted_Column,Gender,Score,City
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Eve,40.0,5,F,92.0,Los Angeles
David,35.0,4,M,88.0,Chicago
Charlie,30.0,3,,,New York
Bob,,2,M,95.0,Los Angeles
Alice,25.0,1,F,85.0,New York


In [74]:
# Order rows by their index labels (the names), ascending by default.
df.sort_index(ascending=True)

Unnamed: 0_level_0,Age,Inserted_Column,Gender,Score,City
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alice,25.0,1,F,85.0,New York
Bob,,2,M,95.0,Los Angeles
Charlie,30.0,3,,,New York
David,35.0,4,M,88.0,Chicago
Eve,40.0,5,F,92.0,Los Angeles


In [75]:
# Sort by several columns: the first key takes priority and later keys only
# break ties. Inserted_Column has no repeated values here, so 'Score' never
# comes into play and the result equals the single-key sort.
df.sort_values(by=['Inserted_Column', 'Score'])

Unnamed: 0_level_0,Age,Inserted_Column,Gender,Score,City
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alice,25.0,1,F,85.0,New York
Bob,,2,M,95.0,Los Angeles
Charlie,30.0,3,,,New York
David,35.0,4,M,88.0,Chicago
Eve,40.0,5,F,92.0,Los Angeles


# Duplicate data handling

In [76]:
# Add a column with repeated values (1 and 2 each appear twice) to demonstrate duplicate handling.
df['Duplicate'] = [1, 1, 2, 2, 3]

In [77]:
# df now carries the extra 'Duplicate' column.
df

Unnamed: 0_level_0,Age,Inserted_Column,Gender,Score,City,Duplicate
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alice,25.0,1,F,85.0,New York,1
Bob,,2,M,95.0,Los Angeles,1
Charlie,30.0,3,,,New York,2
David,35.0,4,M,88.0,Chicago,2
Eve,40.0,5,F,92.0,Los Angeles,3


In [79]:
# Flag rows whose 'Duplicate' value already appeared in an earlier row.
# With keep='first' (the default) the first occurrence is not flagged.
df.duplicated(subset=['Duplicate'], keep='first')

Name
Alice      False
Bob         True
Charlie    False
David       True
Eve        False
dtype: bool

In [81]:
# drop_duplicates(): keep only the first row for each distinct 'Duplicate' value.
# The result is a new frame; df is left as it was.
df_new = df.drop_duplicates(subset=['Duplicate'], keep='first')

In [82]:
# One row per distinct 'Duplicate' value remains.
df_new

Unnamed: 0_level_0,Age,Inserted_Column,Gender,Score,City,Duplicate
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alice,25.0,1,F,85.0,New York,1
Charlie,30.0,3,,,New York,2
Eve,40.0,5,F,92.0,Los Angeles,3


# rank() Method

In [86]:
# Rank scores from lowest (rank 1) upward. The arguments spell out the defaults:
# tied values would share their average rank, and a missing score stays NaN.
df['Score'].rank(method='average', ascending=True, na_option='keep')

Name
Alice      1.0
Bob        4.0
Charlie    NaN
David      2.0
Eve        3.0
Name: Score, dtype: float64

# copy() Method

In [87]:
# Make an independent copy of df (deep=True is the default for copy()).
df_copy = df.copy(deep=True)

In [88]:
# The copy has the same contents as df.
df_copy

Unnamed: 0_level_0,Age,Inserted_Column,Gender,Score,City,Duplicate
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alice,25.0,1,F,85.0,New York,1
Bob,,2,M,95.0,Los Angeles,1
Charlie,30.0,3,,,New York,2
David,35.0,4,M,88.0,Chicago,2
Eve,40.0,5,F,92.0,Los Angeles,3


# Extracting row and column index labels

In [89]:
# axes returns a two-element list: the row index first, then the column labels.
df.axes

[Index(['Alice', 'Bob', 'Charlie', 'David', 'Eve'], dtype='object', name='Name'),
 Index(['Age', 'Inserted_Column', 'Gender', 'Score', 'City', 'Duplicate'], dtype='object')]

# DataFrame inspection (shape and dimensions)

In [90]:
# shape is a (rows, columns) tuple; here 5 rows by 6 columns.
df.shape

(5, 6)

In [91]:
# ndim is the number of axes; a DataFrame is 2-dimensional.
df.ndim

2