## Pandas


In [None]:
import pandas as pd

In [None]:
# Sample employee table, assembled one column at a time.
data = {}
data['Names'] = ['samar', 'gurinder', 'amit', 'daman', 'maahi']
data['age'] = [21, 22, 23, 22, 21]
data['salary'] = [50000, 40000, 42000, 30000, 45000]

# Each dict key becomes a column; list positions line up as rows.
df = pd.DataFrame(data=data)

## Adding new column

In [None]:
# Bonus is a flat 10% of each employee's salary, stored as a new column.
df['Bonus'] = 0.1 * df['salary']
print(df)

## insert() method

In [None]:
# syntax = insert(loc, col_name, data)
# insert() modifies df in place and raises ValueError when the column already
# exists, so the guard keeps this cell safe to run more than once.
if 'ID' not in df.columns:
    # Sequential IDs starting at 101, one per row (sized from df rather than
    # hard-coded so the cell still works if the sample data changes length).
    df.insert(0, 'ID', list(range(101, 101 + len(df))))
print(df)

## loc[] method

In [None]:
# .loc[row_label, column_label] addresses a single cell: here the row
# labelled 0 in the 'salary' column is overwritten with a new value.
df.loc[0,'salary'] = 100000
print(df)

## increasing salary by 10%

In [None]:
# Give everyone a 10% raise: new salary = old salary + 10% of old salary.
# Note: every run of this cell applies the raise again.
df['salary'] = df['salary'] + df['salary'] * 0.1
print(df)

## Delete column using drop() method

In [None]:
# Remove the 'Bonus' column from df itself (axis=1 targets columns).
df.drop(labels=['Bonus'], axis=1, inplace=True)
print(df)

# Handling Missing values in dataset

checking null values using isnull() method


In [None]:
# Same kind of employee table, but the fourth record (index 3) is entirely
# missing so the null-handling methods below have something to work on.
data1 = {
    'Names':             ['samar', 'gurinder', 'amit', None, 'maahi'],
    'age':               [21, 22, 23, None, 21],
    'salary':            [50000, 40000, 42000, None, 45000],
    'Performance_Score': [100, 100, 80, None, 50],
}

df1 = pd.DataFrame(data=data1)
print(df1)

In [None]:
# Boolean frame shaped like df1: True where a value is missing, False elsewhere.
df1.isnull()

count of missing values in columns

In [None]:
# Summing the boolean mask column by column counts the missing values per column.
df1.isnull().sum()

Deleting missing values in dataset using dropna() method

In [None]:
# Both dropna() variants below are left commented out, so df1 is printed
# unchanged and still contains its missing values.

# drop every row that contains at least one missing value (modifies df1)
# df1.dropna(axis=0, inplace=True)

# drop only the rows whose 'Names' value is missing: subset limits which
# columns are checked, it does not remove a column (that would need axis=1).
# Without inplace=True this returns a new frame and leaves df1 untouched.
# df1.dropna(subset=['Names'])
print(df1)

Filling missing values using fillna() method

In [None]:
## filling missing values with zero
# df1.fillna(0, inplace=True)

# Fill each numeric gap with the mean of its own column. 'Names' is not part
# of the selection, so its missing entry is left as-is by this step.
df1 = df1.fillna(value=df1[['age', 'salary', 'Performance_Score']].mean())
print(df1)

In [None]:
# Text column: substitute a placeholder label for any missing name.
df1['Names'] = df1['Names'].fillna(value='Unknown')
print(df1)

## Interpolation using interpolate() method

In [None]:
# Marks for roll numbers 102 and 104 are deliberately missing.
missing_data = {}
missing_data['roll_no'] = [101, 102, 103, 104, 105]
missing_data['marks'] = [100, None, 40, None, 50]

md = pd.DataFrame(data=missing_data)
print(f'\nBefore interpolation \n{md}')

# Linear interpolation fills each gap from the known values on either side.
md['marks'] = md['marks'].interpolate(method='linear')
print(f'\nAfter interpolation \n{md}')

## Sorting data

In [None]:
# sorting data using .sort_values() method

unsort_data = {}
unsort_data['Names'] = ['samar', 'gurinder', 'amit', 'daman', 'maahi']
unsort_data['age'] = [21, 23, 22, 24, 23]
unsort_data['salary'] = [50000, 40000, 42000, 30000, 45000]

ud = pd.DataFrame(data=unsort_data)

# single-column variant, kept for reference:
# print(f'Before sorting \n{ud}')
# ud.sort_values(by='age', ascending=True, inplace=True)
# print(f'\nAfter sorting \n{ud}')

# sorting multiple columns: age ascending, ties broken by salary descending
ud = ud.sort_values(by=['age', 'salary'], ascending=[True, False])
print(ud)

## Aggregation and groupby() method


In [None]:
# Fresh employee table for the grouping examples (rebinds `data` and `df`).
data = {}
data['Names'] = ['samar', 'gurinder', 'amit', 'daman', 'maahi']
data['age'] = [21, 23, 22, 24, 23]
data['salary'] = [50000, 40000, 42000, 30000, 45000]

df = pd.DataFrame(data=data)

# group based on single column
# group = df.groupby('age')['salary'].sum() 

# grouping based on multiple columns: one total per (age, Names) pair
group = (
    df
    .groupby(by=['age', 'Names'])['salary']
    .sum()
)
print(group)


## Merging and Joining

In [None]:
# Two small tables that share an 'id' key: ids 1 and 2 appear in both,
# id 3 only in customer and id 4 only in orders.
customer = pd.DataFrame({
    'id': [1,2,3],
    'name': ['samar', 'gurinder', 'amit']
})

orders = pd.DataFrame({
    'id': [1,2,4],
    'price': [200,400,600]
})

# Keep every join result instead of overwriting one variable, so each join
# type can actually be inspected. The key is passed explicitly rather than
# relying on pandas inferring the common column.
#   inner: only matching records
#   outer: all records from both sides, NaN where there is no match
#   left : all left records + only matching right records
#   right: all right records + only matching left records
merge_results = {
    how: pd.merge(customer, orders, on='id', how=how)
    for how in ('inner', 'outer', 'left', 'right')
}

# cross: cartesian product of left and right records; it takes no join key,
# so the shared 'id' column comes back as id_x / id_y.
merge_results['cross'] = pd.merge(customer, orders, how='cross')

for how, result in merge_results.items():
    print(f'\n{how} merge \n{result}')

# `merged` ends up bound to the cross join, exactly as it did before.
merged = merge_results['cross']



In [None]:
# Two customer batches that share the same columns.
customer1 = pd.DataFrame(data={'id': [1, 2], 'name': ['samar', 'gurinder']})
customer2 = pd.DataFrame(data={'id': [3, 4], 'name': ['amit', 'daman']})

# Stack the batches row-wise; ignore_index=True renumbers the rows from 0
# instead of keeping each frame's own 0..1 labels.
concat = pd.concat(objs=[customer1, customer2], axis=0, ignore_index=True)
print(concat)