In [1]:
import pandas as pd
import numpy as np

In [2]:
# Toy employee records used throughout the notebook: one list per column,
# all lists the same length so they line up row by row.
data = {
    "age": [22, 28, 35, 45, 52],
    "salary": [30000, 45000, 60000, 80000, 150000],
    "experience": [1, 3, 7, 15, 25],
    "city": ["Colombo", "Kandy", "Colombo", "Galle", "Colombo"],
}

# Each dict key becomes a column; rows get the default 0..n-1 integer index.
df = pd.DataFrame.from_dict(data, orient="columns")
print(df)

   age  salary  experience     city
0   22   30000           1  Colombo
1   28   45000           3    Kandy
2   35   60000           7  Colombo
3   45   80000          15    Galle
4   52  150000          25  Colombo


In [3]:
# Combine existing columns: salary divided by experience.
#
# A row whose experience is 0 would divide by zero and put +/-inf (or NaN for
# 0/0) into the new feature. Masking zero denominators first turns those rows
# into NaN instead, so they can be handled with the usual missing-value tools
# (isna / fillna / dropna). Rows with non-zero experience are unaffected.
nonzero_experience = df["experience"].mask(df["experience"] == 0)
df["salary_per_experience"] = df["salary"] / nonzero_experience

print("Salary per experience: ")
print(df[["salary", "experience", "salary_per_experience"]])

Salary per experience: 
   salary  experience  salary_per_experience
0   30000           1           30000.000000
1   45000           3           15000.000000
2   60000           7            8571.428571
3   80000          15            5333.333333
4  150000          25            6000.000000


In [4]:
# Create age groups by binning "age" into three labelled ranges.
#
# Bins are right-inclusive: [0, 25] -> "Young", (25, 40] -> "Adult",
# (40, inf) -> "senior".
# - The top edge is open-ended (np.inf) so that ages above 60 still receive a
#   label instead of silently becoming NaN.
# - include_lowest=True keeps an age of exactly 0 inside the first bin.
# NOTE(review): "senior" is lower-case unlike the other two labels; it is left
# unchanged because printed outputs and any downstream comparisons may depend
# on the exact string.
df["age_group"] = pd.cut(
    df["age"],
    bins=[0, 25, 40, np.inf],
    labels=["Young", "Adult", "senior"],
    include_lowest=True,
)

print("Age Groups: ")
print(df[["age", "age_group"]])

Age Groups: 
   age age_group
0   22     Young
1   28     Adult
2   35     Adult
3   45    senior
4   52    senior


In [5]:
# One-hot encode the categorical feature(s). Only the listed columns are
# replaced by indicator columns named "<column>_<value>"; the result is a new
# frame bound to df_encoded.
categorical_columns = ["city"]
df_encoded = pd.get_dummies(data=df, columns=categorical_columns)

# Heading first, then the encoded frame on the following line.
print("After one-hot encoding: ", df_encoded, sep="\n")

After one-hot encoding: 
   age  salary  experience  salary_per_experience age_group  city_Colombo  \
0   22   30000           1           30000.000000     Young          True   
1   28   45000           3           15000.000000     Adult         False   
2   35   60000           7            8571.428571     Adult          True   
3   45   80000          15            5333.333333    senior         False   
4   52  150000          25            6000.000000    senior          True   

   city_Galle  city_Kandy  
0       False       False  
1       False        True  
2       False       False  
3        True       False  
4       False       False  


In [6]:
# Apply a log transform to salary. np.log1p computes log(1 + x) element-wise.
salary_column = df_encoded["salary"]
df_encoded["salary_log"] = np.log1p(salary_column)

# Show the raw and transformed values side by side.
before_and_after = df_encoded[["salary", "salary_log"]]
print("Salary before and after log transform: ", before_and_after, sep="\n")

Salary before and after log transform: 
   salary  salary_log
0   30000   10.308986
1   45000   10.714440
2   60000   11.002117
3   80000   11.289794
4  150000   11.918397
