In [3]:
import pandas as pd
from sklearn.impute import SimpleImputer
# Toy dataset: each numeric column contains exactly one missing value.
data = {
    'Age': [25, 20, None, 35, 40],
    'Salary': [50000, None, 65000, 70000, 80000],
}
df = pd.DataFrame(data)
print("Data Frame before imputing missing values using mean:\n", df)

# Mean imputation: every NaN is replaced by the mean of the observed
# values in its own column.
numeric_cols = ['Age', 'Salary']
imputer = SimpleImputer(strategy='mean')
df[numeric_cols] = imputer.fit_transform(df[numeric_cols])
print("\nData Frame after imputing missing values using mean:\n", df)


Data Frame before imputing missing values using mean:
     Age   Salary
0  25.0  50000.0
1  20.0      NaN
2   NaN  65000.0
3  35.0  70000.0
4  40.0  80000.0

Data Frame after imputing missing values using mean:
     Age   Salary
0  25.0  50000.0
1  20.0  66250.0
2  30.0  65000.0
3  35.0  70000.0
4  40.0  80000.0


In [4]:
import pandas as pd
from sklearn.impute import SimpleImputer
# Toy dataset: each column contains exactly one missing value.
data = {
    'Age': [25, 30, None, 35, 40],
    'Salary': [50000, None, 65000, 70000, 80000],
}
df = pd.DataFrame(data)
print("Data Frame before forward fill method:\n", df)

# Forward fill: each NaN takes the last valid value above it in the same column.
# DataFrame.ffill() is used instead of fillna(method='ffill', inplace=True):
# the `method` keyword of fillna is deprecated and triggers a FutureWarning.
# Rebinding `df` (rather than mutating in place) keeps the cell easy to follow.
df = df.ffill()
print("\nData Frame after forward fill method:\n", df)



Data Frame before forward fill method:
     Age   Salary
0  25.0  50000.0
1  30.0      NaN
2   NaN  65000.0
3  35.0  70000.0
4  40.0  80000.0

Data Frame after forward fill method:
     Age   Salary
0  25.0  50000.0
1  30.0  50000.0
2  30.0  65000.0
3  35.0  70000.0
4  40.0  80000.0


  df.fillna(method='ffill', inplace=True)


In [5]:
import pandas as pd  
# Toy dataset: row 1 is missing in both columns.
data = {
    'Age': [25, None, 35, 40],
    'Salary': [50000, None, 65000, 70000],
}
df = pd.DataFrame(data)
print("Data Frame before backward fill method:\n", df)

# Backward fill: each NaN takes the next valid value below it in the same column.
# DataFrame.bfill() is used instead of fillna(method='bfill', inplace=True):
# the `method` keyword of fillna is deprecated and triggers a FutureWarning.
# Rebinding `df` (rather than mutating in place) keeps the cell easy to follow.
df = df.bfill()
print("\nData Frame after backward fill method:\n", df)


Data Frame before backward fill method:
     Age   Salary
0  25.0  50000.0
1   NaN      NaN
2  35.0  65000.0
3  40.0  70000.0

Data Frame after backward fill method:
     Age   Salary
0  25.0  50000.0
1  35.0  65000.0
2  35.0  65000.0
3  40.0  70000.0


  df.fillna(method='bfill', inplace=True)


In [6]:
import pandas as pd  
from sklearn.preprocessing import OneHotEncoder  
# Single categorical column to be one-hot encoded.
data = {
    'City': ['Mandya', 'Bengaluru', 'India', 'Durga', 'Jalandhar', 'Tumakuru'],
}
df = pd.DataFrame(data)
print("Data Frame before One Hot Encoding:\n", df)

# sparse_output=False makes the encoder return a dense array, which can be
# wrapped in a DataFrame directly.
encoder = OneHotEncoder(sparse_output=False)
encoded = encoder.fit_transform(df[['City']])

# Label the indicator columns with the encoder's own feature names
# (of the form "City_<category>").
feature_names = encoder.get_feature_names_out(['City'])
encoded_df = pd.DataFrame(encoded, columns=feature_names)
print("\nData Frame after One Hot Encoding:\n", encoded_df)


Data Frame before One Hot Encoding:
         City
0     Mandya
1  Bengaluru
2      India
3      Durga
4  Jalandhar
5   Tumakuru

Data Frame after One Hot Encoding:
    City_Bengaluru  City_Durga  City_India  City_Jalandhar  City_Mandya  \
0             0.0         0.0         0.0             0.0          1.0   
1             1.0         0.0         0.0             0.0          0.0   
2             0.0         0.0         1.0             0.0          0.0   
3             0.0         1.0         0.0             0.0          0.0   
4             0.0         0.0         0.0             1.0          0.0   
5             0.0         0.0         0.0             0.0          0.0   

   City_Tumakuru  
0            0.0  
1            0.0  
2            0.0  
3            0.0  
4            0.0  
5            1.0  


In [7]:
import pandas as pd  
from sklearn.preprocessing import LabelEncoder  
# Binary categorical column to be converted to integer codes.
gender_values = ['Male', 'Female', 'Female', 'Male', 'Female']
df = pd.DataFrame({'Gender': gender_values})
print("Data Frame before Label Encoding:\n", df)

# LabelEncoder maps each distinct label to an integer code; the string
# column is then overwritten with those codes.
encoder = LabelEncoder()
gender_codes = encoder.fit_transform(df['Gender'])
df['Gender'] = gender_codes
print("\nData Frame after Label Encoding:\n", df)

Data Frame before Label Encoding:
    Gender
0    Male
1  Female
2  Female
3    Male
4  Female

Data Frame after Label Encoding:
    Gender
0       1
1       0
2       0
3       1
4       0


In [3]:
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

# Two numeric features with different ranges.
feature_values = {
    'A': [10, 20, 30, 40, 50],
    'B': [5, 15, 25, 35, 45],
}
df = pd.DataFrame(feature_values)

print("Feature scaling before Min-Max:")
print(df)

# Min-Max scaling rescales each column independently to the [0, 1] range.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df)

# fit_transform returns a bare array, so re-attach the original column names.
df_scaled = pd.DataFrame(scaled_values, columns=df.columns)

print("Feature scaling after Min-Max:")
print(df_scaled)

Feature scaling before Min-Max:
    A   B
0  10   5
1  20  15
2  30  25
3  40  35
4  50  45
Feature scaling after Min-Max:
      A     B
0  0.00  0.00
1  0.25  0.25
2  0.50  0.50
3  0.75  0.75
4  1.00  1.00


In [4]:
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Single numeric feature to standardize.
data = {'salary': [25000, 30000, 45000, 60000, 65000, 70000, 85000]}
df = pd.DataFrame(data)

# Show the raw values first.
print("Feature scaling (before standardization):")
print(df)

# Standardization: centre the column on zero and scale it to unit variance.
scaler = StandardScaler()
standardized_values = scaler.fit_transform(df)

# fit_transform returns a bare array, so re-attach the original column names.
df_scaled = pd.DataFrame(standardized_values, columns=df.columns)

# Show the standardized values.
print("Feature scaling (after standardization):")
print(df_scaled)

Feature scaling (before standardization):
   salary
0   25000
1   30000
2   45000
3   60000
4   65000
5   70000
6   85000
Feature scaling (after standardization):
     salary
0 -1.445959
1 -1.199088
2 -0.458475
3  0.282138
4  0.529009
5  0.775880
6  1.516493
