In [1]:
import pandas as pd
import numpy as np

# Location of the raw CSV, kept in one named constant so it is easy to repoint.
DATA_PATH = "/content/Life Expectancy Data.csv"

# Load dataset
df = pd.read_csv(DATA_PATH)

# Normalise header whitespace immediately after loading. The recorded output
# shows headers padded with stray spaces, and exact-name lookups such as
# df['Life expectancy'] would raise KeyError until they are stripped.
# The operation is idempotent, so repeating it later is harmless.
df.columns = df.columns.str.strip()

# Show first 5 rows
print("First 5 rows of the dataset:")
print(df.head())

# Dataset info (columns, dtypes, non-null counts).
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so appends a stray "None" line.
print("\nDataset Info:")
df.info()

# Check shape (rows, columns)
print("\nDataset Shape:")
print(df.shape)

# Check missing values
print("\nMissing Values per Column:")
print(df.isnull().sum())


First 5 rows of the dataset:
       Country  Year      Status  Life expectancy   Adult Mortality  \
0  Afghanistan  2015  Developing              65.0            263.0   
1  Afghanistan  2014  Developing              59.9            271.0   
2  Afghanistan  2013  Developing              59.9            268.0   
3  Afghanistan  2012  Developing              59.5            272.0   
4  Afghanistan  2011  Developing              59.2            275.0   

   infant deaths  Alcohol  percentage expenditure  Hepatitis B  Measles   ...  \
0             62     0.01               71.279624         65.0      1154  ...   
1             64     0.01               73.523582         62.0       492  ...   
2             66     0.01               73.219243         64.0       430  ...   
3             69     0.01               78.184215         67.0      2787  ...   
4             71     0.01                7.097109         68.0      3013  ...   

   Polio  Total expenditure  Diphtheria    HIV/AIDS      

In [2]:
from sklearn.preprocessing import LabelEncoder

# Label-encode 'Status': learn the label set first, then write the resulting
# codes into a new 'Status_LE' column.
le = LabelEncoder()
le.fit(df['Status'])
df['Status_LE'] = le.transform(df['Status'])

# Preview the original labels next to their encoded values.
print(df.loc[:, ['Status', 'Status_LE']].head())


       Status  Status_LE
0  Developing          1
1  Developing          1
2  Developing          1
3  Developing          1
4  Developing          1


In [3]:
from sklearn.preprocessing import OrdinalEncoder

# Ordinal-encode 'Status' using an explicit category order: the position of a
# label in the list below determines its code ('Developing' comes first).
encoder = OrdinalEncoder(categories=[['Developing', 'Developed']]).fit(df[['Status']])

# The encoder works on 2-D input, hence the double brackets around 'Status'.
df['Status_OE'] = encoder.transform(df[['Status']])

# Preview the original labels next to their encoded values.
print(df.loc[:, ['Status', 'Status_OE']].head())


       Status  Status_OE
0  Developing        0.0
1  Developing        0.0
2  Developing        0.0
3  Developing        0.0
4  Developing        0.0


In [4]:
# Frequency encoding for Status: replace every label with the fraction of
# rows that carry that label.
status_values = df['Status']

# normalize=True yields relative frequencies (shares) instead of raw counts.
freq_map = status_values.value_counts(normalize=True)
df['Status_FE'] = status_values.map(freq_map)

# Preview the original labels next to their frequencies.
print(df.loc[:, ['Status', 'Status_FE']].head())


       Status  Status_FE
0  Developing   0.825732
1  Developing   0.825732
2  Developing   0.825732
3  Developing   0.825732
4  Developing   0.825732


In [6]:
# Trim leading/trailing whitespace from every column name so that columns can
# be looked up by their clean names (e.g. 'Life expectancy').
df.columns = df.columns.map(str.strip)


In [7]:
# Target encoding: replace every 'Status' label with the mean of
# 'Life expectancy' over the rows sharing that label.
# Requires the column names to be whitespace-free already.
target_mean = (
    df.groupby('Status', observed=True)['Life expectancy']
    .agg('mean')
)
df['Status_TE'] = df['Status'].map(target_mean)

# Preview the original labels next to their per-group target mean.
print(df.loc[:, ['Status', 'Status_TE']].head())


       Status  Status_TE
0  Developing  67.111465
1  Developing  67.111465
2  Developing  67.111465
3  Developing  67.111465
4  Developing  67.111465
