### Problem Statement

1. Read the given dataset into a DataFrame.
2. Display the names of the columns of the DataFrame.
3. Store the gender and age_group columns in a list named features and the exercise
   column in a list named exercise.
4. Apply One-Hot Encoding to the gender and age_group columns using code written from scratch.
5. Apply One-Hot Encoding to the gender and age_group columns using a library.
6. Apply Label Encoding to the exercise column.
7. Compare the One-Hot Encoding results obtained in both cases.

### Importing Libraries

In [46]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

### Importing Dataset

In [47]:
# Load the dataset; the relative path is resolved against the current working directory.
data = pd.read_csv('Data/Age.csv')
# Preview the first 10 rows.
data.head(10)

Unnamed: 0,gender,age_group,exercise
0,male,child,alternate
1,female,senior,alternate
2,male,teen,weekly
3,male,teen,daily
4,male,adult,weekly
5,female,teen,daily
6,male,adult,weekly
7,male,child,alternate
8,male,teen,alternate
9,male,adult,daily


### Display Names of Columns

In [48]:
# Show the column labels of the loaded DataFrame.
data.columns

Index(['gender', 'age_group', 'exercise'], dtype='object')

### Extracting Features In a List

In [49]:
# Nested list with one [gender, age_group] pair per row of `data`.
features = data.loc[:, ['gender', 'age_group']].to_numpy().tolist()
# Flat list of exercise labels, in the same row order as `features`.
exercise = data['exercise'].to_list()

In [50]:
# Preview the first 10 [gender, age_group] pairs.
features[:10]

[['male', 'child'],
 ['female', 'senior'],
 ['male', 'teen'],
 ['male', 'teen'],
 ['male', 'adult'],
 ['female', 'teen'],
 ['male', 'adult'],
 ['male', 'child'],
 ['male', 'teen'],
 ['male', 'adult']]

In [51]:
# Preview the first 10 exercise labels.
exercise[:10]

['alternate',
 'alternate',
 'weekly',
 'daily',
 'weekly',
 'daily',
 'weekly',
 'alternate',
 'alternate',
 'daily']

### One-Hot Encoding (Scratch)

In [52]:
# Category -> position of its 1 within that feature's one-hot group.
gender_map = {'male': 0, 'female': 1}
age_group_map = {'child': 0, 'senior': 1, 'teen': 2, 'adult': 3}


def one_hot(value, index_map):
    """Return a 0/1 list of length len(index_map) with a single 1 at index_map[value].

    Raises:
        KeyError: if `value` is not a key of `index_map`.
    """
    hot_index = index_map[value]
    return [1 if position == hot_index else 0 for position in range(len(index_map))]


def one_hot_columns(prefix, index_map):
    """Return '<prefix>_<category>' labels ordered by each category's index in `index_map`."""
    # Sorting by the mapped index keeps every label on the column that actually
    # holds its 1, even if the dict literal is later edited or reordered.
    return [f'{prefix}_{category}' for category in sorted(index_map, key=index_map.get)]


# Each row of `features` is a [gender, age_group] pair; concatenate the two groups.
encoded_rows = [
    one_hot(gender, gender_map) + one_hot(age_group, age_group_map)
    for gender, age_group in features
]

# Column labels are derived from the maps instead of being typed out a second time.
encoded = pd.DataFrame(
    encoded_rows,
    columns=one_hot_columns('gender', gender_map) + one_hot_columns('age_group', age_group_map),
)

In [53]:
# Preview the first 10 rows of the hand-built one-hot DataFrame.
encoded.head(10)

Unnamed: 0,gender_male,gender_female,age_group_child,age_group_senior,age_group_teen,age_group_adult
0,1,0,1,0,0,0
1,0,1,0,1,0,0
2,1,0,0,0,1,0
3,1,0,0,0,1,0
4,1,0,0,0,0,1
5,0,1,0,0,1,0
6,1,0,0,0,0,1
7,1,0,1,0,0,0
8,1,0,0,0,1,0
9,1,0,0,0,0,1


### One-Hot Encoding (Library)

In [54]:
# One-hot encode only the listed columns with pandas; `exercise` is carried
# over unchanged. The dummy columns are boolean and ordered alphabetically by
# category, unlike the 0/1 integers and map order of the scratch version.
encoded_features = pd.get_dummies(
    data=data,
    columns=['gender', 'age_group'],
)
encoded_features

Unnamed: 0,exercise,gender_female,gender_male,age_group_adult,age_group_child,age_group_senior,age_group_teen
0,alternate,False,True,False,True,False,False
1,alternate,True,False,False,False,True,False
2,weekly,False,True,False,False,False,True
3,daily,False,True,False,False,False,True
4,weekly,False,True,True,False,False,False
...,...,...,...,...,...,...,...
95,daily,True,False,False,False,True,False
96,weekly,True,False,False,False,True,False
97,alternate,False,True,False,False,True,False
98,daily,True,False,False,False,True,False


### Label Encoding (Scratch)

In [64]:
# Hand-written label -> integer code mapping.
exercise_map = {'alternate': 0, 'daily': 1, 'weekly': 2}

# Fail loudly on labels the mapping does not cover. Skipping them would make
# `encoded_exercise` shorter than `exercise`, so the codes would no longer line
# up with the rows they came from.
unmapped_labels = [label for label in dict.fromkeys(exercise) if label not in exercise_map]
if unmapped_labels:
    raise ValueError(f'exercise contains labels missing from exercise_map: {unmapped_labels}')

# Exactly one code per row, in the original row order.
encoded_exercise = [exercise_map[label] for label in exercise]

In [65]:
# Preview the first 10 integer codes produced by the manual mapping.
encoded_exercise[:10]

[0, 0, 2, 1, 2, 1, 2, 0, 0, 1]

### Label Encoding (Library)

In [57]:
# Library counterpart of the manual mapping: fit the encoder on the exercise
# labels and convert them to integer codes in a single call.
encoder = LabelEncoder()
# NOTE: this rebinds `encoded_exercise`, replacing the Python list built by the
# scratch cell with the NumPy array returned by scikit-learn.
encoded_exercise = encoder.fit_transform(exercise)
encoded_exercise

array([0, 0, 2, 1, 2, 1, 2, 0, 0, 1, 1, 1, 0, 0, 2, 0, 1, 2, 1, 2, 2, 2,
       1, 1, 2, 1, 2, 0, 2, 1, 2, 2, 0, 2, 1, 0, 0, 2, 1, 1, 1, 1, 2, 0,
       1, 2, 2, 1, 2, 1, 0, 0, 2, 1, 2, 0, 0, 1, 2, 1, 2, 0, 1, 1, 1, 0,
       2, 0, 0, 2, 0, 2, 2, 2, 1, 2, 0, 1, 1, 2, 1, 1, 2, 0, 1, 1, 1, 1,
       2, 1, 0, 2, 2, 0, 2, 1, 2, 0, 1, 1], dtype=int64)