In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder

# Sample records: five students, two numeric mark columns, one categorical grade.
data = {
    'Student_ID': [101, 102, 103, 104, 105],
    'Math_Marks': [85, 92, 78, 95, 88],
    'Science_Marks': [90, 87, 82, 93, 85],
    'Grade': ['A', 'A+', 'B', 'A+', 'A']
}
df = pd.DataFrame(data)


def _show(title, frame, gap=True):
    """Print a section title and a frame.

    When ``gap`` is true, the blank-line spacer used between sections is
    printed afterwards; the last section passes ``gap=False``.
    """
    print(title)
    print(frame)
    if gap:
        print("\n")


_show("Original DataFrame:", df)

# Only the mark columns are rescaled; Student_ID and Grade are left untouched.
marks_columns = ['Math_Marks', 'Science_Marks']

# 1. Min-Max Normalization (0-1 scaling)
min_max_scaler = MinMaxScaler()
df_minmax = df.copy()
df_minmax[marks_columns] = min_max_scaler.fit_transform(df[marks_columns])
_show("After Min-Max Normalization:", df_minmax)

# 2. Z-Score Normalization (Standardization)
standard_scaler = StandardScaler()
df_standard = df.copy()
df_standard[marks_columns] = standard_scaler.fit_transform(df[marks_columns])
_show("After Z-Score Normalization:", df_standard)

# 3. One-Hot Encoding for Grade
# Build the indicator columns first, then attach them to the frame with the
# original Grade column removed.
grade_onehot = pd.get_dummies(df['Grade'], prefix='Grade')
df_onehot = pd.concat([df.drop(columns='Grade'), grade_onehot], axis=1)
_show("After One-Hot Encoding:", df_onehot)

# 4. Label Encoding for Grade
# The original Grade column is kept; the integer codes go into a new column.
label_encoder = LabelEncoder()
df_label = df.assign(Grade_Encoded=label_encoder.fit_transform(df['Grade']))
_show("After Label Encoding:", df_label, gap=False)

Original DataFrame:
   Student_ID  Math_Marks  Science_Marks Grade
0         101          85             90     A
1         102          92             87    A+
2         103          78             82     B
3         104          95             93    A+
4         105          88             85     A


After Min-Max Normalization:
   Student_ID  Math_Marks  Science_Marks Grade
0         101    0.411765       0.727273     A
1         102    0.823529       0.454545    A+
2         103    0.000000       0.000000     B
3         104    1.000000       1.000000    A+
4         105    0.588235       0.272727     A


After Z-Score Normalization:
   Student_ID  Math_Marks  Science_Marks Grade
0         101   -0.441758       0.679521     A
1         102    0.747590      -0.104542    A+
2         103   -1.631106      -1.411313     B
3         104    1.257311       1.463583    A+
4         105    0.067963      -0.627250     A


After One-Hot Encoding:
   Student_ID  Math_Marks  Science_Marks  Grad

In [None]:
import pandas as pd

# Imported at the top of the block so the cell also runs on its own.
from sklearn.preprocessing import LabelEncoder

# Sample data: a single categorical column.
data = pd.DataFrame({"Fruit": ["Apple", "Banana", "Orange"]})

# Label Encoding: one integer code per distinct fruit name.
le = LabelEncoder()
data["Label_Encoded"] = le.fit_transform(data["Fruit"])
# sep="" stops print() from inserting a space in front of the frame, which
# would push the header row one column to the right of the data rows.
print("Label Encoded:\n", data, sep="")

# One-Hot Encoding: one indicator column per fruit.
# dtype=int pins the indicators to 0/1; without it pandas >= 2.0 produces
# True/False columns instead.
one_hot = pd.get_dummies(data["Fruit"], prefix="Fruit", dtype=int)
data = pd.concat([data, one_hot], axis=1)
print("\nOne-Hot Encoded:\n", data, sep="")

In [None]:
Label Encoded:
     Fruit  Label_Encoded
0    Apple              0
1   Banana              1
2   Orange              2

One-Hot Encoded:
     Fruit  Label_Encoded  Fruit_Apple  Fruit_Banana  Fruit_Orange
0    Apple              0            1             0             0
1   Banana              1            0             1             0
2   Orange              2            0             0             1

In [None]:
Let's create a sample student dataset and perform various data preprocessing operations using Python with pandas and scikit-learn. Here's a complete example:

```python
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder

# Sample records: five students, two numeric mark columns, one categorical grade.
data = {
    'Student_ID': [101, 102, 103, 104, 105],
    'Math_Marks': [85, 92, 78, 95, 88],
    'Science_Marks': [90, 87, 82, 93, 85],
    'Grade': ['A', 'A+', 'B', 'A+', 'A']
}
df = pd.DataFrame(data)


def _show(title, frame, gap=True):
    """Print a section title and a frame.

    When ``gap`` is true, the blank-line spacer used between sections is
    printed afterwards; the last section passes ``gap=False``.
    """
    print(title)
    print(frame)
    if gap:
        print("\n")


_show("Original DataFrame:", df)

# Only the mark columns are rescaled; Student_ID and Grade are left untouched.
marks_columns = ['Math_Marks', 'Science_Marks']

# 1. Min-Max Normalization (0-1 scaling)
min_max_scaler = MinMaxScaler()
df_minmax = df.copy()
df_minmax[marks_columns] = min_max_scaler.fit_transform(df[marks_columns])
_show("After Min-Max Normalization:", df_minmax)

# 2. Z-Score Normalization (Standardization)
standard_scaler = StandardScaler()
df_standard = df.copy()
df_standard[marks_columns] = standard_scaler.fit_transform(df[marks_columns])
_show("After Z-Score Normalization:", df_standard)

# 3. One-Hot Encoding for Grade
# Build the indicator columns first, then attach them to the frame with the
# original Grade column removed.
grade_onehot = pd.get_dummies(df['Grade'], prefix='Grade')
df_onehot = pd.concat([df.drop(columns='Grade'), grade_onehot], axis=1)
_show("After One-Hot Encoding:", df_onehot)

# 4. Label Encoding for Grade
# The original Grade column is kept; the integer codes go into a new column.
label_encoder = LabelEncoder()
df_label = df.assign(Grade_Encoded=label_encoder.fit_transform(df['Grade']))
_show("After Label Encoding:", df_label, gap=False)
```

This code will output:

```
Original DataFrame:
   Student_ID  Math_Marks  Science_Marks Grade
0        101         85             90     A
1        102         92             87    A+
2        103         78             82     B
3        104         95             93    A+
4        105         88             85     A

After Min-Max Normalization:
   Student_ID  Math_Marks  Science_Marks Grade
0        101    0.411765       0.727273     A
1        102    0.823529       0.454545    A+
2        103    0.000000       0.000000     B
3        104    1.000000       1.000000    A+
4        105    0.588235       0.272727     A

After Z-Score Normalization:
   Student_ID  Math_Marks  Science_Marks Grade
0         101   -0.441758       0.679521     A
1         102    0.747590      -0.104542    A+
2         103   -1.631106      -1.411313     B
3         104    1.257311       1.463583    A+
4         105    0.067963      -0.627250     A

After One-Hot Encoding:
   Student_ID  Math_Marks  Science_Marks  Grade_A  Grade_A+  Grade_B
0        101         85             90     True     False    False
1        102         92             87    False      True    False
2        103         78             82    False     False     True
3        104         95             93    False      True    False
4        105         88             85     True     False    False

After Label Encoding:
   Student_ID  Math_Marks  Science_Marks Grade  Grade_Encoded
0        101         85             90     A              0
1        102         92             87    A+              1
2        103         78             82     B              2
3        104         95             93    A+              1
4        105         88             85     A              0
```

Explanation of operations:

1. **Min-Max Normalization**: 
   - Scales values to a range [0,1]
   - Formula: (x - min) / (max - min)
   - Useful when you need bounded values

2. **Z-Score Normalization (Standardization)**:
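   - Transforms data to have mean=0 and standard deviation=1
   - Formula: (x - mean) / standard_deviation (scikit-learn's `StandardScaler` uses the population standard deviation, i.e. it divides by n rather than n - 1)
   - Useful for algorithms that are sensitive to feature scale (e.g. gradient-descent-based models, SVMs, k-NN, PCA); it rescales the data but does not make it normally distributed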

3. **One-Hot Encoding**:
   - Converts categorical variables into binary vectors
   - Creates separate columns for each category
   - Useful for nominal data where no order exists

4. **Label Encoding**:
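   - Converts categories to numerical values
   - Assigns a unique integer to each category; `LabelEncoder` numbers the categories in sorted order (here A=0, A+=1, B=2), so the integers do not follow the grade ranking
   - Useful when maintaining a single column is preferred; for ordinal data, supply the category order explicitly (e.g. an explicit mapping or `OrdinalEncoder(categories=...)`) so the codes reflect the real ranking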

You can run this code in any Python environment with pandas and scikit-learn installed. The operations shown are common preprocessing steps in data analysis and machine learning workflows.