In [1]:
# Question 1: Feature Scaling
# Task: Explain why feature scaling is essential and demonstrate the impact of unscaled features on a machine learning model.






import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Sample dataset with unscaled features having different ranges
data = {
    'Age': [25, 45, 35, 50, 23, 40, 60, 48],
    'Income': [50000, 100000, 75000, 120000, 40000, 95000, 130000, 110000],
    'Purchased': [0, 1, 0, 1, 0, 1, 1, 1]
}
df = pd.DataFrame(data)

X = df[['Age', 'Income']]
y = df['Purchased']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)

# Model without scaling
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
y_pred_unscaled = knn.predict(X_test)
acc_unscaled = accuracy_score(y_test, y_pred_unscaled)

# Model with scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

knn_scaled = KNeighborsClassifier()
knn_scaled.fit(X_train_scaled, y_train)
y_pred_scaled = knn_scaled.predict(X_test_scaled)
acc_scaled = accuracy_score(y_test, y_pred_scaled)

print(f"Accuracy without scaling: {acc_unscaled:.2f}")
print(f"Accuracy with scaling: {acc_scaled:.2f}")



# Question 2: Min-Max Scaling
# Task: Implement Min-Max Scaling on the Iris dataset.



import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler

# Load Iris dataset
iris = load_iris()
X = iris.data
feature_names = iris.feature_names

# Convert to DataFrame for easier handling
df = pd.DataFrame(X, columns=feature_names)

print("Before scaling:")
print(df.describe())

# Initialize MinMaxScaler
scaler = MinMaxScaler()

# Fit and transform the data
X_scaled = scaler.fit_transform(df)

# Convert scaled data back to DataFrame
df_scaled = pd.DataFrame(X_scaled, columns=feature_names)

print("\nAfter Min-Max scaling:")
print(df_scaled.describe())






# Question 3: Standardization (Z-score Scaling)
# Task: Implement Standardization using Z-score scaling on the Iris dataset.




import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

# Load Iris dataset
iris = load_iris()
X = iris.data
feature_names = iris.feature_names

# Convert to DataFrame for readability
df = pd.DataFrame(X, columns=feature_names)

print("Before standardization:")
print(df.describe())

# Initialize StandardScaler
scaler = StandardScaler()

# Fit and transform the data
X_scaled = scaler.fit_transform(df)

# Convert scaled data back to DataFrame
df_scaled = pd.DataFrame(X_scaled, columns=feature_names)

print("\nAfter Z-score standardization:")
print(df_scaled.describe())




# Question 4: Robust Scaling
# Task: Implement Robust Scaling to handle outliers in the Iris dataset.




import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import RobustScaler

# Load Iris dataset
iris = load_iris()
X = iris.data
feature_names = iris.feature_names

# Convert to DataFrame for clarity
df = pd.DataFrame(X, columns=feature_names)

print("Before Robust Scaling:")
print(df.describe())

# Initialize RobustScaler
scaler = RobustScaler()

# Fit and transform the data
X_robust_scaled = scaler.fit_transform(df)

# Convert back to DataFrame
df_robust_scaled = pd.DataFrame(X_robust_scaled, columns=feature_names)

print("\nAfter Robust Scaling:")
print(df_robust_scaled.describe())




Accuracy without scaling: 0.67
Accuracy with scaling: 0.67
Before scaling:
       sepal length (cm)  sepal width (cm)  petal length (cm)  \
count         150.000000        150.000000         150.000000   
mean            5.843333          3.057333           3.758000   
std             0.828066          0.435866           1.765298   
min             4.300000          2.000000           1.000000   
25%             5.100000          2.800000           1.600000   
50%             5.800000          3.000000           4.350000   
75%             6.400000          3.300000           5.100000   
max             7.900000          4.400000           6.900000   

       petal width (cm)  
count        150.000000  
mean           1.199333  
std            0.762238  
min            0.100000  
25%            0.300000  
50%            1.300000  
75%            1.800000  
max            2.500000  

After Min-Max scaling:
       sepal length (cm)  sepal width (cm)  petal length (cm)  \
count         15