<a href="https://colab.research.google.com/github/MohiniRathore20/Aiml-Lab--Mohini-rathore/blob/main/Aiml%20exp%207.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Step 1: Import Libraries and Dataset
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler, OneHotEncoder
from sklearn.datasets import load_iris


# Load the Iris dataset into a DataFrame: one column per entry in
# iris.feature_names, plus a 'species' column holding the class label.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['species'] = iris.target
# Swap the integer class codes for the matching names in iris.target_names
# (code i -> iris.target_names[i]).
df['species'] = df['species'].replace(dict(enumerate(iris.target_names)))

# Display basic details of the dataset
print("Basic Details:")
print(f"Number of rows: {df.shape[0]}, Number of columns: {df.shape[1]}")
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so emits a stray "None" line.
df.info()

# Display the first 5 rows
print("\nFirst 5 rows (Before Preprocessing):")
print(df.head())

# Step 2: Identify Numerical and Categorical Columns
# Columns are split purely by dtype: float64/int64 columns are treated as
# numerical features, object-dtype columns as categorical ones.
numerical_cols = list(df.select_dtypes(include=['float64', 'int64']).columns)
categorical_cols = list(df.select_dtypes(include=['object']).columns)

print("\nNumerical Columns:", numerical_cols)
print("Categorical Columns:", categorical_cols)

# Step 3: Feature Scaling (Min-Max Scaling and Z-Score Scaling)
# Both scalers are fitted on the same numeric columns of df, and each result is
# written into its own copy, so df itself is left unmodified.
min_max_scaler = MinMaxScaler()
standard_scaler = StandardScaler()
numeric_features = df[numerical_cols]

# Min-Max Scaling
df_minmax_scaled = df.copy()
df_minmax_scaled[numerical_cols] = min_max_scaler.fit_transform(numeric_features)

# Standardization (Z-Score Scaling)
df_standard_scaled = df.copy()
df_standard_scaled[numerical_cols] = standard_scaler.fit_transform(numeric_features)

# Display the datasets with scaled features
for heading, scaled_frame in (
    ("\nFirst 5 rows (After Min-Max Scaling):", df_minmax_scaled),
    ("\nFirst 5 rows (After Z-Score Scaling):", df_standard_scaled),
):
    print(heading)
    print(scaled_frame.head())


# Step 4: One-Hot Encoding
# sparse_output=False asks the encoder for a dense array so it can be wrapped
# in a DataFrame directly; drop=None keeps an indicator column for every
# category.
encoder = OneHotEncoder(sparse_output=False, drop=None)
encoded_cats = encoder.fit_transform(df[categorical_cols])
encoded_cols = encoder.get_feature_names_out(categorical_cols)

# Give the encoded frame the same index as df: pd.concat(axis=1) aligns rows by
# index label, so a fresh default index would misalign (and introduce NaNs) if
# df ever carried a non-default index.
df_encoded = pd.DataFrame(encoded_cats, columns=encoded_cols, index=df.index)

# Final frame: the z-score-scaled numeric columns plus the one-hot indicator
# columns, with the original categorical columns removed.
df_onehot_encoded = pd.concat(
    [df_standard_scaled.drop(columns=categorical_cols), df_encoded],
    axis=1,
)

print("\nFirst 5 rows (After One-Hot Encoding):")
print(df_onehot_encoded.head())

# Step 5: Memory and Performance Check
def _total_bytes(frame):
    """Return the sum of frame.memory_usage(deep=True) across all entries."""
    return frame.memory_usage(deep=True).sum()


# Compare the raw frame against the scaled + one-hot-encoded frame.
before_memory = _total_bytes(df)
after_memory = _total_bytes(df_onehot_encoded)

print(f"\nMemory Usage Before Preprocessing: {before_memory} bytes")
print(f"Memory Usage After Preprocessing: {after_memory} bytes")


Basic Details:
Number of rows: 150, Number of columns: 5
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   species            150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
None

First 5 rows (Before Preprocessing):
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4            