<a href="https://colab.research.google.com/github/LavyaS/skill-new/blob/main/IRIS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder
import category_encoders as ce
import os

# 📂 Locate the dataset: use '/Iris.csv' when it exists, otherwise ask for an upload
file_path = "/Iris.csv"

if not os.path.exists(file_path):
    print("🚨 File '/Iris.csv' not found! Please upload it.")
    # This import only runs when the file is missing, so the google.colab
    # package is needed solely for the upload fallback.
    from google.colab import files
    uploaded = files.upload()
    if not uploaded:
        # Nothing came back from the upload step (e.g. it was cancelled):
        # stop with a clear message instead of an IndexError on the lookup below.
        raise FileNotFoundError(
            "No file was uploaded; cannot load the Iris dataset."
        )
    file_path = list(uploaded.keys())[0]  # Name of the first uploaded file

# 📥 Load the dataset from whichever path was resolved above
df = pd.read_csv(file_path)

# 🌟 Show the first rows to confirm the columns were read as expected
print("Original Data:")
print(df.head())

# 1️⃣ Ordinal Encoding
# The encoder is given a one-column DataFrame (double brackets), not a Series.
ordinal_encoder = OrdinalEncoder()
species_frame = df[['Species']]
df['Species_Ordinal'] = ordinal_encoder.fit_transform(species_frame)

# 2️⃣ Label Encoding
# Here the encoder is given the Species column itself as a Series.
label_encoder = LabelEncoder()
species_labels = label_encoder.fit_transform(df['Species'])
df['Species_Label'] = species_labels

# 3️⃣ One-Hot Encoding
# drop='first' leaves out the indicator for the first category, and
# sparse_output=False asks for a dense array so it can be wrapped in a DataFrame.
onehot_encoder = OneHotEncoder(drop='first', sparse_output=False)
onehot_encoded = onehot_encoder.fit_transform(df[['Species']])
onehot_df = pd.DataFrame(
    data=onehot_encoded,
    columns=onehot_encoder.get_feature_names_out(['Species']),
)
# Append the indicator columns to the right of the existing ones.
df = pd.concat([df, onehot_df], axis='columns')

# 4️⃣ Binary Encoding
# Encode Species on its own, then append the resulting columns to df.
binary_encoder = ce.BinaryEncoder(cols=['Species'])
species_column = df.loc[:, ['Species']]
df_binary = binary_encoder.fit_transform(species_column)
df = pd.concat([df, df_binary], axis='columns')

# 5️⃣ Target Encoding (using SepalLengthCm as the target)
# The target is selected by name rather than by position: column 0 of this
# dataset is the row `Id`, not SepalLengthCm, so a positional lookup would
# fit the encoder against row ids.
target_encoder = ce.TargetEncoder(cols=['Species'])
df['Species_Target'] = target_encoder.fit_transform(df['Species'], df['SepalLengthCm'])

# 6️⃣ Frequency Encoding
# value_counts(normalize=True) gives each species' share of the rows;
# mapping that back onto the column yields one frequency value per row.
species = df['Species']
freq_encoding = species.value_counts(normalize=True)
df['Species_Frequency'] = species.map(freq_encoding)

# 7️⃣ WOE Encoding
# The encoder is fitted against a binary target, so derive a 0/1 indicator
# from Species: 1 for 'Iris-setosa', 0 for everything else. It is kept as a
# standalone Series, so no temporary column has to be added to df and removed.
species_binary = (
    df['Species']
    .eq('Iris-setosa')
    .astype('int64')
    .rename('Species_Binary')
)

# Fit the WOE encoder on Species against the indicator and store the result.
woe_encoder = ce.WOEEncoder(cols=['Species'])
df['Species_WOE'] = woe_encoder.fit_transform(df['Species'], species_binary)

# 8️⃣ Hash Encoding
# Hash Species with n_components=3, then append the encoder's output to df.
hash_encoder = ce.HashingEncoder(n_components=3, cols=['Species'])
species_only = df[['Species']]
df_hash = hash_encoder.fit_transform(species_only)
df = pd.concat([df, df_hash], axis='columns')

# 🖥 Preview the first rows of the encoded DataFrame
print("\nEncoded Data:", df.head(), sep="\n")

# ✅ Write the encoded DataFrame to CSV without the index column
output_path = "/Iris_Encoded.csv"
df.to_csv(output_path, index=False)
print(f"\nEncoded dataset saved as '{output_path}'!")


Original Data:
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa

Encoded Data:
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species  \
0   1            5.1           3.5            1.4           0.2  Iris-setosa   
1   2            4.9           3.0            1.4           0.2  Iris-setosa   
2   3            4.7           3.2            1.3           0.2  Iris-setosa   
3   4            4.6           3.1            1.5           0.2  Iris-setosa   
4   5            5.0           3.6            1.4           0.2  Iris-setosa   

   Species_Ordinal  Species