In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler

In [2]:
df = pd.read_csv('1) iris.csv')

In [3]:
df_cleaned = df.dropna()

In [4]:
for col in df_cleaned.select_dtypes(include='number').columns:
    df_cleaned[col].fillna(df_cleaned[col].mean(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_cleaned[col].fillna(df_cleaned[col].mean(), inplace=True)


In [6]:
for col in df_cleaned.select_dtypes(include='number').columns:
    Q1 = df_cleaned[col].quantile(0.25)
    Q3 = df_cleaned[col].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    df_cleaned = df_cleaned[(df_cleaned[col] >= lower_bound) & (df_cleaned[col] <= upper_bound)]

In [7]:
df_encoded = pd.get_dummies(df_cleaned, drop_first=True)

In [8]:
scaler = MinMaxScaler()
numeric_cols = df_encoded.select_dtypes(include='number').columns
df_encoded[numeric_cols] = scaler.fit_transform(df_encoded[numeric_cols])

In [9]:
scaler = StandardScaler()
df_encoded[numeric_cols] = scaler.fit_transform(df_encoded[numeric_cols])


In [10]:
df_encoded.to_csv('cleaned_preprocessed_data.csv', index=False)
print("✅ Cleaned and preprocessed data saved to cleaned_preprocessed_data.csv")

✅ Cleaned and preprocessed data saved to cleaned_preprocessed_data.csv
