In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of
# every file found, one path per line.
for root, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(root, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd

# Read the Iris CSV into a DataFrame and show its first five rows as a
# quick sanity check of the columns and values.
file_path = "/kaggle/input/iris-dataset/Iris.csv"  # Update folder name if needed
iris_data = pd.read_csv(filepath_or_buffer=file_path)
print(iris_data.head(5))

In [None]:
# Task 1: Data Inspection and Missing Value Handling
# Report the number of missing values per column, then impute them:
# numeric measurement columns get their column mean, Species gets its mode.
print("\nInspecting Missing Values:")
missing_values = iris_data.isnull().sum()
print(missing_values)

# Handle missing values in numeric columns
numeric_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
for column in numeric_columns:
    # Assign the filled column back to the frame. Calling
    # `iris_data[column].fillna(..., inplace=True)` mutates an intermediate
    # Series: pandas 2.x warns about it, and under Copy-on-Write the parent
    # DataFrame is left unchanged, so the imputation would silently be lost.
    iris_data[column] = iris_data[column].fillna(iris_data[column].mean())

# Handle missing values in the categorical column
species_mode = iris_data['Species'].mode()
# `mode()` is empty when Species holds no non-null values; indexing [0]
# would raise in that case, so only fill when a mode actually exists.
if not species_mode.empty:
    iris_data['Species'] = iris_data['Species'].fillna(species_mode.iloc[0])


In [None]:
# --- Task 2: Data Cleaning and Transformation ---

# Discard rows that exactly repeat an earlier row (every column is compared).
# NOTE(review): the frame also carries an 'Id' column; if it is unique per
# row, no two rows can be exact duplicates and this step removes nothing --
# confirm that is intended.
iris_data = iris_data.drop_duplicates()
print(f"\nNumber of rows after removing duplicates: {len(iris_data)}")

# Add PetalArea (petal length x petal width), then drop any row that still
# contains a missing value.
iris_data = iris_data.assign(
    PetalArea=iris_data['PetalLengthCm'] * iris_data['PetalWidthCm']
).dropna()


In [None]:
# --- Task 3: Aggregation and Transformation ---

# Encode each species label as its integer category code.
species_categories = iris_data['Species'].astype('category')
iris_data['SpeciesNumeric'] = species_categories.cat.codes

# Per-species mean of every numeric column in the frame.
per_species = iris_data.groupby('Species')
aggregation_results = per_species.mean(numeric_only=True)
print("\nMean values grouped by species:")
print(aggregation_results)


In [None]:
# --- Task 4: Advanced Reshaping ---

# Unpivot the measurement columns so each output row holds one
# (Id, Species, SpeciesNumeric, MeasurementType, MeasurementValue) record.
identifier_columns = ['Id', 'Species', 'SpeciesNumeric']
measurement_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'PetalArea']
iris_long = iris_data.melt(
    id_vars=identifier_columns,
    value_vars=measurement_columns,
    var_name='MeasurementType',
    value_name='MeasurementValue',
)

# Preview the first rows of the long-format result.
print("\nReshaped Dataset (long format):")
print(iris_long.head())

# --- Save the cleaned and reshaped datasets ---
# Write both frames as CSV under /kaggle/working/, omitting the index column.
for frame, destination in (
    (iris_data, '/kaggle/working/iris_cleaned.csv'),
    (iris_long, '/kaggle/working/iris_long.csv'),
):
    frame.to_csv(destination, index=False)

print("\nCleaned and reshaped datasets have been saved as 'iris_cleaned.csv' and 'iris_long.csv'.")