In [1]:
# Import the necessary libraries
import pandas as pd
from sklearn.datasets import load_iris

# Step 1: Load the Iris dataset
# iris.data supplies the measurements and iris.feature_names the column labels.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Step 2: Add the target (species) column to the DataFrame
# iris.target holds integer codes; from_codes maps each code to the matching
# entry of iris.target_names, so 'species' becomes a categorical column.
df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)

# Step 3: Display the first few rows of the dataset
print("First few rows of the Iris dataset:")
print(df.head())

# Step 4: Dataset overview (summary statistics of the numeric columns)
print("\nDataset Summary:")
print(df.describe())

# Step 5: Check for missing values (count of nulls per column)
print("\nMissing values in the dataset:")
print(df.isnull().sum())

# Step 6: Display data types of each column
print("\nData types of the columns:")
print(df.dtypes)

# Step 7: Data Manipulation Example (Adding a new column)
# Add a new column that calculates the ratio of petal length to sepal length.
# Note: this mutates `df` in place, so every later step sees the extra column.
df['petal_to_sepal_ratio'] = df['petal length (cm)'] / df['sepal length (cm)']

# Step 8: Display the first few rows with the new column
print("\nFirst few rows after adding the new column 'petal_to_sepal_ratio':")
print(df.head())

# Step 9: Display unique species in the dataset
print("\nUnique species in the dataset:")
print(df['species'].unique())

# Step 10: Group the data by species and compute the average of each feature
# 'species' is categorical, so `observed` is passed explicitly: recent pandas
# versions emit a FutureWarning when a categorical grouper relies on the
# default. observed=True keeps only categories that occur in the data; the
# result is unchanged as long as every species is present in `df`.
grouped_df = df.groupby('species', observed=True).mean()
print("\nAverage of each feature grouped by species:")
print(grouped_df)


First few rows of the Iris dataset:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

  species  
0  setosa  
1  setosa  
2  setosa  
3  setosa  
4  setosa  

Dataset Summary:
       sepal length (cm)  sepal width (cm)  petal length (cm)  \
count         150.000000        150.000000         150.000000   
mean            5.843333          3.057333           3.758000   
std             0.828066          0.435866           1.765298   
min             4.300000          2.000000           1.000000   
25%             5.100000          2.800000           1.600000   
50%      

  grouped_df = df.groupby('species').mean()
