<a href="https://colab.research.google.com/github/K1dus/PLP-Python/blob/main/Iris_Analysis_Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Load the Iris dataset
def load_iris_dataset():
    try:
        iris = load_iris(as_frame=True)
        df = iris.frame
        return df
    except Exception as e:
        print(f"Error loading dataset: {e}")
        return None

def main():
    # Load dataset
    df = load_iris_dataset()
    if df is None:
        return

    # Display first few rows
    print("First 5 rows:")
    print(df.head())

    # Data types and missing values
    print("\nData types:")
    print(df.dtypes)
    print("\nMissing values:")
    print(df.isnull().sum())

    # Clean dataset (Iris has no missing values, but here's how to drop if any)
    df_clean = df.dropna()

    # Basic statistics
    print("\nBasic statistics:")
    print(df_clean.describe())

    # Group by species and compute mean of numerical columns
    print("\nMean values by species:")
    print(df_clean.groupby('target').mean())

    # Map target to species name for plotting
    target_names = load_iris().target_names
    df_clean['species'] = df_clean['target'].map(lambda x: target_names[x])

    # --- Visualizations ---
    sns.set(style="whitegrid")

    # 1. Line chart: mean sepal length per sample index (trend)
    plt.figure(figsize=(8,4))
    plt.plot(df_clean.index, df_clean['sepal length (cm)'], label='Sepal Length')
    plt.title('Sepal Length Trend Across Samples')
    plt.xlabel('Sample Index')
    plt.ylabel('Sepal Length (cm)')
    plt.legend()
    plt.tight_layout()
    plt.savefig('line_chart.png')
    plt.close()

    # 2. Bar chart: average petal length per species
    plt.figure(figsize=(6,4))
    sns.barplot(x='species', y='petal length (cm)', data=df_clean, ci=None)
    plt.title('Average Petal Length per Species')
    plt.xlabel('Species')
    plt.ylabel('Petal Length (cm)')
    plt.tight_layout()
    plt.savefig('bar_chart.png')
    plt.close()

    # 3. Histogram: distribution of sepal width
    plt.figure(figsize=(6,4))
    plt.hist(df_clean['sepal width (cm)'], bins=15, color='skyblue', edgecolor='black')
    plt.title('Distribution of Sepal Width')
    plt.xlabel('Sepal Width (cm)')
    plt.ylabel('Frequency')
    plt.tight_layout()
    plt.savefig('histogram.png')
    plt.close()

    # 4. Scatter plot: sepal length vs petal length
    plt.figure(figsize=(6,4))
    sns.scatterplot(x='sepal length (cm)', y='petal length (cm)', hue='species', data=df_clean)
    plt.title('Sepal Length vs Petal Length by Species')
    plt.xlabel('Sepal Length (cm)')
    plt.ylabel('Petal Length (cm)')
    plt.legend()
    plt.tight_layout()
    plt.savefig('scatter_plot.png')
    plt.close()

    print("\nPlots saved as line_chart.png, bar_chart.png, histogram.png, and scatter_plot.png.")
    print("\nFindings:")
    print("- The Iris dataset contains 3 species with distinct petal and sepal measurements.")
    print("- Average petal length varies significantly by species.")
    print("- Sepal width is roughly normally distributed.")
    print("- There is a clear positive correlation between sepal length and petal length.")

if __name__ == "__main__":
    main()

First 5 rows:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  

Data types:
sepal length (cm)    float64
sepal width (cm)     float64
petal length (cm)    float64
petal width (cm)     float64
target                 int64
dtype: object

Missing values:
sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
target               0
dtype: int64

Basic statistics:
       sepal length (cm)  sepal width (cm)  petal length (cm)  \
count         150.000000        150.00


The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(x='species', y='petal length (cm)', data=df_clean, ci=None)



Plots saved as line_chart.png, bar_chart.png, histogram.png, and scatter_plot.png.

Findings:
- The Iris dataset contains 3 species with distinct petal and sepal measurements.
- Average petal length varies significantly by species.
- Sepal width is roughly normally distributed.
- There is a clear positive correlation between sepal length and petal length.
