# In [1]:
# Step 1: Set Up the Environment
# Objective: Ensure you have the right tools and packages installed.

# 1. Install the required libraries.
# 2. Verify the installation by importing the libraries in a Python script or Jupyter notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# Step 2: Load & Explore the Dataset
# Objective: Load data into a pandas DataFrame and obtain a basic understanding of its structure.


# 3. Load a CSV file into a DataFrame.
# 4. Display the first few records to understand the structure.
# 5. Get a summary of the dataset.
def data(csv_path='iris.csv'):
    """Run the Iris walkthrough: load, explore, NumPy ops, cleaning, plots.

    Args:
        csv_path: Path of the CSV file to load. Defaults to ``'iris.csv'``.
            The file must provide the columns ``sepal_length``,
            ``sepal_width``, ``petal_length``, ``petal_width`` and
            ``species``, which the later steps read by name.

    Returns:
        None. Every result is printed or shown as a plot. When the file does
        not exist, ``"File not found"`` is printed and the function returns
        before doing any other work.
    """
    try:
        df = pd.read_csv(csv_path)
    except FileNotFoundError:
        print("File not found")
        return

    # Put each table on its own line so the header row stays aligned with
    # the data rows instead of being shifted by the label.
    print("First 5 rows of dataset:", df.head(), sep="\n")
    # DataFrame.info() writes its report itself and returns None, so it is
    # called on its own line; passing it to print() would emit "None".
    print("\n Dataset info:")
    df.info()
    print("\n Stastical summary:", df.describe(), sep="\n")

    _numpy_operations(df)
    cleaned = _clean_and_aggregate(df)
    _plot_dataset(cleaned)


# Step 3: Perform NumPy Operations
# Objective: Utilize NumPy for basic numerical operations and array manipulations.

# 6. Convert a DataFrame column to a NumPy array and perform array operations like mean and sum.
# 7. Create a NumPy array and calculate the variance and standard deviation.
# 8. Use NumPy to filter based on conditions.
def _numpy_operations(df):
    """Print NumPy statistics for the petal lengths and for a small demo array."""
    petal_lengths = df['petal_length'].to_numpy()
    print("Mean:", np.mean(petal_lengths))
    print("Sum", np.sum(petal_lengths))

    sample = np.array([1, 2, 3, 4, 5])
    print("Variance:", np.var(sample))
    print("standard Deviation:", np.std(sample))

    # Boolean-mask indexing keeps only the lengths above the threshold.
    print("Filtered value:", petal_lengths[petal_lengths > 1.7])


# Step 4: Data Manipulation with Pandas
# Objective: Use Pandas to clean and manipulate dataset for analysis.

# 9. Handle missing data by filling or dropping.
# 10. Create new columns or modify existing ones.
# 11. Use groupby to aggregate data.
def _clean_and_aggregate(df):
    """Drop incomplete rows, add ``petal_ratio``, print per-species means.

    Returns the cleaned DataFrame (a new object; ``df`` is left unmodified).
    """
    cleaned = df.dropna()

    # Work on plain float ndarrays: `out=`/`where=` are NumPy ufunc arguments,
    # and using real arrays keeps the division restricted to the rows where
    # the width is non-zero. Rows with a zero width keep the 0.0 from `out`.
    length = cleaned['petal_length'].to_numpy(dtype=float)
    width = cleaned['petal_width'].to_numpy(dtype=float)
    ratio = np.divide(
        length,
        width,
        out=np.zeros_like(length),
        where=width != 0,
    )
    # assign() returns a new frame, so the caller's DataFrame is not mutated.
    cleaned = cleaned.assign(petal_ratio=ratio)

    grouped = cleaned.groupby('species').mean(numeric_only=True)
    print("\nMean values by species:")
    print(grouped)
    return cleaned


# Step 5: Data Visualization with Matplotlib & Seaborn
# Objective: Visualize the data to identify patterns, trends, and insights.

# 12. Use Matplotlib to create a basic plot.
# 13. Create a histogram using Seaborn.
# 14. Plot a box plot for a clear view of data distribution.
def _plot_dataset(df):
    """Show a sepal scatter plot, a petal-length histogram and a ratio box plot."""
    plt.plot(df['sepal_length'], df['sepal_width'], 'o')
    plt.title("Sepal Length vs Width")
    plt.xlabel("Sepal Length")
    plt.ylabel("Sepal Width")
    plt.grid(True)
    plt.show()

    sns.histplot(df['petal_length'], kde=True)
    plt.title("Petal Length Distribution")
    plt.show()

    sns.boxplot(x='species', y='petal_ratio', data=df)
    plt.title("Petal Ratio by Species")
    plt.show()




