# Load and Prepare Data 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def run_citrus_analysis(csv_path='../data/citrus.csv', split_index=5000):
    """Load the citrus dataset and print a preview of each fruit's measurements.

    The CSV is read with pandas, its 'diameter' and 'weight' columns are
    converted to a NumPy array, and the rows are split positionally into an
    orange section and a grapefruit section. The first five values of each
    resulting 1-D array are printed.

    Args:
        csv_path: Location of the citrus CSV file. A relative path is
            resolved against the current working directory.
        split_index: Row index at which the grapefruit rows begin; rows
            before it are treated as oranges.

    Returns:
        None. Results are printed. If the CSV file does not exist, an error
        message is printed and the function returns early.
    """
    #  Load and Prepare Data
    print("Step 1: Loading and Preparing Data ")

    try:
        # Load the dataset using pandas
        citrus_df = pd.read_csv(csv_path)
    except FileNotFoundError:
        # Report the path that was actually tried so the hint matches reality.
        print(
            f"Error: '{csv_path}' not found. Make sure the file exists at "
            "that path relative to the current working directory."
        )
        return

    # Convert the DataFrame to a NumPy array for numerical operations.
    # The 'name' column is categorical, so only diameter and weight are kept.
    data = citrus_df[['diameter', 'weight']].to_numpy()

    # Positional split: rows before split_index are oranges, the rest are
    # grapefruits.
    # NOTE(review): this assumes the file lists every orange row before any
    # grapefruit row -- confirm against the dataset.
    oranges_data = data[:split_index, :]
    grapefruit_data = data[split_index:, :]

    # Column 0 is diameter and column 1 is weight (order of the selection above).
    orange_diameter = oranges_data[:, 0]
    orange_weight = oranges_data[:, 1]

    grapefruit_diameter = grapefruit_data[:, 0]
    grapefruit_weight = grapefruit_data[:, 1]

    print("--- First 5 entries of each array ---")
    print(f"Orange Diameter: {orange_diameter[:5]}")
    print(f"Orange Weight:   {orange_weight[:5]}")
    print(f"Grapefruit Diameter: {grapefruit_diameter[:5]}")
    print(f"Grapefruit Weight:   {grapefruit_weight[:5]}")
    print("\n" + "="*50 + "\n")

# Run the analysis when this cell executes; it prints the first five values
# of each diameter/weight array.
run_citrus_analysis()
    

Step 1: Loading and Preparing Data 
--- First 5 entries of each array ---
Orange Diameter: [2.96 3.91 4.42 4.47 4.48]
Orange Weight:   [86.76 88.05 95.17 95.6  95.76]
Grapefruit Diameter: [7.63 7.69 7.72 7.77 7.84]
Grapefruit Weight:   [126.79 133.98 135.56 135.62 136.63]


