In [1]:
# Importing all necessary libraries for data loading and inspection

import pandas as pd            
import numpy as np            
import os                      # OS helps us work with file paths and check if files exist
import warnings                # Used to hide unnecessary warnings for a clean notebook

# Ignore warnings to avoid cluttering the notebook output
warnings.filterwarnings("ignore")

print("Libraries imported successfully.")


Libraries imported successfully.


In [2]:

data_path = "../data/sample.csv"

# Tell the reader up front whether the dataset is where we expect it to be
print(
    f"Dataset found at: {data_path}"
    if os.path.exists(data_path)
    else "⚠️ Dataset NOT found! Please check the file path."
)


Dataset found at: ../data/sample.csv


In [None]:
# Function to safely load a CSV file into a pandas DataFrame

def load_csv(file_path):
    """
    Load a CSV file into a pandas DataFrame.

    Every failure is reported with a printed, user-friendly message instead
    of an exception, so callers must check the result for ``None`` before
    using it.

    Parameters
    ----------
    file_path : str or os.PathLike
        Location of the CSV file. ``pathlib.Path`` objects are accepted, and
        the ``.csv`` extension is matched case-insensitively.

    Returns
    -------
    pandas.DataFrame or None
        The loaded data, or ``None`` if the argument is not a usable path,
        does not end in ``.csv``, does not exist, or cannot be read.
    """

    # Normalise to a plain string so Path objects work as well as str;
    # anything that is not a text path is rejected with a message rather
    # than crashing on a missing .endswith attribute.
    try:
        path = os.fspath(file_path)
    except TypeError:
        path = None
    if not isinstance(path, str):
        print(" Error: file_path must be a string or path-like object.")
        return None

    # Check file extension to ensure it's a CSV (".CSV" is just as valid)
    if not path.lower().endswith(".csv"):
        print(" Error: The selected file is not a CSV file.")
        return None

    # Check if the file exists
    if not os.path.exists(path):
        print(" Error: File not found at the given path.")
        return None

    try:
        # Try reading the CSV file
        df = pd.read_csv(path)
        print(f"✔ CSV loaded successfully! Shape: {df.shape}")
        return df

    except Exception as e:
        # Deliberately broad: any parsing, encoding or OS error is reported
        # to the user and turned into a None result.
        print(" Error while reading the CSV file:")
        print(e)
        return None

# Testing the function on our dataset
data = load_csv(data_path)

# load_csv returns None when loading fails, so only preview a real DataFrame
# (calling .head() on None would raise AttributeError)
if data is not None:
    display(data.head())  # Show the first few rows


✔ CSV loaded successfully! Shape: (150, 6)


Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:

# Quick structural inspection of the loaded dataset; skipped when loading failed
if data is not None:

    print(" Preview of the first 5 rows:")
    display(data.head())

    print("\n Preview of the last 5 rows:")
    display(data.tail())

    print("\n Dataset Info:")
    # info() writes its report to stdout itself and returns None, so wrapping
    # it in print() would add a stray "None" line after the report
    data.info()     # Show column types and non-null counts

    print("\n Missing Values in Each Column:")
    print(data.isnull().sum())  # Count missing values in each column

else:
    print(" Cannot inspect data because the CSV failed to load.")


 Preview of the first 5 rows:


Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa



 Preview of the last 5 rows:


Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica
149,150,5.9,3.0,5.1,1.8,Iris-virginica



 Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB
None

 Missing Values in Each Column:
Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64
