## Setup & Exploration

In [None]:
import pandas as pd
import numpy as np

# 1. Build the example DataFrame
# A five-row toy "company" table; two of the Salary entries are deliberately
# left missing (NaN).
data = dict(
    Name=["Sara", "Omar", "Lina", "Sami", "Ali"],
    Age=[22, 25, 23, 24, 30],
    Department=["IT", "HR", "IT", "Marketing", "IT"],
    Salary=[2000, np.nan, 2100, np.nan, 3000],
)

# Each dict key becomes a column (from_dict defaults to orient="columns").
df = pd.DataFrame.from_dict(data)

print("--- 1. Raw DataFrame ---")
display(df)  # rich table rendering; nicer than print() inside a notebook

print("\n--- 2. Dataset Info ---")
df.info()  # prints column dtypes and non-null counts

print("\n--- 3. Statistical Summary ---")
numeric_summary = df.describe()  # summary statistics for the numeric columns
display(numeric_summary)

## Selection & Slicing

In [None]:
# A. Single column by name -> a Series
ages = df.loc[:, "Age"]

# B. Label-based selection (loc): row labels 0 through 2 (the end label is
#    included), restricted to the listed columns
name_dept_columns = ["Name", "Department"]
subset_loc = df.loc[0:2, name_dept_columns]

# C. Position-based selection (iloc): row positions 1 and 2 and column
#    positions 0 and 1 (the end of each slice is excluded)
subset_iloc = df.iloc[1:3, :2]

print("--- Selecting Specific Data ---")
print("First 3 rows (Name & Dept):\n")
display(subset_loc)

## Conditional Filtering

In [None]:
# Goal: employees in the 'IT' department whose Age is above 22.
# Each condition is built as its own boolean mask and the two are combined
# element-wise with `&`.
in_it_dept = df["Department"] == "IT"
older_than_22 = df["Age"] > 22
it_employees = df[in_it_dept & older_than_22]

print("--- IT Employees older than 22 ---")
display(it_employees)

# Rows whose Salary is missing (NaN)
salary_is_missing = df["Salary"].isna()
missing_salaries = df[salary_is_missing]
print("\n--- Employees with Missing Salaries ---")
display(missing_salaries)

## Cleaning & Manipulation

In [None]:
# 1. Handling Missing Data
# Rather than dropping rows, fill the NaN salaries with the mean of the known
# salaries (Series.mean() skips NaN by default), so every employee is kept.
mean_salary = df["Salary"].mean()
# Assign the filled column back instead of calling
# df["Salary"].fillna(..., inplace=True). That in-place call acts on an
# intermediate Series: recent pandas 2.x releases emit a FutureWarning for it,
# and with Copy-on-Write (the default from pandas 3.0) it leaves `df` unchanged.
df["Salary"] = df["Salary"].fillna(mean_salary)

# 2. Dropping Data
# drop() returns a new DataFrame; `df` itself is not modified by these calls.
# Drop the rows labelled 0 and 3.
df_dropped_rows = df.drop([0, 3], axis=0)

# Drop a column (here 'Age').
df_dropped_col = df.drop(columns=["Age"])

print("--- DataFrame after Filling NaNs ---")
display(df)

print("\n--- DataFrame after Dropping Rows [0, 3] ---")
display(df_dropped_rows)