# Pandas Fundamentals

This notebook covers core pandas concepts with small, focused examples.


In [None]:
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Cap how many rows and columns pandas renders when a frame is displayed.
pd.options.display.max_rows = 20
pd.options.display.max_columns = 20


## Series & DataFrames


In [None]:
# A Series is a one-dimensional array whose values carry index labels.
s = pd.Series({"a": 10, "b": 20, "c": 30})
s


In [None]:
# Small example frame used throughout the notebook: one row per person,
# one keyword argument per column.
df = pd.DataFrame(
    dict(
        name=["Ana", "Ben", "Cara", "Dan"],
        age=[23, 31, 27, 35],
        city=["Austin", "Boston", "Chicago", "Denver"],
        score=[88.5, 92.0, 79.5, 85.0],
    )
)

df


## Import & Export Data


In [None]:
# Round-trip the frame through CSV to show both export and import.
# The file is written to the current working directory.
csv_path = Path("sample_people.csv")

# index=False leaves the row labels out of the file, so reading it back
# does not add an extra "Unnamed: 0" column.
df.to_csv(csv_path, index=False)

# Read it back
loaded = pd.read_csv(csv_path)
loaded


In [None]:
# Optional: export to Excel and read it back.
# Requires the openpyxl package; uncomment the lines below to run.
# excel_path = Path("sample_people.xlsx")
# df.to_excel(excel_path, index=False)
# pd.read_excel(excel_path)


## Data Exploration Functions


In [None]:
# First two rows of the frame.
df.head(n=2)


In [None]:
# Last two rows of the frame.
df.tail(n=2)


In [None]:
# Prints a summary of the frame: index range, column names, non-null counts,
# dtypes and memory usage.
df.info()


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max); by default only
# the numeric columns are included.
df.describe()


In [None]:
# Number of rows for each distinct city, most frequent first.
df["city"].value_counts()


## Statistical Functions & Plotting


In [None]:
# Mean age, median score and standard deviation of score, shown as one tuple.
(
    df["age"].mean(),
    df["score"].median(),
    df["score"].std(),
)


In [None]:
# Bar chart with one bar per row of the frame, bar height = score.
ax = df["score"].plot.bar(title="Scores")
ax.set(xlabel="row", ylabel="score")
plt.show()


In [None]:
# Scatter plot of score (y axis) against age (x axis).
ax = df.plot.scatter(x="age", y="score", title="Age vs Score")
plt.show()


## Accessing Data


In [None]:
# Bracket access with a column label returns that column as a Series.
df["name"]  # column by label


In [None]:
# .loc selects by label: the row labelled 0 and the column labelled "name".
df.loc[0, "name"]  # label-based


In [None]:
# .iloc selects by integer position: first row, second column.
df.iloc[0, 1]  # position-based


In [None]:
# All rows (":"), restricted to the "name" and "score" columns.
df.loc[:, ["name", "score"]]


## Manipulating Data (Applying Functions)


In [None]:
# Vectorized arithmetic: the score expressed as a fraction of 100.
df["score_pct"] = df["score"].div(100)

# Helper that is applied to each value of the score column below
# (element-wise on a Series, not row-wise on the frame).

def grade(score):
    """Map a numeric score to a letter grade.

    Returns "A" for a score of 90 or more, "B" for a score of at least 80
    but below 90, and "C" for anything else.
    """
    if score >= 90:
        letter = "A"
    elif score >= 80:
        letter = "B"
    else:
        letter = "C"
    return letter

# Series.apply calls grade once per value of the score column and collects
# the results into a new column.
df["grade"] = df["score"].apply(grade)

# Display the frame, which now includes the derived columns.
df


## Data Cleaning


In [None]:
# Example frame with one missing value (None) in each column.
dirty = pd.DataFrame(
    dict(
        name=["Ana", "Ben", None, "Dan"],
        age=[23, None, 27, 35],
        city=["Austin", "Boston", "Chicago", None],
    )
)

# Work on a copy so the raw frame stays untouched.
cleaned = dirty.copy()

# Boolean frame of the same shape: True wherever a value is missing.
cleaned.isna()


In [None]:
# Fill the gaps in one chain:
#   - missing ages become the mean of the known ages (computed before any
#     row is dropped),
#   - missing cities become the placeholder "Unknown",
#   - rows that still lack a name are dropped.
cleaned = (
    cleaned
    .assign(
        age=lambda frame: frame["age"].fillna(frame["age"].mean()),
        city=lambda frame: frame["city"].fillna("Unknown"),
    )
    .dropna(subset=["name"])
)

cleaned


## Iterating Over DataFrames
Iterate row by row only when no vectorized alternative exists; vectorized operations are usually much faster than an explicit Python loop.


In [None]:
# Print the first two rows. Taking head(2) before iterating avoids walking
# the whole frame and does not depend on the index holding integer labels
# (a comparison such as `idx < 2` would fail on a string index).
for idx, row in df.head(2).iterrows():
    # Each row arrives as a Series keyed by column name.
    print(idx, row["name"], row["score"])


## Filtering & Querying Data


In [None]:
# Boolean-mask filter: the inner comparison yields one True/False per row,
# and indexing with it keeps the rows whose score is at least 85.
df[df["score"] >= 85]


In [None]:
# Same kind of filter written as a query string; column names are referenced
# directly and both conditions must hold.
df.query("age >= 30 and score >= 85")


## Grouping Data


In [None]:
# Mean score per city: split by city, pick the score column, average each group.
grouped = (
    df
    .groupby("city")["score"]
    .mean()
)
grouped


In [None]:
# Bar chart of the per-city mean score. The y axis is labelled explicitly so
# the figure can be read on its own, matching the earlier score bar chart.
ax = grouped.plot(kind="bar", title="Avg Score by City")
ax.set_ylabel("avg score")
plt.show()


## Sorting Data


In [None]:
# Rows ordered from highest to lowest score; returns a new frame and leaves
# df itself unsorted.
df.sort_values(by="score", ascending=False)


## Merging, Concatenating & Joining Data


In [None]:
# Two small frames that share an "id" column but only partly overlap:
# ids 2 and 3 appear in both, 1 only on the left, 4 only on the right.
left = pd.DataFrame(dict(id=[1, 2, 3], name=["Ana", "Ben", "Cara"]))

right = pd.DataFrame(dict(id=[2, 3, 4], team=["A", "B", "C"]))

# Inner merge keeps only the ids present in both frames.
left.merge(right, on="id", how="inner")


In [None]:
# Concatenate (stack rows): the first two and the last two rows of df,
# placed one on top of the other. The original row labels are kept.
upper, lower = df.head(2), df.tail(2)

pd.concat(objs=[upper, lower], axis="index")


In [None]:
# Join on index: move "id" into the index of both frames, then keep every
# row of the left frame and attach the matching team where one exists.
left_idx, right_idx = (frame.set_index("id") for frame in (left, right))

left_idx.join(right_idx, how="left")
