# 02 — Pandas Fundamentals
**Data Analysis Portfolio**

Topics: Series, DataFrame, filtering, sorting, groupby, merge, apply/map, pivot tables

In [None]:
import pandas as pd
import numpy as np
# Record the pandas release this notebook was run with.
print('Pandas version:', pd.__version__)
# Let wide frames show up to 20 columns before pandas truncates the display.
pd.options.display.max_columns = 20

## 1. Series & DataFrame Creation

In [None]:
# One score per subject; the subject names become the Series index.
s = pd.Series({
    'Math': 85,
    'Science': 92,
    'English': 78,
    'History': 90,
    'CS': 88,
})
print("Series:\n", s)

# idxmax() gives the label of the highest score; look the value up by that label.
best_subject = s.idxmax()
print("Max subject:", best_subject, "=", s.loc[best_subject])

In [None]:
# Fixed seed so the synthetic cohort is identical on every run.
np.random.seed(42)
n = 100

# Assemble the raw columns one at a time. The random columns are drawn in the
# order they are listed here, which fixes the values produced under the seed.
student_data = {}
student_data['student_id'] = range(1001, 1001 + n)
student_data['name'] = [f'Student_{i}' for i in range(n)]
student_data['age'] = np.random.randint(17, 25, n)
student_data['gender'] = np.random.choice(['Male', 'Female'], n)
student_data['department'] = np.random.choice(['CS', 'Maths', 'Physics', 'Chemistry'], n)
student_data['math_score'] = np.random.randint(40, 100, n)
student_data['science_score'] = np.random.randint(35, 100, n)
student_data['english_score'] = np.random.randint(30, 100, n)
student_data['attendance_%'] = np.random.uniform(60, 100, n).round(1)
df = pd.DataFrame(student_data)

# Derived columns: combined score, per-subject average, and pass flag
# (a student passes when the rounded average is at least 60).
df['total'] = df[['math_score', 'science_score', 'english_score']].sum(axis=1)
df['avg'] = df['total'].div(3).round(2)
df['passed'] = df['avg'].ge(60)

print("Shape:", df.shape)
print(df.head())

## 2. Explore

In [None]:
# Structural overview: column names, non-null counts and dtypes.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print()
# Summary statistics for the numeric columns, rounded for readability.
print(df.describe().round(2))

In [None]:
# Frequency of each category, most common first, for the two categorical columns.
dept_counts = df['department'].value_counts()
gender_counts = df['gender'].value_counts()
# A blank line separates the two tables.
print(dept_counts, gender_counts, sep='\n\n')

## 3. Selecting & Filtering

In [None]:
# Boolean filter: combine masks with & (each comparison needs its own
# parentheses). 'passed' is already a boolean column, so it is used directly
# as a mask instead of being compared with True.
cs_passed = df[(df['department'] == 'CS') & df['passed']]
print(f"CS students who passed: {len(cs_passed)}")

# isin(): keep rows whose department is any of the listed values.
stem = df[df['department'].isin(['CS', 'Maths', 'Physics'])]
print(f"STEM students: {len(stem)}")

# query(): filter with an expression string. Backticks are required around
# 'attendance_%' because the name is not a valid Python identifier.
high = df.query('avg > 85 and `attendance_%` > 90')
print(f"High scorers with attendance>90: {len(high)}")

## 4. Sorting

In [None]:
# Order students from highest to lowest average and keep the first ten rows.
top10 = df.sort_values(by='avg', ascending=False).iloc[:10]
# Show only the columns relevant to the ranking.
report_cols = ['name', 'department', 'avg', 'attendance_%']
print(top10[report_cols])

## 5. GroupBy & Aggregation

In [None]:
# Per-department summary using named aggregation: output_column=(source, func).
dept = df.groupby('department').agg(
    count=('student_id', 'count'),
    avg_math=('math_score', 'mean'),
    avg_science=('science_score', 'mean'),
    # The mean of a boolean column is the fraction of True values.
    pass_rate=('passed', 'mean'),
)
# Convert the pass fraction to a percentage BEFORE rounding. Rounding the
# fraction to 2 decimals first would discard the tenths of a percent that the
# one-decimal display is meant to show (e.g. 2/3 -> 0.67 -> 67.0, not 66.7).
dept['pass_rate'] = dept['pass_rate'] * 100
dept = dept.round({'avg_math': 2, 'avg_science': 2, 'pass_rate': 1})
print(dept)

In [None]:
# Mean average score per (department, gender) pair, reshaped so that
# departments are rows and genders are columns.
pivot = (
    df.groupby(['department', 'gender'])['avg']
    .mean()
    .round(2)
    .unstack(level='gender')
)
print(pivot)

## 6. apply() / map() / lambda

In [None]:
def grade(score):
    """Return the letter grade for a numeric score.

    Bands (lower bound inclusive): 90+ -> 'A+', 80+ -> 'A', 70+ -> 'B',
    60+ -> 'C'; anything that meets none of the cutoffs -> 'F'.
    """
    # Cutoffs are checked from highest to lowest; the first one met wins.
    bands = ((90, 'A+'), (80, 'A'), (70, 'B'), (60, 'C'))
    for cutoff, letter in bands:
        if score >= cutoff:
            return letter
    return 'F'

# apply() with a named function: one letter grade per student average.
df['grade'] = df['avg'].apply(grade)
grade_counts = df['grade'].value_counts()
print(grade_counts.sort_index())

# map() with a dict: translate each gender label to a numeric code.
gender_codes = {'Male': 0, 'Female': 1}
df['gender_code'] = df['gender'].map(gender_codes)

# apply() with a lambda: three-way banding of the average score.
df['category'] = df['avg'].apply(
    lambda avg: 'High' if avg >= 75 else 'Medium' if avg >= 60 else 'Low'
)
print(df['category'].value_counts())

## 7. Merge

In [None]:
# Lookup table with one row per department: head of department and lab count.
dept_info = pd.DataFrame(
    [
        ('CS', 'Dr. Kumar', 5),
        ('Maths', 'Dr. Nair', 2),
        ('Physics', 'Dr. Patel', 4),
        ('Chemistry', 'Dr. Singh', 6),
    ],
    columns=['department', 'hod', 'labs'],
)
# Left join: keep every student row and attach the matching department details.
merged = pd.merge(df, dept_info, how='left', on='department')
print(merged[['name', 'department', 'hod', 'labs']].head())

## 8. Pivot Table

In [None]:
# Number of students per department (rows) and letter grade (columns);
# combinations with no students are shown as 0.
pt = df.pivot_table(
    index='department',
    columns='grade',
    values='avg',
    aggfunc='count',
    fill_value=0,
)
print(pt)

---
## ✅ Summary
| Operation | Method |
|-----------|--------|
| Filter | Boolean mask, `.query()`, `.isin()` |
| Aggregate | `.groupby().agg()` |
| Transform | `.apply()`, `.map()`, lambda |
| Combine | `.merge()` |
| Summarize | `.pivot_table()` |