# Day 2: Python Data Structures, Pandas & Data Cleaning

## Python Data Structures

In [None]:
# Lists are mutable: they can grow and shrink in place
fruits = ["apple", "banana", "cherry"]
fruits += ["orange"]                    # add one item to the end
del fruits[fruits.index("banana")]      # drop the first "banana"
print(fruits)

# Tuples are fixed-size sequences; items are read by position
colors = "red", "green", "blue"
print(colors[0])

# Dictionaries map keys to values
person = dict(name="Alice", age=25)
print(person["name"])
person.update(age=26)                   # overwrite the value stored under "age"

## Pandas Deep Dive

In [None]:
import pandas as pd

# Build a small example DataFrame: one row per student
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Score': [85, 92, 78],
    'Passed': [True, True, False]
})

print(df['Score'])                                # one column -> Series
print(df[['Name', 'Score']])                      # list of columns -> DataFrame
print(df[0:2])                                    # row slice -> first two rows
print(df.sort_values('Score', ascending=False))   # highest score first
print(df['Score'].mean())

# 'Name' holds strings, so a plain .mean() over every column raises a
# TypeError on pandas >= 2.0. numeric_only=True restricts the aggregation to
# numeric columns (here: 'Score'), which is what older pandas did implicitly.
score_by_pass = df.groupby('Passed').mean(numeric_only=True)
print(score_by_pass)

## Data Cleaning Basics

In [None]:
# Toy frame with one missing age value
df_dirty = pd.DataFrame(
    {'name': ['Alice', 'Bob', 'Charlie'], 'age': [25, None, 30]}
)

# Three non-destructive views of the missing data; df_dirty is untouched here
missing_mask = df_dirty.isnull()       # True where a value is missing
complete_rows = df_dirty.dropna()      # rows without any missing value
zero_filled = df_dirty.fillna(0)       # missing values replaced by 0
print(missing_mask)
print(complete_rows)
print(zero_filled)

# Persist the cleanup: fill the gap, cast age to int, capitalise 'name'
age_as_int = df_dirty['age'].fillna(0).astype(int)
df_dirty = df_dirty.assign(age=age_as_int).rename(columns={'name': 'Name'})
print(df_dirty)

## Hands-On Data Cleaning Example

In [None]:
# Raw data with typical problems: padded column names, a missing age, and a
# repeated "Bob" record whose two copies differ only by that missing age.
df_messy = pd.DataFrame({
    ' Name ': ['Alice', 'Bob', 'Bob'],
    ' Age': [25, None, 25],
    ' Salary ': [50000, 60000, 60000]
})

# 1. Normalise column names so later steps can use plain lowercase keys.
df_messy.columns = [col.strip().lower() for col in df_messy.columns]

# 2. Impute the missing age with the column mean. Round before casting:
#    astype(int) alone would silently truncate a fractional mean.
df_messy['age'] = df_messy['age'].fillna(df_messy['age'].mean())
df_messy['age'] = df_messy['age'].round().astype(int)

# 3. De-duplicate last. Before imputation the two "Bob" rows differ by the
#    NaN, so dropping duplicates first would remove nothing and the cleaned
#    frame would still contain identical rows.
df_messy = df_messy.drop_duplicates().reset_index(drop=True)

print(df_messy)

## Practice Questions
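1. Create a list of the numbers 1-10 and print only the even ones.
2. Create a dictionary of students and their scores, then print the students who scored above 80.
3. Create a tuple with 5 items and try to change one of them. What happens, and why?
4. Create a DataFrame with a salary column and sort it by salary.
5. Group a DataFrame by department and compute the average salary per department.
6. Filter the rows where salary > 50K and dept == 'IT'.
7. Drop the rows that contain missing values.
8. Fill the missing values in a column with that column's median.
9. Convert a column of dtype object (e.g. numbers stored as strings) to int.
10. Rename all columns to lowercase and remove spaces from their names.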

## Interview Questions (Write your answers below each)
1. Difference between list and tuple?
2. Difference between dictionary and list?
3. Ways to iterate through a dictionary?
4. What is a DataFrame?
5. How to select rows and columns?
6. What is groupby used for?
7. Reasons for missing data?
8. Handling missing data in pandas?
9. How to identify duplicates?
10. How to convert data types?