# ✅ Topics to Cover:
| Section | Focus |
|---|---|
| 1️⃣ Creating DataFrames | From dict, list, CSV |
| 2️⃣ Viewing Data | `head()`, `tail()`, `info()`, `describe()` |
| 3️⃣ Selecting Data | `df['col']`, `df.loc[]`, `df.iloc[]` |
| 4️⃣ Filtering Data | Boolean masks, conditions |



# 🔹 1. Create DataFrame

In [1]:
import pandas as pd

# From dictionary: every key becomes a column label and every list
# supplies that column's values (one entry per row).
names = ['Ali', 'Sara', 'Ahmed']
ages = [22, 25, 21]
cities = ['Lahore', 'Karachi', 'Islamabad']

data = {'Name': names, 'Age': ages, 'City': cities}

# Rows get the default integer index 0..n-1 because no index is passed.
df = pd.DataFrame(data=data)
print(df)


    Name  Age       City
0    Ali   22     Lahore
1   Sara   25    Karachi
2  Ahmed   21  Islamabad


# 🔹 2. Basic Exploration

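- Use `.head()` / `.tail()` to preview the first / last rows.

- Use `.info()` to see each column's dtype, its non-null count, and the memory usage.

- Use `.describe()` for summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.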

In [2]:
print(df.head())       # First 5 rows (fewer if the frame is shorter)
print(df.tail())       # Last 5 rows (fewer if the frame is shorter)
# info() writes its summary to stdout itself and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
df.info()              # Summary info
print(df.describe())   # Statistical summary (numerical only)


    Name  Age       City
0    Ali   22     Lahore
1   Sara   25    Karachi
2  Ahmed   21  Islamabad
    Name  Age       City
0    Ali   22     Lahore
1   Sara   25    Karachi
2  Ahmed   21  Islamabad
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
 2   City    3 non-null      object
dtypes: int64(1), object(2)
memory usage: 204.0+ bytes
None
             Age
count   3.000000
mean   22.666667
std     2.081666
min    21.000000
25%    21.500000
50%    22.000000
75%    23.500000
max    25.000000


# 🔹 3. Column & Row Selection

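- Use `df['col']` for a single column (returns a Series) or `df[['col1', 'col2']]` for several columns (returns a DataFrame).

- `.loc[]` is label-based and `.iloc[]` is integer-position-based. A `.loc` slice includes its end label, while an `.iloc` slice excludes its stop position.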

In [4]:
# One column: a single label inside the brackets
name_column = df['Name']
print(name_column)
print("______________")

# Several columns: a list of labels inside the brackets
name_and_age = df[['Name', 'Age']]
print(name_and_age)
print("______________")

# Rows by label with .loc (a label slice includes its end label)
row_by_label = df.loc[0]
rows_by_label = df.loc[0:1]
print(row_by_label)         # Row labelled 0
print(rows_by_label)        # Rows labelled 0 through 1
print("______________")

# Rows by position with .iloc (a positional slice excludes its stop)
row_by_position = df.iloc[0]
rows_by_position = df.iloc[0:2]
print(row_by_position)      # Row at position 0
print(rows_by_position)     # Rows at positions 0 and 1


0      Ali
1     Sara
2    Ahmed
Name: Name, dtype: object
______________
    Name  Age
0    Ali   22
1   Sara   25
2  Ahmed   21
______________
Name       Ali
Age         22
City    Lahore
Name: 0, dtype: object
   Name  Age     City
0   Ali   22   Lahore
1  Sara   25  Karachi
______________
Name       Ali
Age         22
City    Lahore
Name: 0, dtype: object
   Name  Age     City
0   Ali   22   Lahore
1  Sara   25  Karachi


# 🔹 4. Filtering Rows (Conditions)

    Use boolean indexing: df[condition]

In [6]:
# Each comparison produces a boolean Series (one True/False per row);
# indexing the frame with it keeps only the rows marked True.

# Rows where Age > 21
older_than_21 = df['Age'] > 21
print(df[older_than_21])

print("__________________")

# Rows where City is Lahore
lives_in_lahore = df['City'] == 'Lahore'
print(df[lives_in_lahore])


   Name  Age     City
0   Ali   22   Lahore
1  Sara   25  Karachi
__________________
  Name  Age    City
0  Ali   22  Lahore


# 🎯 Mini Task – End of Day 4

In [9]:
# Create DataFrame of students with Name, Marks, City
# (note: this rebinds `df`, replacing the frame used in the earlier sections)
df = pd.DataFrame({
    'Name': ['Shoaib', 'Ali', 'Hassan'],
    'Marks': [85, 78, 92],
    'City': ['Lahore', 'Karachi', 'Islamabad']
})

# Filter where Marks > 80
print(df[df['Marks'] > 80])

print("- - - - - - - - - - -")

# Show info and describe.
# info() prints its own report and returns None, so it is called directly;
# print(df.info()) would append a stray "None" line to the output.
df.info()
print("- - - - - - - - - - -")
print(df.describe())


     Name  Marks       City
0  Shoaib     85     Lahore
2  Hassan     92  Islamabad
- - - - - - - - - - -
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Marks   3 non-null      int64 
 2   City    3 non-null      object
dtypes: int64(1), object(2)
memory usage: 204.0+ bytes
None
- - - - - - - - - - -
       Marks
count    3.0
mean    85.0
std      7.0
min     78.0
25%     81.5
50%     85.0
75%     88.5
max     92.0
