In [1]:
import pandas as pd

# -------------------------------
# 1. CREATING DATAFRAME MANUALLY
# -------------------------------

data = {
    'StudentID': [1, 2, 3, 4, 5],
    'Name': ['Sara', 'Ali', 'Fatima', 'Usman', 'Zara'],
    'Math': [85, 90, None, 67, 95],
    'English': [78, 85, 88, None, 93],
    'Science': [92, 89, 84, 75, 96],
    'Class': ['A', 'A', 'B', 'B', 'A']
}

df = pd.DataFrame(data)
print("🔹 Original DataFrame:")
print(df)


🔹 Original DataFrame:
   StudentID    Name  Math  English  Science Class
0          1    Sara  85.0     78.0       92     A
1          2     Ali  90.0     85.0       89     A
2          3  Fatima   NaN     88.0       84     B
3          4   Usman  67.0      NaN       75     B
4          5    Zara  95.0     93.0       96     A


In [2]:

# -----------------------
# 2. BASIC INFORMATION
# -----------------------

print("\n🔹 Data Types and Info:")
print(df.info())

print("\n🔹 Summary Statistics:")
print(df.describe())


🔹 Data Types and Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   StudentID  5 non-null      int64  
 1   Name       5 non-null      object 
 2   Math       4 non-null      float64
 3   English    4 non-null      float64
 4   Science    5 non-null      int64  
 5   Class      5 non-null      object 
dtypes: float64(2), int64(2), object(2)
memory usage: 372.0+ bytes
None

🔹 Summary Statistics:
       StudentID       Math    English    Science
count   5.000000   4.000000   4.000000   5.000000
mean    3.000000  84.250000  86.000000  87.200000
std     1.581139  12.203142   6.271629   8.105554
min     1.000000  67.000000  78.000000  75.000000
25%     2.000000  80.500000  83.250000  84.000000
50%     3.000000  87.500000  86.500000  89.000000
75%     4.000000  91.250000  89.250000  92.000000
max     5.000000  95.000000  93.000000  96.000000


In [3]:


# -----------------------
# 3. MISSING VALUES
# -----------------------

print("\n🔹 Checking for missing values:")
print(df.isnull().sum())

print("\n🔹 Filling missing values with column mean:")
df['Math'].fillna(df['Math'].mean(), inplace=True)
df['English'].fillna(df['English'].mean(), inplace=True)


🔹 Checking for missing values:
StudentID    0
Name         0
Math         1
English      1
Science      0
Class        0
dtype: int64

🔹 Filling missing values with column mean:


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Math'].fillna(df['Math'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['English'].fillna(df['English'].mean(), inplace=True)


In [4]:


# -----------------------
# 4. FILTERING DATA
# -----------------------

print("\n🔹 Students who scored above 90 in Science:")
print(df[df['Science'] > 90])


🔹 Students who scored above 90 in Science:
   StudentID  Name  Math  English  Science Class
0          1  Sara  85.0     78.0       92     A
4          5  Zara  95.0     93.0       96     A


In [5]:


# -----------------------
# 5. SORTING DATA
# -----------------------

print("\n🔹 Sorting by Math scores (descending):")
print(df.sort_values(by='Math', ascending=False))


🔹 Sorting by Math scores (descending):
   StudentID    Name   Math  English  Science Class
4          5    Zara  95.00     93.0       96     A
1          2     Ali  90.00     85.0       89     A
0          1    Sara  85.00     78.0       92     A
2          3  Fatima  84.25     88.0       84     B
3          4   Usman  67.00     86.0       75     B


In [6]:

# -----------------------
# 6. ADDING NEW COLUMN
# -----------------------

df['Total'] = df['Math'] + df['English'] + df['Science']
df['Percentage'] = df['Total'] / 3
print("\n🔹 DataFrame with Total & Percentage columns:")
print(df)


🔹 DataFrame with Total & Percentage columns:
   StudentID    Name   Math  English  Science Class   Total  Percentage
0          1    Sara  85.00     78.0       92     A  255.00   85.000000
1          2     Ali  90.00     85.0       89     A  264.00   88.000000
2          3  Fatima  84.25     88.0       84     B  256.25   85.416667
3          4   Usman  67.00     86.0       75     B  228.00   76.000000
4          5    Zara  95.00     93.0       96     A  284.00   94.666667


In [7]:
# -----------------------
# 7. APPLY FUNCTION
# -----------------------

def grade(pct):
    if pct >= 90:
        return 'A+'
    elif pct >= 80:
        return 'A'
    elif pct >= 70:
        return 'B'
    else:
        return 'C'

df['Grade'] = df['Percentage'].apply(grade)
print("\n🔹 DataFrame with Grade column:")
print(df[['Name', 'Percentage', 'Grade']])



🔹 DataFrame with Grade column:
     Name  Percentage Grade
0    Sara   85.000000     A
1     Ali   88.000000     A
2  Fatima   85.416667     A
3   Usman   76.000000     B
4    Zara   94.666667    A+


In [8]:

# -----------------------
# 8. GROUPBY & AGGREGATION
# -----------------------

print("\n🔹 Class-wise Average Scores:")
print(df.groupby('Class')[['Math', 'English', 'Science']].mean())


🔹 Class-wise Average Scores:
         Math    English    Science
Class                              
A      90.000  85.333333  92.333333
B      75.625  87.000000  79.500000


In [None]:

# -----------------------
# 9. MERGING TWO DATAFRAMES
# -----------------------

# Create another table of attendance
attendance = {
    'StudentID': [1, 2, 3, 4, 5],
    'Attendance (%)': [95, 88, 90, 85, 92]
}
df_attendance = pd.DataFrame(attendance)

merged_df = pd.merge(df, df_attendance, on='StudentID')
print("\n🔹 Merged DataFrame with Attendance:")
print(merged_df)



🔹 Merged DataFrame with Attendance:
   StudentID    Name   Math  English  Science Class   Total  Percentage Grade  \
0          1    Sara  85.00     78.0       92     A  255.00   85.000000     A   
1          2     Ali  90.00     85.0       89     A  264.00   88.000000     A   
2          3  Fatima  84.25     88.0       84     B  256.25   85.416667     A   
3          4   Usman  67.00     86.0       75     B  228.00   76.000000     B   
4          5    Zara  95.00     93.0       96     A  284.00   94.666667    A+   

   Attendance (%)  
0              95  
1              88  
2              90  
3              85  
4              92  


In [10]:



# -----------------------
# 10. EXPORT TO CSV
# -----------------------

merged_df.to_csv("student_report.csv", index=False)
print("\n✅ Final report saved as 'student_report.csv'")


✅ Final report saved as 'student_report.csv'
