In [6]:
%pip install numpy pandas

import pandas as pd
import numpy as np
import os

Note: you may need to restart the kernel to use updated packages.


In [7]:
series = pd.Series(
    np.random.randint(10, 100, 5),
    index=["A", "B", "C", "D", "E"]
)

print("Series:")
print(series)
print("Data type:", type(series))


Series:
A    24
B    21
C    79
D    13
E    85
dtype: int32
Data type: <class 'pandas.Series'>


In [8]:
data = {
    "Student_Name": ["Rohit", "Anjali", "Aman", "Sneha", "Kunal"],
    "Maths": [78, 85, 92, 66, 74],
    "Science": [80, 88, 90, 70, 76],
    "English": [72, 90, 85, 68, 71]
}

df = pd.DataFrame(data)

print("\nTop 3 rows:")
print(df.head(3))



Top 3 rows:
  Student_Name  Maths  Science  English
0        Rohit     78       80       72
1       Anjali     85       88       90
2         Aman     92       90       85


In [10]:
import pandas as pd

sample_data = {
    "Student_Name": ["Rohit", "Anjali", "Aman", "Sneha", "Kunal"],
    "Maths": [78, 85, 92, 66, 74],
    "Science": [80, 88, 90, 70, 76],
    "English": [72, 90, 85, 68, 71]
}

df_sample = pd.DataFrame(sample_data)

df_sample.to_csv("sample.csv", index=False)

print("sample.csv created successfully")


sample.csv created successfully


In [11]:
df["Total_Marks"] = df["Maths"] + df["Science"] + df["English"]
df["Average"] = df["Total_Marks"] / 3

print("\nDataFrame with Total and Average:")
print(df)



DataFrame with Total and Average:
  Student_Name  Maths  Science  English  Total_Marks    Average
0        Rohit     78       80       72          230  76.666667
1       Anjali     85       88       90          263  87.666667
2         Aman     92       90       85          267  89.000000
3        Sneha     66       70       68          204  68.000000
4        Kunal     74       76       71          221  73.666667


In [12]:
# Introduce NaN value
df.loc[2, "Science"] = np.nan

# (a) Identify missing values
print("\nMissing values:")
print(df.isnull())

# (b) Replace missing value with column mean
df["Science"].fillna(df["Science"].mean(), inplace=True)

print("\nAfter filling missing value:")
print(df)



Missing values:
   Student_Name  Maths  Science  English  Total_Marks  Average
0         False  False    False    False        False    False
1         False  False    False    False        False    False
2         False  False     True    False        False    False
3         False  False    False    False        False    False
4         False  False    False    False        False    False

After filling missing value:
  Student_Name  Maths  Science  English  Total_Marks    Average
0        Rohit     78     80.0       72          230  76.666667
1       Anjali     85     88.0       90          263  87.666667
2         Aman     92      NaN       85          267  89.000000
3        Sneha     66     70.0       68          204  68.000000
4        Kunal     74     76.0       71          221  73.666667


C:\Users\Rudraksha\AppData\Local\Temp\ipykernel_78616\810598215.py:9: ChainedAssignmentError: A value is being set on a copy of a DataFrame or Series through chained assignment using an inplace method.
Such inplace method never works to update the original DataFrame or Series, because the intermediate object on which we are setting values always behaves as a copy (due to Copy-on-Write).

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' instead, to perform the operation inplace on the original object, or try to avoid an inplace operation using 'df[col] = df[col].method(value)'.

See the documentation for a more detailed explanation: https://pandas.pydata.org/pandas-docs/stable/user_guide/copy_on_write.html
  df["Science"].fillna(df["Science"].mean(), inplace=True)


In [13]:
high_avg_students = df[df["Average"] > 75]

print("\nStudents with Average > 75:")
print(high_avg_students)



Students with Average > 75:
  Student_Name  Maths  Science  English  Total_Marks    Average
0        Rohit     78     80.0       72          230  76.666667
1       Anjali     85     88.0       90          263  87.666667
2         Aman     92      NaN       85          267  89.000000


In [14]:
top_performers = df.sort_values(by="Total_Marks", ascending=False).head(3)

print("\nTop 3 performers:")
print(top_performers)



Top 3 performers:
  Student_Name  Maths  Science  English  Total_Marks    Average
2         Aman     92      NaN       85          267  89.000000
1       Anjali     85     88.0       90          263  87.666667
0        Rohit     78     80.0       72          230  76.666667


In [15]:
df["English_Group"] = np.where(df["English"] >= 70, "Above 70", "Below 70")

grouped_result = df.groupby("English_Group")[["Maths", "Science"]].mean()

print("\nGrouped result (mean scores):")
print(grouped_result)



Grouped result (mean scores):
               Maths    Science
English_Group                  
Above 70       82.25  81.333333
Below 70       66.00  70.000000


In [16]:
df_marks = df[["Student_Name", "Total_Marks"]]

df_sports = pd.DataFrame({
    "Student_Name": ["Rohit", "Anjali", "Aman", "Sneha", "Kunal"],
    "Sports_Score": [20, 25, 18, 22, 24]
})

merged_df = pd.merge(df_marks, df_sports, on="Student_Name")

merged_df["Overall_Score"] = merged_df["Total_Marks"] + merged_df["Sports_Score"]

print("\nMerged DataFrame:")
print(merged_df)



Merged DataFrame:
  Student_Name  Total_Marks  Sports_Score  Overall_Score
0        Rohit          230            20            250
1       Anjali          263            25            288
2         Aman          267            18            285
3        Sneha          204            22            226
4        Kunal          221            24            245


In [18]:
merged_df.to_csv("final_student_data.csv", index=False)

import os
print("File created:", os.path.exists("final_student_data.csv"))


File created: True
