In [4]:
# Pin the Faker release this notebook was run with, and use the %pip magic so
# the package is installed into the environment of the running kernel.
%pip install faker==35.0.0

Collecting faker
  Downloading Faker-35.0.0-py3-none-any.whl (1.9 MB)
     ---------------------------------------- 1.9/1.9 MB 16.4 kB/s eta 0:00:00
Installing collected packages: faker
Successfully installed faker-35.0.0




In [21]:
import random

from faker import Faker
import pandas as pd

def generate_student_data(num_students, seed=None):
    """Build a DataFrame of randomly generated student records.

    Each record holds a fake name, an age, a sequential roll number and marks
    in three subjects.

    Args:
        num_students: Number of records to generate.
        seed: Optional seed for reproducible output. When given, the Faker
            instance and a private ``random.Random`` are both seeded with it,
            so the same seed yields the same data without touching the global
            ``random`` state. When ``None`` (the default) the module-level
            ``random`` functions are used, exactly as before.

    Returns:
        pandas.DataFrame with one row per student and the columns
        ``'Name : '``, ``'Age : '``, ``'Roll_no : '``, ``'English : '``,
        ``'Maths : '`` and ``'Science : '``. The trailing ``' : '`` in the
        column names is kept on purpose: the other cells of this notebook
        index the data with these exact keys.
    """
    columns = ['Name : ', 'Age : ', 'Roll_no : ', 'English : ', 'Maths : ', 'Science : ']

    fake = Faker()
    if seed is None:
        rng = random  # module-level generator, same behaviour as the unseeded original
    else:
        fake.seed_instance(seed)
        rng = random.Random(seed)

    s_data = []

    for i in range(num_students):
        name = fake.name()
        age = rng.randint(17, 21)  # randint is inclusive on both ends
        roll_no = f"R{i+1:03}"  # R001, R002, ... zero-padded to three digits

        eng = rng.randint(0, 100)
        maths = rng.randint(0, 100)
        science = rng.randint(0, 100)

        s_data.append({
            'Name : ': name,
            'Age : ': age,
            'Roll_no : ': roll_no,
            'English : ': eng,
            'Maths : ': maths,
            'Science : ': science,
        })

    # Passing the columns explicitly keeps the header row even when no
    # students are requested; for non-empty data the order is unchanged.
    return pd.DataFrame(s_data, columns=columns)

In [25]:
# Generate the class roster.
num_students = 50
df = generate_student_data(num_students)

# Persist it as CSV (without the index column) for the later cells to read.
output_path = "student_data.csv"
df.to_csv(output_path, index=False)
print(f"Student data saved to {output_path}")

Student data saved to student_data.csv


## Verification:

In [27]:
# Read the CSV written above back from disk so the saved file itself is checked.
data = pd.read_csv('student_data.csv')

In [28]:
# Preview the first rows to confirm the columns and values look as expected.
data.head()

Unnamed: 0,Name :,Age :,Roll_no :,English :,Maths :,Science :
0,Melissa Castro,17,R001,56,35,1
1,Rebecca Rogers,21,R002,85,59,79
2,Deborah Williams,21,R003,53,5,52
3,Lindsey Price,20,R004,10,53,61
4,Kristen Tran,17,R005,100,94,91


In [29]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

Unnamed: 0,Age :,English :,Maths :,Science :
count,50.0,50.0,50.0,50.0
mean,18.9,47.58,50.94,48.94
std,1.488048,29.398799,28.332868,30.688301
min,17.0,0.0,1.0,0.0
25%,18.0,21.75,34.25,21.5
50%,19.0,49.5,51.5,51.0
75%,20.0,69.0,72.25,76.0
max,21.0,100.0,98.0,100.0


# Performing a few operations

In [30]:
import pandas as pd

# Load the dataset saved earlier in this notebook; the cells below query `data`.
data = pd.read_csv('student_data.csv')

In [31]:

# 1. Students strictly older than 20, written to result1.csv
older_than_20 = data['Age : '].gt(20)
students_above_20 = data.loc[older_than_20]
students_above_20.to_csv('result1.csv', index=False)

In [32]:

# 2. Students scoring 60 or more in Maths, written to result2.csv
maths_at_least_60 = data['Maths : '].ge(60)
students_above_60_maths = data.loc[maths_at_least_60]
students_above_60_maths.to_csv('result2.csv', index=False)


In [33]:

# 3. Average score of the class for all 3 subjects
# The table is bound to `class_averages` rather than `average_scores`, because
# this notebook also defines an `average_scores()` function further down;
# sharing the name would make that function uncallable if this cell is re-run.
class_averages = pd.DataFrame({
    "Subject": ["English", "Maths", "Science"],
    "Average Score": [data['English : '].mean(), data['Maths : '].mean(), data['Science : '].mean()]
})
class_averages.to_csv('result3.csv', index=False)


In [35]:
# 4. Top performers in all 3 subjects without .loc, .idxmax(), or .iloc
# The rows are collected in `top_rows` rather than `top_performers`, because
# this notebook also defines a `top_performers()` function further down;
# sharing the name would make that function uncallable if this cell is re-run.
top_rows = []

for subject in ["English : ", "Maths : ", "Science : "]:
    max_score = data[subject].max()  # Highest mark in this subject
    # When several students share the top mark, the first one in file order is used.
    top_student = data[data[subject] == max_score]["Name : "].values[0]
    # strip(" :") removes the trailing " : " from the column key for display.
    top_rows.append([subject.strip(" :"), top_student, max_score])

# Convert to DataFrame and save
pd.DataFrame(top_rows, columns=["Subject", "Top Student", "Score"]).to_csv('result4.csv', index=False)

print("Top performers saved to result4.csv")


Top performers saved to result4.csv


## OR
```py
# 4. Top performers in all 3 subjects (idxmax variant)
# Map each display name to the column key that holds its marks.
subject_columns = {
    "English": 'English : ',
    "Maths": 'Maths : ',
    "Science": 'Science : ',
}
top_performers = pd.DataFrame({
    "Subject": list(subject_columns),
    # idxmax() gives the index label of the first row holding the maximum.
    "Top Student": [
        data.loc[data[column].idxmax(), 'Name : ']
        for column in subject_columns.values()
    ],
    "Score": [data[column].max() for column in subject_columns.values()],
})
top_performers.to_csv('result4.csv', index=False)
```

## OR
```py

# 4. Top performers in all 3 subjects without using idxmax()
subjects = ["English : ", "Maths : ", "Science : "]

records = []
for subject in subjects:
    # All rows that reach the subject's maximum; the first one is reported.
    best_rows = data[data[subject] == data[subject].max()]
    first_best = best_rows.iloc[0]
    records.append({
        "Subject": subject.strip(" :"),  # Clean column name
        "Top Student": first_best['Name : '],
        "Score": first_best[subject],
    })

top_performers = pd.DataFrame(records, columns=["Subject", "Top Student", "Score"])

# Save to CSV
top_performers.to_csv('result4.csv', index=False)
print("Top performers saved to result4.csv")
```

In [38]:
# Get students above a given age
def students_above_age(filename, age_threshold, output_file='result01.csv'):
    """Save the students who are strictly older than ``age_threshold``.

    Args:
        filename: Path of the CSV file to read; it must contain an
            ``'Age : '`` column.
        age_threshold: Age limit; only rows with a greater age are kept.
        output_file: Destination CSV path. Defaults to ``'result01.csv'``;
            pass another path to keep results for several thresholds
            instead of overwriting one file.

    The matching rows are written to ``output_file`` without the index
    column and a confirmation line is printed. Nothing is returned.
    """
    data = pd.read_csv(filename)
    result = data[data['Age : '] > age_threshold]
    result.to_csv(output_file, index=False)
    print(f"Students older than {age_threshold} saved to {output_file}")

# Get students with marks >= threshold in a given subject
def students_above_marks(filename, subject, marks_threshold, output_file='result02.csv'):
    """Save the students scoring at least ``marks_threshold`` in ``subject``.

    Args:
        filename: Path of the CSV file to read.
        subject: Exact name of the column holding the marks, e.g.
            ``'Maths : '`` (the column keys in this notebook end in ``' : '``).
        marks_threshold: Minimum mark to keep; the comparison is inclusive.
        output_file: Destination CSV path. Defaults to ``'result02.csv'``;
            pass another path to keep results for several subjects or
            thresholds instead of overwriting one file.

    Raises:
        KeyError: If ``subject`` is not a column of the CSV.

    The matching rows are written to ``output_file`` without the index
    column and a confirmation line is printed. Nothing is returned.
    """
    data = pd.read_csv(filename)
    result = data[data[subject] >= marks_threshold]
    result.to_csv(output_file, index=False)
    print(f"Students with {marks_threshold}+ marks in {subject} saved to {output_file}")

# Calculate and save the average score for all subjects
def average_scores(filename, output_file='result03.csv'):
    """Compute the class average for English, Maths and Science and save it.

    Args:
        filename: Path of the CSV file to read; it must contain the columns
            ``'English : '``, ``'Maths : '`` and ``'Science : '``.
        output_file: Destination CSV path. Defaults to ``'result03.csv'``;
            pass another path to avoid overwriting an earlier result.

    A two-column table (``Subject``, ``Average Score``) is written to
    ``output_file`` without the index column and a confirmation line is
    printed. Nothing is returned.
    """
    data = pd.read_csv(filename)
    # Display name -> column key that holds the marks for that subject.
    subject_columns = {
        "English": 'English : ',
        "Maths": 'Maths : ',
        "Science": 'Science : ',
    }
    avg_scores = pd.DataFrame({
        "Subject": list(subject_columns),
        "Average Score": [data[column].mean() for column in subject_columns.values()],
    })
    avg_scores.to_csv(output_file, index=False)
    # The default path keeps the original message text.
    if output_file == 'result03.csv':
        print("Class average scores saved to result03.csv")
    else:
        print(f"Class average scores saved to {output_file}")

# Find and save top performers in all subjects
def top_performers(filename, output_file='result04.csv'):
    """Find the top scorer in English, Maths and Science and save the result.

    Args:
        filename: Path of the CSV file to read; it must contain a
            ``'Name : '`` column and the three subject columns
            ``'English : '``, ``'Maths : '`` and ``'Science : '``.
        output_file: Destination CSV path. Defaults to ``'result04.csv'``;
            pass another path to avoid overwriting an earlier result.

    When several students share the highest mark in a subject, the first one
    in file order is reported. A table with the columns ``Subject``,
    ``Top Student`` and ``Score`` is written to ``output_file`` without the
    index column and a confirmation line is printed. Nothing is returned.
    """
    data = pd.read_csv(filename)
    top_students = []
    for subject in ["English : ", "Maths : ", "Science : "]:
        best_score = data[subject].max()  # computed once per subject
        best_name = data[data[subject] == best_score]["Name : "].values[0]
        # strip(" :") removes the trailing " : " from the column key for display.
        top_students.append([subject.strip(" :"), best_name, best_score])
    pd.DataFrame(top_students, columns=["Subject", "Top Student", "Score"]).to_csv(output_file, index=False)
    print(f"Top performers saved to {output_file}")


In [40]:

# Run each helper once against the generated dataset.
source_csv = 'student_data.csv'
students_above_age(source_csv, age_threshold=20)
students_above_marks(source_csv, subject='Maths : ', marks_threshold=60)
average_scores(source_csv)
top_performers(source_csv)


Students older than 20 saved to result01.csv
Students with 60+ marks in Maths :  saved to result02.csv
Class average scores saved to result03.csv
Top performers saved to result04.csv


In [41]:
# Wrapping the logic in functions makes it reusable: the same helper can be
# called with a different subject and threshold.
# NOTE: students_above_marks always writes to result02.csv, so this call
# overwrites the Maths result saved by the earlier call.

students_above_marks('student_data.csv', 'Science : ', 90)


Students with 90+ marks in Science :  saved to result02.csv
