In [None]:
!pip install pandas matplotlib seaborn

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the student performance records from the CSV in the working directory.
df = pd.read_csv('student_performance.csv')

# Show the descriptive summary produced by DataFrame.describe() under a heading.
summary_stats = df.describe()
print("Basic Statistics:", summary_stats, sep="\n")

# Draw one histogram (with a KDE overlay) per exam subject, side by side
# in a single row of three panels.
histogram_panels = (
    ('math score', 'Math Score Distribution'),
    ('reading score', 'Reading Score Distribution'),
    ('writing score', 'Writing Score Distribution'),
)

plt.figure(figsize=(12, 4))
for panel_number, (score_column, panel_title) in enumerate(histogram_panels, start=1):
    # Subplot positions are 1-based: 1 row, 3 columns, current panel.
    plt.subplot(1, 3, panel_number)
    sns.histplot(df[score_column], kde=True)
    plt.title(panel_title)

plt.tight_layout()
plt.show()

# Add a per-student column holding the mean of the three exam scores.
score_total = df['math score'].add(df['reading score']).add(df['writing score'])
df['overall_avg_score'] = score_total.div(3)

# Answering questions
print("\nAnswers to the Questions:")

# Question 1: mean math score over the rows whose gender is 'male'.
male_math_scores = df.loc[df['gender'] == 'male', 'math score']
print("1. Average math score for male students:", male_math_scores.mean())

# Question 2: rows marked 'completed' as a percentage of all rows.
completed_prep_rows = df.loc[df['test preparation course'] == 'completed']
test_prep_percentage = len(completed_prep_rows) / len(df) * 100
print("2. Percentage of students completed the test preparation course:", test_prep_percentage)

# Box plot of the overall average score, one box per parental education level.
plt.figure(figsize=(10, 6))
education_column = 'parental level of education'
education_ax = sns.boxplot(
    data=df,
    x=education_column,
    y='overall_avg_score',
    hue=education_column,  # same variable as x, so each category gets its own palette colour
    palette='Set2',
    legend=False,  # the legend would only repeat the x-axis labels
)
education_ax.set_title('Parental Level of Education vs. Overall Average Score')
plt.xticks(rotation=45)
plt.show()

# Percentage of students in the 'maasai' race/ethnicity group whose lunch is 'standard'.
# NOTE(review): the variable names say "group_c" although the filter value is
# 'maasai'; the names are kept as-is in case later cells refer to them.
is_maasai = df['race/ethnicity'] == 'maasai'
has_standard_lunch = df['lunch'] == 'standard'

# Counting True values in a mask equals the row count of the filtered frame.
total_group_c_count = int(is_maasai.sum())
group_c_lunch_count = int((is_maasai & has_standard_lunch).sum())

if total_group_c_count != 0:
    percentage_standard_lunch_group_c = (group_c_lunch_count / total_group_c_count) * 100
else:
    # No matching students: report 0 rather than dividing by zero.
    percentage_standard_lunch_group_c = 0
print("4. Percentage of Maasai students receiving standard lunch:", percentage_standard_lunch_group_c)

# Calculate the percentage of students receiving standard lunch within each Ethnicity/Race.
# Each row is encoded as 100 (lunch == 'standard') or 0 (anything else), so the
# per-group mean of that column is directly the percentage for the group.
# The comparison is done on the whole column at once instead of calling a
# Python lambda for every row through Series.apply.
df['lunch_percentage'] = (df['lunch'] == 'standard') * 100
group_lunch_percentages = df.groupby('race/ethnicity')['lunch_percentage'].mean()
print("\nPercentage of students receiving standard lunch within each Ethnicity:")
print(group_lunch_percentages)

# Bar chart of the standard-lunch percentage, one bar per race/ethnicity group.
plt.figure(figsize=(10, 6))
ethnicity_labels = group_lunch_percentages.index
lunch_ax = sns.barplot(
    x=ethnicity_labels,
    y=group_lunch_percentages.values,
    hue=ethnicity_labels,  # same variable as x, so each bar gets its own palette colour
    palette='Set2',
    legend=False,  # the legend would only repeat the x-axis labels
)
lunch_ax.set_title('Percentage of Students Receiving Standard Lunch Within Each Ethnicity')
plt.xticks(rotation=45)
plt.show()
