### Q1
Import required libraries and load the Excel dataset.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Read the Excel workbook into the DataFrame that every later cell works on.
# NOTE(review): the '/content/...' location looks like a Colab upload path —
# confirm it exists before running in another environment.
dataset_path = '/content/Python Data.xlsx'
student_df = pd.read_excel(dataset_path)

### Q2
Count the number of unique students based on first name.

In [None]:
# Distinct first names stand in for distinct students; two students who share
# a first name are therefore counted once.
first_names = student_df['First Name']
num_unique_students = first_names.nunique()
print("Unique student count:", num_unique_students)

### Q3
Calculate and display the average CGPA of students.

In [None]:
# Arithmetic mean of the CGPA column, reported to two decimal places.
cgpa_values = student_df['CGPA']
mean_cgpa = cgpa_values.mean()
mean_cgpa_rounded = round(mean_cgpa, 2)
print("Mean CGPA of students:", mean_cgpa_rounded)

### Q4
Plot the distribution of students across graduation years.

In [None]:
# Tally students per graduation year, order the tally by year (the index),
# then draw one bar per year.
students_per_year = student_df['Year of Graduation'].value_counts()
students_per_year = students_per_year.sort_index()
students_per_year.plot(kind='bar', color='lightgreen')

plt.title("Graduation Year Distribution")
plt.xlabel("Graduation Year")
plt.ylabel("Student Count")
plt.show()

### Q5
Plot the distribution of Python experience in months.

In [None]:
# Histogram (10 bins) of months of Python experience with a KDE curve overlaid.
python_months = student_df['Experience with python (Months)']
sns.histplot(python_months, bins=10, kde=True, color='teal')

plt.title("Python Experience Distribution (Months)")
plt.xlabel("Months of Experience")
plt.ylabel("Count")
plt.show()

### Q6
Map family income ranges to numeric values and compute mean income.

In [None]:
# Representative value (in lakhs) for each family-income bracket: the midpoint
# of each bounded bracket, and 8 as a stand-in for the open-ended '7 Lakh+'.
income_mapping = {
    '0-2 Lakh': 1,
    '2-5 Lakh': 3.5,
    '5-7 Lakh': 6,
    '7 Lakh+': 8,
}

raw_income = student_df['Family Income']
student_df['Family Income Numeric'] = raw_income.map(income_mapping)

# Series.map yields NaN for any label that is not a key of income_mapping, and
# mean() then skips those rows without notice. Report such labels so a typo or
# an unexpected bracket does not silently bias the average.
unmapped_rows = student_df['Family Income Numeric'].isna() & raw_income.notna()
if unmapped_rows.any():
    unmapped_labels = sorted(raw_income[unmapped_rows].astype(str).unique())
    print("Warning: unmapped Family Income labels (excluded from mean):", unmapped_labels)

avg_family_income = student_df['Family Income Numeric'].mean()
print("Mean Family Income (Lakhs):", round(avg_family_income, 2))

### Q7
Plot top 5 colleges by average CGPA.

In [None]:
# Mean CGPA per college, highest first; keep the five best and plot them.
cgpa_by_college = student_df.groupby('College Name')['CGPA'].mean()
top_five_by_cgpa = cgpa_by_college.sort_values(ascending=False).head(5)
top_five_by_cgpa.plot(kind='bar', color='navy')

plt.title("Top 5 Colleges by Avg CGPA")
plt.ylabel("Avg CGPA")
plt.show()

### Q8
Show outliers in quantity by attendee status using a boxplot.

In [None]:
# Long-form call: one box of Quantity per Attendee Status category, so the
# outliers of each status group are visible side by side. Handing seaborn only
# a two-column frame as `data` (no x/y) is interpreted as wide-form input, i.e.
# one box per column, which does not group Quantity by status.
# NOTE(review): assumes 'Quantity' is numeric and 'Attendee Status' is
# categorical — confirm against the workbook.
sns.boxplot(data=student_df, x='Attendee Status', y='Quantity')
plt.title("Attendee Status vs Quantity Outliers")
plt.show()

### Q9
Plot average CGPA by city.

In [None]:
# Mean CGPA for each city, drawn as bars from highest to lowest average.
cgpa_by_city = student_df.groupby('City')['CGPA'].mean()
cgpa_by_city_desc = cgpa_by_city.sort_values(ascending=False)
cgpa_by_city_desc.plot(kind='bar', color='darkcyan')

plt.title("City-wise Average CGPA")
plt.ylabel("Avg CGPA")
plt.xticks(rotation=45)  # tilt the city names so they are easier to read
plt.show()

### Q10
Scatter plot: family income vs CGPA and correlation value.

In [None]:
# Scatter of CGPA against the numeric family-income column, followed by the
# correlation coefficient of the two columns. Requires 'Family Income Numeric'
# to already exist on student_df.
income_column = 'Family Income Numeric'
cgpa_column = 'CGPA'

sns.scatterplot(data=student_df, x=income_column, y=cgpa_column)
plt.title("CGPA vs Family Income")
plt.xlabel("Family Income (Lakhs)")
plt.ylabel("CGPA")
plt.show()

income_cgpa_corr = student_df[income_column].corr(student_df[cgpa_column])
print("Correlation (Family Income & CGPA):", round(income_cgpa_corr, 2))

### Q11
Pairplot to study relationships between CGPA, income, Python experience and expected salary.

In [None]:
# Grid of pairwise plots over the four numeric columns of interest.
# Requires 'Family Income Numeric' to already exist on student_df.
pairplot_columns = [
    'CGPA',
    'Family Income Numeric',
    'Experience with python (Months)',
    'Expected salary (Lac)',
]
sns.pairplot(student_df, vars=pairplot_columns)
plt.suptitle("Pairwise Relationships with Salary", y=1.02)  # y > 1 lifts the title above the grid
plt.show()

### Q12
Find most popular event by designation.

In [None]:
# Count rows per (Designation, Events) pair, pivot events into columns with
# absent pairs counted as 0, then pick the event with the largest count for
# each designation.
pair_counts = student_df.groupby(['Designation', 'Events']).size()
designation_event_table = pair_counts.unstack().fillna(0)
designation_event_table.idxmax(axis=1)

### Q13
Average CGPA and expected salary grouped by leadership skill.

In [None]:
# Mean CGPA and mean expected salary for each leadership-skills answer.
averaged_columns = ['CGPA', 'Expected salary (Lac)']
by_leadership = student_df.groupby('Leadership- skills')
by_leadership[averaged_columns].mean()

### Q14
Correlation between leadership skill and expected salary.

In [None]:
def _leadership_to_flag(answer):
    """Return 1 when the answer reads 'yes' (any case, surrounding spaces ignored), else 0."""
    return int(str(answer).strip().lower() == 'yes')


# Encode the leadership answer as 0/1 so it can be correlated with salary.
# Anything other than 'yes' — including missing values — becomes 0.
student_df['Leadership_Flag'] = student_df['Leadership- skills'].map(_leadership_to_flag)

expected_salary = student_df['Expected salary (Lac)']
lead_corr = student_df['Leadership_Flag'].corr(expected_salary)
print("Leadership vs Salary Correlation:", round(lead_corr, 2))

### Q15
Count students graduating by end of 2024.

In [None]:
# Keep the rows whose graduation year is 2024 or earlier and count them.
graduates_by_2024 = student_df['Year of Graduation'] <= 2024
grad_2024 = student_df[graduates_by_2024]
print("Students graduating by 2024 end:", len(grad_2024))

### Q16
Value counts of event awareness source.

In [None]:
# Frequency of each answer to the "how did you hear about this event" question.
awareness_source = student_df['How did you come to know about this event?']
awareness_source.value_counts()

### Q17
Count students who participated in data science events.

In [None]:
# Rows whose Events text mentions "data science" in any letter case; rows with
# a missing Events value are treated as non-matches (na=False).
mentions_data_science = student_df['Events'].str.contains('data science', case=False, na=False)
ds_participants = student_df[mentions_data_science]
print("Data Science event participants:", len(ds_participants))

### Q18
Average expected salary for students with above-average CGPA and above-average Python experience.

In [None]:
# "High" means strictly above the column mean, for both CGPA and months of
# Python experience; a student must clear both thresholds to qualify.
cgpa_scores = student_df['CGPA']
python_months = student_df['Experience with python (Months)']

cgpa_above_avg = cgpa_scores > cgpa_scores.mean()
exp_above_avg = python_months > python_months.mean()
meets_both = cgpa_above_avg & exp_above_avg

qualified_students = student_df[meets_both]
avg_exp_salary = qualified_students['Expected salary (Lac)'].mean()
print("Avg expected salary (high CGPA + Python exp):", round(avg_exp_salary, 2))

### Q19
Top 5 colleges where students came to know about the event via college.

In [None]:
# Students whose awareness-source answer mentions "college" (any letter case;
# missing answers count as non-matches), then the five colleges that appear
# most often among them.
awareness_source = student_df['How did you come to know about this event?']
heard_via_college = awareness_source.str.contains('college', case=False, na=False)

college_ref_df = student_df[heard_via_college]
college_frequencies = college_ref_df['College Name'].value_counts()
top_5_colleges = college_frequencies.head(5)
print(top_5_colleges)