In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plot appearance: apply matplotlib's "ggplot" style sheet,
# then make "Set2" seaborn's default colour palette (order kept as-is).
plt.style.use(style="ggplot")
sns.set_palette(palette="Set2")


In [None]:

# Load the placement dataset. The path is relative, so it is resolved against
# the kernel's working directory. The first five rows are the cell output.
df = pd.read_csv(filepath_or_buffer="../data/college_placement_1000.csv")
df.head(n=5)


In [None]:

# Dataset overview: dimensions, per-column dtype / non-null counts, summary stats.
# A notebook cell only renders its *final* expression, so a bare `df.shape`
# placed first would be evaluated and silently discarded; print it explicitly.
print("Shape (rows, columns):", df.shape)
# `info()` writes its report to stdout itself (it returns None).
df.info()
# Last expression of the cell -> rendered as the rich cell output.
df.describe()


In [None]:

# Count of missing values in each column (`isna` is the same check as `isnull`).
df.isna().sum(axis=0)


In [None]:

# Number of distinct values per column; missing values are not counted.
df.nunique(axis=0, dropna=True)


In [None]:

# Histogram of the `cgpa` column with a kernel-density curve drawn on top.
fig, ax = plt.subplots(figsize=(7, 4))
sns.histplot(df["cgpa"], kde=True, ax=ax)
ax.set_title("CGPA Distribution")
plt.show()


In [None]:

# Row counts for each distinct value of `internships`.
internships_ax = sns.countplot(x="internships", data=df)
internships_ax.set_title("Internships Distribution")
plt.show()


In [None]:

# Histogram of `technical_score` with a KDE overlay; the title is set on the
# Axes object that seaborn returns.
sns.histplot(df["technical_score"], kde=True).set_title(
    "Technical Score Distribution"
)
plt.show()


In [None]:

# Balance of the `placed` column: one bar per distinct value.
placed_ax = sns.countplot(x="placed", data=df)
placed_ax.set(title="Placement Distribution (0 = Not Placed, 1 = Placed)")
plt.show()


In [None]:

# Box plot of `cgpa` for each value of `placed`.
fig, ax = plt.subplots(figsize=(7, 4))
sns.boxplot(x="placed", y="cgpa", data=df, ax=ax)
ax.set_title("CGPA vs Placement Status")
plt.show()


In [None]:

# One bar per `internships` value; bar height is seaborn's default aggregate
# of `placed` for that group.
internships_rate_ax = sns.barplot(data=df, x="internships", y="placed")
internships_rate_ax.set_title("Internships vs Placement")
plt.show()


In [None]:

# One bar per `projects` value; bar height is seaborn's default aggregate
# of `placed` for that group.
sns.barplot(data=df, x="projects", y="placed").set_title("Projects vs Placement")
plt.show()


In [None]:

# One bar per `certifications` value; bar height is seaborn's default aggregate
# of `placed` for that group.
certifications_ax = sns.barplot(data=df, y="placed", x="certifications")
certifications_ax.set(title="Certifications vs Placement")
plt.show()


In [None]:

# Box plot of `communication_score` for each value of `placed`.
communication_ax = sns.boxplot(data=df, x="placed", y="communication_score")
communication_ax.set_title("Communication Score vs Placement")
plt.show()


In [None]:

# Box plot of `aptitude_score` for each value of `placed`.
sns.boxplot(data=df, x="placed", y="aptitude_score").set_title(
    "Aptitude Score vs Placement"
)
plt.show()


In [None]:

# Box plot of `technical_score` for each value of `placed`.
technical_ax = sns.boxplot(data=df, y="technical_score", x="placed")
technical_ax.set(title="Technical Score vs Placement")
plt.show()


In [None]:

# Mean of `placed` within each `domain`, sorted ascending. The Series keeps
# the name `domain_placement` because the closing summary cell prints from it.
domain_placement = (
    df.groupby("domain")["placed"]
    .mean()
    .sort_values()
)

# Draw on an explicitly created Axes instead of relying on the current figure.
fig, ax = plt.subplots(figsize=(10, 4))
domain_placement.plot.bar(ax=ax)
ax.set_title("Placement Rate by Domain")
ax.set_ylabel("Placement Probability")
plt.show()


In [None]:

# The ten most frequent values of the `skills` column, shown as a bar chart.
top_skills = df["skills"].value_counts().head(10)
skills_ax = top_skills.plot.bar(figsize=(10, 4))
skills_ax.set_title("Most Common Skill Combinations")
plt.show()


In [None]:

# Pairwise correlations between the numeric columns (non-numeric ones are
# skipped via `numeric_only=True`).
corr_matrix = df.corr(numeric_only=True)

fig, ax = plt.subplots(figsize=(12, 6))
# Correlations live in [-1, 1]. Pinning the colour limits to that range keeps
# the neutral midpoint of the diverging "coolwarm" map at exactly 0; otherwise
# the scale is fitted to the observed min/max and the colours misrepresent the
# sign and strength of the coefficients. `fmt=".2f"` keeps the cell annotations
# at a uniform two decimals.
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    vmin=-1,
    vmax=1,
    ax=ax,
)
ax.set_title("Correlation Heatmap")
plt.show()


In [None]:

# Headline numbers: mean `cgpa` for rows with placed == 1 and placed == 0,
# followed by the five domains with the highest mean of `placed`.
placed_cgpa = df.loc[df["placed"] == 1, "cgpa"]
unplaced_cgpa = df.loc[df["placed"] == 0, "cgpa"]
print("Average CGPA of Placed Students:", placed_cgpa.mean())
print("Average CGPA of Not Placed Students:", unplaced_cgpa.mean())

print("\nTop Domains with Highest Placement Probability:")
# `domain_placement` comes from the earlier per-domain cell; re-sort descending.
top_domains = domain_placement.sort_values(ascending=False).head()
print(top_domains)
