# College-wise Placement Comparison
## Deep Dive Analysis of Mumbai Engineering Colleges

This notebook provides a detailed comparative analysis of placement data across major Mumbai engineering colleges, including:
- VJTI, SPIT, VESIT, VIT Mumbai, ACPCE, FCRCE

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Notebook-wide settings.
# NOTE(review): this silences *every* warning, including pandas deprecation
# notices; consider narrowing the filter once the notebook is stable.
warnings.filterwarnings('ignore')
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

# Load data
df = pd.read_csv('placements.csv')

# Placement rate in percent. A record with Total_Students == 0 would divide by
# zero and produce inf, which then poisons every mean taken downstream; mapping
# a zero denominator to NaN lets pandas skip such rows in aggregations instead.
df['Placement_Rate'] = (
    df['Placed_Students'] / df['Total_Students'].replace(0, np.nan)
) * 100

# Define tier classification
def classify_tier(avg_package, tier1_min=12, tier2_min=7):
    """Map an average package (in LPA) to a tier label.

    Parameters
    ----------
    avg_package : float
        Average package of one record, in LPA.
    tier1_min : float, default 12
        Smallest package that still counts as 'Tier 1 (Premium)'.
    tier2_min : float, default 7
        Smallest package that still counts as 'Tier 2 (High)'.

    Returns
    -------
    str
        'Tier 1 (Premium)', 'Tier 2 (High)' or 'Tier 3 (Average)'.

    Notes
    -----
    Both lower bounds are inclusive. A NaN package compares False against
    both thresholds and therefore lands in 'Tier 3 (Average)'.
    """
    if avg_package >= tier1_min:
        return 'Tier 1 (Premium)'
    if avg_package >= tier2_min:
        return 'Tier 2 (High)'
    return 'Tier 3 (Average)'

# Label every record with the tier of its average package
df['Tier'] = df['Avg_Package_LPA'].map(classify_tier)

# Quick sanity check of what was loaded
print(f"Data loaded: {len(df)} records from {df['College'].nunique()} colleges")
display(df.head(5))

## Head-to-Head College Comparison

In [None]:
# Colleges that take part in the head-to-head comparison
major_colleges = ['VJTI', 'SPIT', 'VESIT', 'VIT Mumbai', 'FCRCE', 'ACPCE']
df_major = df.loc[df['College'].isin(major_colleges)]

# One row per college: head-counts are summed, package and rate are averaged
# over the college's records. The table is ordered by average package
# (highest first) and then gains the weighted Overall_Score column.
comparison = (
    df_major
    .groupby('College')
    .agg(
        Total_Students=('Total_Students', 'sum'),
        Placed_Students=('Placed_Students', 'sum'),
        Avg_Package_LPA=('Avg_Package_LPA', 'mean'),
        Placement_Rate=('Placement_Rate', 'mean'),
    )
    .round(2)
    .sort_values('Avg_Package_LPA', ascending=False)
    .assign(
        # Weighted score
        Overall_Score=lambda t: (t['Placement_Rate'] * 0.6 + t['Avg_Package_LPA'] * 4) / 2
    )
)

print("=" * 80, "COMPREHENSIVE COLLEGE COMPARISON", "=" * 80, sep="\n")
display(comparison)

# Visualize comparison: a 2x3 dashboard built from the `comparison` table
fig, axes = plt.subplots(2, 3, figsize=(18, 10))

# 1. Average Package Comparison
ax1 = axes[0, 0]
# Ascending sort so that the largest bar ends up at the top of the barh chart;
# sorted once and reused for both the bars and their value labels.
by_package = comparison.sort_values('Avg_Package_LPA')
by_package.plot(y='Avg_Package_LPA', kind='barh', ax=ax1, color='steelblue', legend=False)
ax1.set_title('Average Package Comparison', fontweight='bold', fontsize=12)
ax1.set_xlabel('Average Package (LPA)')
for i, v in enumerate(by_package['Avg_Package_LPA']):
    # '\u20b9' is the rupee sign. The label used to contain the mis-decoded
    # byte sequence of that character, which rendered as garbage on the chart.
    ax1.text(v + 0.3, i, f'\u20b9{v:.2f}', va='center', fontweight='bold')

# 2. Placement Rate Comparison
ax2 = axes[0, 1]
by_rate = comparison.sort_values('Placement_Rate')
by_rate.plot(y='Placement_Rate', kind='barh', ax=ax2, color='coral', legend=False)
ax2.set_title('Placement Rate Comparison', fontweight='bold', fontsize=12)
ax2.set_xlabel('Placement Rate (%)')
for i, v in enumerate(by_rate['Placement_Rate']):
    ax2.text(v + 1, i, f'{v:.1f}%', va='center', fontweight='bold')

# 3. Total Students vs Placed (grouped bars, one pair per college)
ax3 = axes[0, 2]
x = np.arange(len(comparison))
width = 0.35
ax3.bar(x - width/2, comparison['Total_Students'], width, label='Total', color='lightblue')
ax3.bar(x + width/2, comparison['Placed_Students'], width, label='Placed', color='lightgreen')
ax3.set_xticks(x)
ax3.set_xticklabels(comparison.index, rotation=45, ha='right')
ax3.set_title('Total vs Placed Students', fontweight='bold', fontsize=12)
ax3.legend()
ax3.grid(axis='y', alpha=0.3)

# 4. Overall Performance Score
ax4 = axes[1, 0]
comparison.sort_values('Overall_Score').plot(y='Overall_Score', kind='barh', ax=ax4, color='gold', legend=False)
ax4.set_title('Overall Performance Score\n(Weighted: Rate 60% + Package 40%)', fontweight='bold', fontsize=12)
ax4.set_xlabel('Performance Score')

# 5. Performance Matrix: one bubble per college, package on x and rate on y
ax5 = axes[1, 1]
for college in comparison.index:
    ax5.scatter(comparison.loc[college, 'Avg_Package_LPA'],
                comparison.loc[college, 'Placement_Rate'],
                s=comparison.loc[college, 'Total_Students']/2,  # marker area scales with head-count
                alpha=0.6, label=college)
ax5.set_xlabel('Average Package (LPA)', fontweight='bold')
ax5.set_ylabel('Placement Rate (%)', fontweight='bold')
ax5.set_title('Performance Matrix\n(Bubble size = Total Students)', fontweight='bold', fontsize=12)
ax5.legend(fontsize=8, loc='best')
ax5.grid(alpha=0.3)

# 6. Tier Distribution: number of records per tier, stacked per college
ax6 = axes[1, 2]
tier_dist = df_major.groupby(['College', 'Tier']).size().unstack(fill_value=0)
tier_dist.plot(kind='bar', stacked=True, ax=ax6, colormap='RdYlGn')
ax6.set_title('Tier Distribution by College', fontweight='bold', fontsize=12)
ax6.set_xlabel('')
ax6.set_ylabel('Number of Records')
ax6.legend(title='Tier', fontsize=8)
ax6.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## Field-wise Performance by College

In [None]:
# Field-wise comparison across colleges (2x2 figure)
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Fields that count as "CS/IT" for the first two panels. The unused
# `key_fields` list and `df_cs` frame that used to be computed here were dead
# code and have been dropped.
cs_it_fields = ['Computer Science', 'Information Technology', 'Computer Engineering']
cs_it_data = df_major[df_major['Field'].isin(cs_it_fields)]

# 1. Computer Science/IT Package Comparison
cs_pivot = cs_it_data.groupby('College')['Avg_Package_LPA'].mean().sort_values(ascending=False)
cs_pivot.plot(kind='bar', ax=axes[0, 0], color='dodgerblue', alpha=0.8)
axes[0, 0].set_title('CS/IT Average Package by College', fontweight='bold', fontsize=12)
axes[0, 0].set_ylabel('Average Package (LPA)')
axes[0, 0].set_xlabel('')
axes[0, 0].tick_params(axis='x', rotation=45)
axes[0, 0].grid(axis='y', alpha=0.3)

# 2. CS/IT Placement Rate Comparison
cs_placement = cs_it_data.groupby('College')['Placement_Rate'].mean().sort_values(ascending=False)
cs_placement.plot(kind='bar', ax=axes[0, 1], color='lightcoral', alpha=0.8)
axes[0, 1].set_title('CS/IT Placement Rate by College', fontweight='bold', fontsize=12)
axes[0, 1].set_ylabel('Placement Rate (%)')
axes[0, 1].set_xlabel('')
axes[0, 1].tick_params(axis='x', rotation=45)
axes[0, 1].grid(axis='y', alpha=0.3)

# 3. Field diversity heatmap: mean package for every (college, field) pair
pivot_package = df_major.pivot_table(values='Avg_Package_LPA', index='College',
                                     columns='Field', aggfunc='mean')
# Keep only the six fields with the most students so the heatmap stays readable
top_fields = df_major.groupby('Field')['Total_Students'].sum().nlargest(6).index
pivot_filtered = pivot_package[pivot_package.columns.intersection(top_fields)]

sns.heatmap(pivot_filtered, annot=True, fmt='.2f', cmap='YlGnBu', ax=axes[1, 0],
            cbar_kws={'label': 'Avg Package'})
axes[1, 0].set_title('Package Heatmap: College vs Field', fontweight='bold', fontsize=12)
axes[1, 0].set_ylabel('College')

# 4. Comparative bar chart - one bar group per college, one bar per field
compared_fields = ['Computer Science', 'Mechanical', 'Information Technology']
field_college_stats = (
    df_major[df_major['Field'].isin(compared_fields)]
    .groupby(['College', 'Field'])['Avg_Package_LPA']
    .mean()
    .unstack()
)

field_college_stats.plot(kind='bar', ax=axes[1, 1], width=0.8)
axes[1, 1].set_title('Package Comparison: CS vs IT vs Mechanical', fontweight='bold', fontsize=12)
axes[1, 1].set_ylabel('Average Package (LPA)')
axes[1, 1].set_xlabel('')
axes[1, 1].legend(title='Field', fontsize=9)
axes[1, 1].tick_params(axis='x', rotation=45)
axes[1, 1].grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

# Tabulate the per-college CS/IT numbers: mean package, mean placement rate
# and total head-count, ordered by package (highest first)
print("\n" + "=" * 80, "CS/IT Field Statistics by College:", "=" * 80, sep="\n")

cs_it_summary = (
    cs_it_data
    .groupby('College')
    .agg(
        Avg_Package_LPA=('Avg_Package_LPA', 'mean'),
        Placement_Rate=('Placement_Rate', 'mean'),
        Total_Students=('Total_Students', 'sum'),
    )
    .round(2)
    .sort_values('Avg_Package_LPA', ascending=False)
)
display(cs_it_summary)

## Year-wise Trends by College

In [None]:
# Mean package and mean placement rate for every (year, college) pair
year_trends = (
    df_major
    .groupby(['Year', 'College'])
    .agg(
        Avg_Package_LPA=('Avg_Package_LPA', 'mean'),
        Placement_Rate=('Placement_Rate', 'mean'),
    )
    .reset_index()
)

fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Both panels share the same layout; only metric, marker and captions differ:
# (column, marker, title, y-label)
trend_panels = [
    ('Avg_Package_LPA', 'o', 'Average Package Trends by College (2023-2025)', 'Average Package (LPA)'),
    ('Placement_Rate', 's', 'Placement Rate Trends by College (2023-2025)', 'Placement Rate (%)'),
]

for panel_ax, (metric, marker, title, ylabel) in zip(axes, trend_panels):
    # One line per college, drawn in the order of `major_colleges`
    for college in major_colleges:
        college_data = year_trends[year_trends['College'] == college]
        panel_ax.plot(college_data['Year'], college_data[metric],
                      marker=marker, label=college, linewidth=2, markersize=8)

    panel_ax.set_title(title, fontweight='bold', fontsize=14)
    panel_ax.set_xlabel('Year', fontweight='bold')
    panel_ax.set_ylabel(ylabel, fontweight='bold')
    panel_ax.legend(fontsize=9, loc='best')
    panel_ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

# Year-over-year growth analysis: percentage change of each college's mean
# package between consecutive years present in the data
print("\n" + "="*80)
print("YEAR-OVER-YEAR GROWTH ANALYSIS")
print("="*80)
for college in major_colleges:
    # Mean package per year. The values are deliberately NOT rounded here:
    # growth is computed from the full-precision means and only the printed
    # figures are formatted to two decimals.
    yearly_pkg = (
        df_major.loc[df_major['College'] == college]
        .groupby('Year')['Avg_Package_LPA']
        .mean()
    )

    if len(yearly_pkg) < 2:
        continue  # a single year (or no data at all) has nothing to compare against

    print(f"\n{college}:")
    years = sorted(yearly_pkg.index)
    for prev_year, curr_year in zip(years, years[1:]):
        prev_pkg = yearly_pkg.loc[prev_year]
        curr_pkg = yearly_pkg.loc[curr_year]
        # Growth is undefined when the baseline is zero or either value is missing
        if pd.notna(prev_pkg) and pd.notna(curr_pkg) and prev_pkg != 0:
            growth_txt = f"{(curr_pkg - prev_pkg) / prev_pkg * 100:+.1f}%"
        else:
            growth_txt = "n/a"
        # '\u2192' is a right arrow and '\u20b9' the rupee sign; the message
        # used to contain the mis-decoded byte sequences of these characters.
        print(f"  {prev_year} \u2192 {curr_year}: "
              f"\u20b9{prev_pkg:.2f} \u2192 \u20b9{curr_pkg:.2f} ({growth_txt})")

## Individual College Profiles

In [None]:
# Create detailed profiles for each major college: a text summary followed by
# a 2x2 figure (packages by field, rates by field, yearly trend, student mix)
for college in major_colleges:
    college_data = df[df['College'] == college]

    print("\n" + "="*80)
    print(f"COLLEGE PROFILE: {college}")
    print("="*80)

    # A college that is absent from the dataset would otherwise print NaN
    # statistics and render an empty figure.
    if college_data.empty:
        print("No placement records found for this college.")
        continue

    # Basic statistics
    total_students = college_data['Total_Students'].sum()
    total_placed = college_data['Placed_Students'].sum()
    avg_rate = college_data['Placement_Rate'].mean()
    avg_pkg = college_data['Avg_Package_LPA'].mean()

    # '\u20b9' is the rupee sign; the messages below used to contain the
    # mis-decoded byte sequence of that character.
    print(f"Total Students: {total_students:,}")
    print(f"Total Placed: {total_placed:,}")
    print(f"Average Placement Rate: {avg_rate:.2f}%")
    print(f"Average Package: \u20b9{avg_pkg:.2f} LPA")
    print(f"Fields Offered: {college_data['Field'].nunique()}")
    print(f"Years of Data: {sorted(college_data['Year'].unique())}")

    # Top fields
    print(f"\nTop 5 Fields by Average Package:")
    top_fields = college_data.groupby('Field')['Avg_Package_LPA'].mean().nlargest(5)
    for idx, (field, pkg) in enumerate(top_fields.items(), 1):
        print(f"  {idx}. {field}: \u20b9{pkg:.2f} LPA")

    # Visualizations for this college
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle(f'{college} - Detailed Analysis', fontsize=16, fontweight='bold')

    # Field-wise package (eight best-paying fields)
    field_stats = college_data.groupby('Field')['Avg_Package_LPA'].mean().sort_values(ascending=False)
    if len(field_stats) > 0:
        field_stats.head(8).plot(kind='barh', ax=axes[0, 0], color='skyblue')
        axes[0, 0].set_title('Top Fields by Package', fontweight='bold')
        axes[0, 0].set_xlabel('Average Package (LPA)')

    # Placement rate by field (eight best-placing fields)
    placement_by_field = college_data.groupby('Field')['Placement_Rate'].mean().sort_values(ascending=False)
    if len(placement_by_field) > 0:
        placement_by_field.head(8).plot(kind='barh', ax=axes[0, 1], color='lightcoral')
        axes[0, 1].set_title('Top Fields by Placement Rate', fontweight='bold')
        axes[0, 1].set_xlabel('Placement Rate (%)')

    # Year-wise trends
    year_stats = college_data.groupby('Year').agg({
        'Avg_Package_LPA': 'mean',
        'Placement_Rate': 'mean'
    })
    if len(year_stats) > 1:
        year_stats['Avg_Package_LPA'].plot(kind='line', marker='o', ax=axes[1, 0],
                                           color='green', linewidth=2)
        axes[1, 0].set_title('Package Trend Over Years', fontweight='bold')
        axes[1, 0].set_ylabel('Average Package (LPA)')
        axes[1, 0].grid(alpha=0.3)
    else:
        # A single year cannot form a line; say so instead of leaving a blank frame
        axes[1, 0].text(0.5, 0.5, 'Trend needs at least two years of data',
                        ha='center', va='center', transform=axes[1, 0].transAxes)
        axes[1, 0].set_axis_off()

    # Student distribution across the six largest fields
    student_dist = college_data.groupby('Field')['Total_Students'].sum().nlargest(6)
    if len(student_dist) > 0:
        student_dist.plot(kind='pie', ax=axes[1, 1], autopct='%1.1f%%', startangle=90)
        axes[1, 1].set_title('Student Distribution by Field (Top 6)', fontweight='bold')
        axes[1, 1].set_ylabel('')

    plt.tight_layout()
    plt.show()
    # One figure is created per college; release it so figures do not pile up
    # in memory when a backend keeps them open after show().
    plt.close(fig)

## Comparative Rankings & Summary

In [None]:
# Final rankings and summary
print("="*80)
print("FINAL COLLEGE RANKINGS")
print("="*80)

# Create comprehensive ranking: one row per college across the whole dataset
ranking_data = df.groupby('College').agg({
    'Avg_Package_LPA': 'mean',
    'Placement_Rate': 'mean',
    'Total_Students': 'sum',
    'Placed_Students': 'sum'
}).round(2)

# Calculate ranks (1 = best).
# method='min' gives tied colleges the same, best available integer rank.
# The default method ('average') produces fractional ranks such as 2.5 for
# ties, which astype(int) then silently truncated.
ranking_data['Package_Rank'] = (
    ranking_data['Avg_Package_LPA'].rank(ascending=False, method='min').astype(int)
)
ranking_data['Placement_Rank'] = (
    ranking_data['Placement_Rate'].rank(ascending=False, method='min').astype(int)
)
# NOTE(review): the head-to-head comparison earlier in this notebook scores
# colleges as (rate * 0.6 + package * 4) / 2, while this cell uses
# rate * 0.5 + package * 5 - confirm that the different weighting is intended.
ranking_data['Overall_Score'] = (ranking_data['Placement_Rate'] * 0.5 +
                                 ranking_data['Avg_Package_LPA'] * 5)
ranking_data['Overall_Rank'] = (
    ranking_data['Overall_Score'].rank(ascending=False, method='min').astype(int)
)

ranking_data = ranking_data.sort_values('Overall_Rank')

# '\U0001F3C6' is the trophy emoji; the heading used to contain the
# mis-decoded byte sequence of that character.
print("\n\U0001F3C6 TOP 10 COLLEGES - OVERALL RANKING:")
display(ranking_data[['Avg_Package_LPA', 'Placement_Rate', 'Package_Rank',
                      'Placement_Rank', 'Overall_Rank']].head(10))

# Final comparison visualization: grouped bars for the first six rows of
# `ranking_data` (which is sorted by Overall_Rank), one pair of bars per college
fig, ax = plt.subplots(figsize=(14, 8))

top_6_colleges = ranking_data.head(6).index

x = np.arange(len(top_6_colleges))
width = 0.35

# Express each package as a percentage of the highest average package across
# all ranked colleges, so it can share a 0-100 axis with the placement rate.
pkg_normalized = (ranking_data.loc[top_6_colleges, 'Avg_Package_LPA'] /
                  ranking_data['Avg_Package_LPA'].max() * 100)
# Placement rate is already a percentage and is plotted as-is
rate_normalized = ranking_data.loc[top_6_colleges, 'Placement_Rate']

ax.bar(x - width/2, pkg_normalized, width, label='Package Score (normalized)', color='steelblue', alpha=0.8)
ax.bar(x + width/2, rate_normalized, width, label='Placement Rate (%)', color='coral', alpha=0.8)

ax.set_ylabel('Score', fontweight='bold')
ax.set_title('Top 6 Colleges - Comparative Performance', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(top_6_colleges, rotation=45, ha='right')
ax.legend()
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

print("\n" + "="*80)
print("KEY INSIGHTS:")
print("="*80)

# `ranking_data` is indexed by college name, so idxmin()/idxmax() already
# return the college. The previous code passed that label to .iloc, which only
# accepts integer positions and therefore raised a TypeError.
best_for_packages = ranking_data['Package_Rank'].idxmin()
best_for_placements = ranking_data['Placement_Rank'].idxmin()
overall_best = ranking_data['Overall_Rank'].idxmin()
largest_college = ranking_data['Total_Students'].idxmax()

# '\u2713' is a check mark; the messages used to contain the mis-decoded byte
# sequence of that character.
print(f"\u2713 Best for Packages: {best_for_packages}")
print(f"\u2713 Best for Placements: {best_for_placements}")
print(f"\u2713 Overall Best: {overall_best}")
print(f"\u2713 Largest by Student Count: {largest_college}")
print("\n" + "="*80)