In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import dash
import dash_html_components as html
import dash_core_components as dcc
from jupyter_dash import JupyterDash
from dash.dependencies import Input, Output

In [2]:
# Load the merged CSV; the cells below all read from `df`.
df = pd.read_csv(
    'cleaned/10yrsmerged.csv',
)

In [3]:
#df['Status'].replace({'Placed':1,
#                      'Not Placed':0},inplace=True)

# Gender Ratio

In [4]:
# Head-count per (Branch, Gender) pair.
# `size()` counts the rows of each group directly, rather than counting the
# "truthy" values of the grouping key itself with np.count_nonzero.
d = (
    df.groupby(['Branch', 'Gender'])
    .size()
    .reset_index(name='Count')
)

In [25]:
# Grouped bars: one bar per gender within each branch.
px.bar(
    d,
    x='Branch',
    y='Count',
    color='Gender',
    barmode='group',
    title='Branch-Wise Gender Ratio',
)

# Branch-wise Average CGPA wrt Gender

In [6]:
# Mean CGPA per (Branch, Gender) pair.
# The string alias 'mean' skips NaN by default, so it matches np.nanmean
# without handing a NumPy callable to `agg` (deprecated since pandas 2.1).
f = (
    df.groupby(['Branch', 'Gender'])
    .agg({'CGPA': 'mean'})
    .rename(columns={'CGPA': 'Average CGPA'})
    .reset_index()
)

In [26]:
# Grouped bars of the per-branch average CGPA, split by gender.
px.bar(
    f,
    x='Branch',
    y='Average CGPA',
    color='Gender',
    barmode='group',
    title='Branch-Wise and Gender-Wise Average CGPA',
)

In [8]:
#f=f.unstack()

# Increase in No. of Placements for IT branch from 2010 to 2021

In [24]:
# Total placements per year for the IT branch.
# Only the plotted column is summed, so text columns such as Branch/Status are
# never pushed through `sum()`.
# `labels` is keyed by column name: the x-axis title has to be mapped from
# 'Year' (a key of 'x' matches no column and is silently ignored).
px.line(
    df[df['Branch'] == 'IT']
    .groupby('Year')['No. of Placements']
    .sum()
    .reset_index(),
    x='Year',
    y='No. of Placements',
    labels={'Year': 'Years'},
)

# Relationship between CGPA and No. of Placements

In [10]:
# CGPA against number of placements for IT rows, animated one frame per Year.
px.scatter(
    df.loc[df['Branch'] == 'IT'],
    x='CGPA',
    y='No. of Placements',
    color='CGPA',
    animation_frame='Year',
    range_x=[1, 10],
    range_y=[-1, 10],
)

In [11]:
# The relationship is non-linear, so the x-axis is drawn on a log scale.
# NOTE(review): a log axis cannot place x == 0; confirm whether rows with
# zero backlogs exist and are meant to be left out of this chart.
px.scatter(
    df,
    x='No. of Backlogs',
    y='No. of Placements',
    color='No. of Backlogs',
    animation_frame='Year',
    range_y=[-1, 10],
    log_x=True,
)

In [12]:
#import seaborn as sns
#p=df
#p['Gender']=p.Gender.astype('category').cat.codes
#p['Branch']=p.Branch.astype('category').cat.codes
#p['Status']=p.Status.astype('category').cat.codes

In [13]:
#p.head()

In [14]:
#c=p.corr()

In [15]:
#sns.heatmap(c)

# Branch Wise Comparison of No. of Placements with respect to Gender

In [16]:
# Two-level sunburst: Branch on the inner ring, Gender on the outer ring,
# with wedge sizes taken from the number of placements.
px.sunburst(
    df,
    path=['Branch', 'Gender'],
    values='No. of Placements',
    color='Branch',
    hover_name='No. of Placements',
)

# Comparison of Branch Wise Average CGPA

In [17]:
# Average CGPA per branch, highest first.
# 'mean' ignores NaN just like np.nanmean, but avoids passing a NumPy callable
# to `agg` (deprecated since pandas 2.1).
px.bar(
    df.groupby('Branch')
    .agg({'CGPA': 'mean'})
    .reset_index()
    .sort_values(['CGPA'], ascending=False),
    x='Branch',
    y='CGPA',
    color='Branch',
    hover_name='CGPA',
)

# Placed VS Unplaced students

In [18]:
# Number of rows per (Branch, Status) for Year == 21
# (presumably the 2021 batch -- TODO confirm the year encoding).
# `size()` counts rows per group directly instead of counting "truthy" values
# of the grouping key with np.count_nonzero.
vis = (
    df[df['Year'] == 21]
    .groupby(['Branch', 'Status'])
    .size()
    .reset_index(name='Count')
    .sort_values(by=['Status', 'Count'], ascending=False)
)

In [19]:
# Per-branch counts, coloured by placement status.
px.bar(
    vis,
    x='Branch',
    y='Count',
    color='Status',
)

# Total No. of Offer letters VS No. of Placed students

In [20]:
# Per branch, for placed rows with Year == 21: total offers received and the
# number of placed students.
# String aliases replace the NumPy callables (passing np.nansum to `agg` is
# deprecated since pandas 2.1): 'sum' skips NaN like np.nansum, and 'count'
# tallies the non-null Status values -- every row here has Status == 'Placed'.
vis1 = (
    df[(df['Year'] == 21) & (df['Status'] == 'Placed')]
    .groupby('Branch')
    .agg({'No. of Placements': 'sum', 'Status': 'count'})
    .rename(columns={
        'No. of Placements': 'Total No. of Offer Letters',
        'Status': 'No. of Placed Students',
    })
    .reset_index()
)
# Reshape to long form (one row per branch and metric) so both metrics can be
# drawn as side-by-side bars; largest values first.
vis1 = (
    pd.melt(
        vis1,
        id_vars=['Branch'],
        value_vars=['Total No. of Offer Letters', 'No. of Placed Students'],
    )
    .rename(columns={'value': 'count'})
    .sort_values(by='count', ascending=False)
)

In [21]:
# Side-by-side bars per branch, one bar for each metric in `variable`.
px.bar(
    vis1,
    x='Branch',
    y='count',
    color='variable',
    barmode='group',
)

# Change in Total No. of Offer letters and No. of Placed students over the years

In [22]:
# Per year, over placed rows only: total offers received and the number of
# placed students.
# String aliases replace the NumPy callables (passing np.nansum to `agg` is
# deprecated since pandas 2.1): 'sum' skips NaN like np.nansum, and 'count'
# tallies the non-null Status values -- every row here has Status == 'Placed'.
vis3 = (
    df[df['Status'] == 'Placed']
    .groupby('Year')
    .agg({'No. of Placements': 'sum', 'Status': 'count'})
    .rename(columns={
        'No. of Placements': 'Total No. of Offer Letters',
        'Status': 'No. of Placed Students',
    })
    .reset_index()
)
# Reshape to long form (one row per year and metric) so each metric becomes
# its own line in the chart.
vis3 = pd.melt(
    vis3,
    id_vars=['Year'],
    value_vars=['Total No. of Offer Letters', 'No. of Placed Students'],
).rename(columns={'value': 'count'})


In [23]:
# One line per metric in `variable`, plotted across years.
px.line(
    vis3,
    x='Year',
    y='count',
    color='variable',
)