In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import plotly.express as px
import plotly.io as pio
# Use plotly's 'notebook' renderer by default whenever a figure is displayed.
pio.renderers.default = 'notebook'


In [None]:
# Read the merit list from the working directory into the frame every later cell uses.
merit_list_csv = "MeritList.csv"
df = pd.read_csv(merit_list_csv)

# Last expression of the cell: show which columns were loaded.
df.columns

In [None]:
# Number of rows for each distinct value in 'Gender' (missing values are excluded by default).
df['Gender'].value_counts()

In [None]:
# Number of rows for each distinct value in 'Category' (missing values are excluded by default).
df['Category'].value_counts()

In [None]:
# Number of rows for each distinct value in 'EWS' (missing values are excluded by default).
df['EWS'].value_counts()

In [None]:
# Histogram of the 'HSC / Diplomas / D.Voc. Total %' column; as the cell's last
# expression the returned figure is what gets displayed.
px.histogram(df, x='HSC / Diplomas / D.Voc. Total %')

In [None]:
# Density contour of the 'HSC / Diplomas / D.Voc. Total %' column against 'SSC Total %'.
# The figure must be bound to a name: the layout call below operates on `fig`,
# which would otherwise be undefined on a fresh kernel (or be a figure left over
# from a different cell after out-of-order execution).
fig = px.density_contour(df, x='HSC / Diplomas / D.Voc. Total %', y='SSC Total %')

# Fixed 400x400 canvas. update_layout returns the figure it was called on, so as
# the last expression of the cell it also displays the plot.
fig.update_layout(autosize=False, width=400, height=400)

In [None]:
import plotly.graph_objects as go

# Overlay two semi-transparent histograms on one figure, one per score column.
# Keys are the legend labels, values are the columns of `df` they are drawn from.
score_columns = {
    '12th* score': 'HSC / Diplomas / D.Voc. Total %',
    '10th score': 'SSC Total %',
}

fig = go.Figure()

for legend_label, column in score_columns.items():
    fig.add_trace(go.Histogram(x=df[column], name=legend_label, opacity=0.6))

# 'overlay' draws the bars on top of each other instead of stacking them.
overlay_layout = dict(
    barmode='overlay',
    title='12th vs 10th scores',
    xaxis_title='Value',
    yaxis_title='Count',
)
fig.update_layout(**overlay_layout)

fig.show()


In [None]:

# Chemistry is not read from a column of its own: it is derived as
# 3 * PCM - (Physics + Math).
# NOTE(review): this presumes 'HSC PCM %' is the plain mean of the three subject
# percentages — confirm against the data source.
chemistry_pct = df['HSC PCM %']*3 - (df['HSC Physics %']+df['HSC Math %'])

# (legend label, values) pairs, in the order the traces are added.
subject_scores = [
    ('Math', df['HSC Math %']),
    ('Physics', df['HSC Physics %']),
    ('Chemistry', chemistry_pct),
]

fig_pcm_hsc = go.Figure()

for subject, pct in subject_scores:
    fig_pcm_hsc.add_trace(go.Histogram(x=pct, name=subject, opacity=0.4))

# 'overlay' draws the three distributions on top of each other instead of stacking.
pcm_layout = dict(
    barmode='overlay',
    title='PCM scores HSC comparision',
    xaxis_title='Score',
    yaxis_title='Student count',
)
fig_pcm_hsc.update_layout(**pcm_layout)

fig_pcm_hsc.show()


In [None]:
# How many rows have an 'HSC PCM %' strictly below 45 (missing values do not count).
is_below_45 = df['HSC PCM %'] < 45
int(is_below_45.sum())

In [None]:

# One semi-transparent 'Percentile/Mark' histogram per distinct 'Category' value,
# all overlaid on a single figure.
# NOTE(review): this rebinds `fig_pcm_hsc`, the same name the PCM-subject figure
# uses, so that earlier figure is no longer reachable once this cell has run.
category_of = df["Category"]
percentile = df['Percentile/Mark']

fig_pcm_hsc = go.Figure()

for category in category_of.unique():
    in_category = category_of == category
    fig_pcm_hsc.add_trace(go.Histogram(
        x=percentile[in_category],
        name=category,
        opacity=0.3,
        histnorm='density',
    ))

category_layout = dict(
    barmode='overlay',
    title='Category-wise Percentile Distrbution (Not Normalised)',
    xaxis_title='Score',
    yaxis_title='Student count',
)
fig_pcm_hsc.update_layout(**category_layout)

fig_pcm_hsc.show()


In [None]:
def _name_mask(s):
    """Boolean mask over ``df``: True where the lower-cased full name contains ``s``.

    ``s`` is handed to ``Series.str.contains`` unchanged, so it is interpreted as
    a regular expression. Names are lower-cased before matching, which means a
    pattern only matches if it is written in lower case. Rows whose name is
    missing are treated as non-matching (``na=False``).
    """
    return (
        df["Candidate's Full Name"]
        .str.lower()
        .str.contains(s, na=False)
    )


def name_by_category(s):
    """Count, per 'Category', the candidates whose full name matches ``s``.

    Parameters
    ----------
    s : str
        Lower-case pattern (regular expression) searched for in the full name.

    Returns
    -------
    pandas.Series
        ``value_counts()`` of 'Category' restricted to the matching rows;
        empty when nothing matches.
    """
    return df["Category"][_name_mask(s)].value_counts()


def name_counts(s):
    """Count the matching candidates that have a non-missing 'Category'.

    Parameters
    ----------
    s : str
        Lower-case pattern (regular expression) searched for in the full name.

    Returns
    -------
    int
        Number of matching rows whose 'Category' is not missing.
    """
    return df["Category"][_name_mask(s)].count()


def name_percentage(s):
    """Per-category share, in percent, of the candidates whose name matches ``s``.

    Each count from ``name_by_category(s)`` is divided by ``name_counts(s)``,
    multiplied by 100 and rounded to two decimals. When nothing matches, the
    result is an empty Series (no division takes place).
    """
    total_cnt = name_counts(s)
    return name_by_category(s).map(lambda x: round(x / total_cnt * 100, 2))



In [None]:
from rich.progress import track
# Missing names are dropped up front: str(NaN) would otherwise contribute a
# bogus "nan" token to the frequency table.
full_names = df["Candidate's Full Name"].dropna()

# `scores` is not filled in by this cell; the binding is kept so that any cell
# reading it still finds an (empty) list.
scores = []

# track() takes the iterable first and the progress-bar label second. The
# 'Percentile/Mark' column used to be passed in that second slot, which made
# rich render a whole Series as the label instead of iterating over it.
name_tokens = [
    token
    for fullname in track(full_names, description="Splitting names")
    for token in str(fullname).split()  # whitespace-separated parts of the name
]

names = pd.Series(name_tokens)

# Frequency of every token, then the tokens that occur exactly once.
vc = names.value_counts()
vc[vc==1].keys().to_list()