In [23]:
from may2025_report_exhibits import sctevt_df
import pandas as pd
from sams.analysis.utils import save_table_excel

In [24]:
# Set year to 2022
sctevt_df_2022 = sctevt_df[sctevt_df["year"] == 2022]
sctevt_df_2022 = sctevt_df_2022[sctevt_df_2022["gender"] != "O"]
sctevt_dropouts_df_2022 = sctevt_df_2022[sctevt_df_2022["dropout"]]

In [29]:
sctevt_df.columns

Index(['year', 'roll_num', 'iti_code', 'trade', 'gender', 'overall_result_y1',
       'overall_result_y2', 'type_of_institute', 'course_period', 'dropout',
       'retained'],
      dtype='object')

In [25]:
# SCTEVT enrollment in 2022 by trade and gender
sctevt_enrollment_trades_gender_2022 = pd.pivot_table(
    sctevt_df_2022,
    index='trade',
    columns='gender',
    values='roll_num',
    aggfunc='nunique'
)

sctevt_enrollment_trades_gender_2022 = sctevt_enrollment_trades_gender_2022.astype("Int64")
sctevt_enrollment_trades_gender_2022["Total"] = sctevt_enrollment_trades_gender_2022.sum(axis=1)
sctevt_enrollment_trades_gender_2022 = sctevt_enrollment_trades_gender_2022.sort_values(by="Total", ascending=False)
sctevt_enrollment_trades_gender_2022

gender,Female,Male,Total
trade,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Electrician (NSQF),2027.0,19376.0,21403
Fitter (NSQF),1392.0,17281.0,18673
Welder (NSQF),317.0,2140.0,2457
Computer Operator and Programming Assistant (NSQF),923.0,639.0,1562
Mechanic (Motor Vehicle) (NSQF),82.0,1131.0,1213
Electronics Mechanic (NSQF),211.0,747.0,958
Sewing Technology (NSQF),871.0,52.0,923
Mechanic Diesel (NSQF),33.0,579.0,612
Machinist (NSQF),56.0,521.0,577
Plumber (NSQF),69.0,463.0,532


In [26]:
# SCTEVT dropouts by trade and gender in 2022
sctevt_dropouts_trades_gender_2022 = pd.pivot_table(
    sctevt_dropouts_df_2022,
    index='trade',
    columns='gender',
    values='roll_num',
    aggfunc='nunique'
)

sctevt_dropouts_trades_gender_2022 = sctevt_dropouts_trades_gender_2022.astype("Int64")
sctevt_dropouts_trades_gender_2022["Total"] = sctevt_dropouts_trades_gender_2022.sum(axis=1)

# Order dropouts by the order in the enrollment table
ordered_trades = sctevt_enrollment_trades_gender_2022.index
sctevt_dropouts_trades_gender_2022 = sctevt_dropouts_trades_gender_2022.reindex(ordered_trades)
sctevt_dropouts_trades_gender_2022



gender,Female,Male,Total
trade,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Electrician (NSQF),893.0,4253.0,5146.0
Fitter (NSQF),817.0,5106.0,5923.0
Welder (NSQF),134.0,437.0,571.0
Computer Operator and Programming Assistant (NSQF),133.0,101.0,234.0
Mechanic (Motor Vehicle) (NSQF),32.0,358.0,390.0
Electronics Mechanic (NSQF),78.0,211.0,289.0
Sewing Technology (NSQF),312.0,37.0,349.0
Mechanic Diesel (NSQF),4.0,84.0,88.0
Machinist (NSQF),21.0,112.0,133.0
Plumber (NSQF),39.0,125.0,164.0


In [27]:
# Compute enrollment by instiute in 2022
sctevt_enrollment_inst_2022 = pd.pivot_table(
    sctevt_df_2022,
    index='institute_name',
    values='roll_num',
    aggfunc='nunique'
)

sctevt_enrollment_inst_2022 = sctevt_enrollment_inst_2022.astype("Int64")

# Merge institute type column
sctevt_enrollment_inst_2022 = pd.merge(sctevt_enrollment_inst_2022, sctevt_df_2022[["institute_name", "type_of_institute"]].drop_duplicates(), on="institute_name", how="left")

sctevt_enrollment_inst_2022.rename(columns={"roll_num":"enrollment"}, inplace=True)
cols_order = ["institute_name", "type_of_institute", "enrollment"]
sctevt_enrollment_inst_2022 = sctevt_enrollment_inst_2022[cols_order]
sctevt_enrollment_inst_2022

KeyError: 'institute_name'

In [None]:
# Compute dropout by institute in 2022
sctevt_dropouts_inst_2022 = pd.pivot_table(
    sctevt_dropouts_df_2022,
    index='institute_name',
    values='roll_num',
    aggfunc='nunique'
)

sctevt_dropouts_inst_2022 = sctevt_dropouts_inst_2022.astype("Int64")
sctevt_dropouts_inst_2022.rename(columns={"roll_num":"dropouts"}, inplace=True)
sctevt_dropouts_inst_2022


In [None]:
# Merge enrollment and dropout table on institutes
sctevt_enrollment_dropouts_inst_2022 = pd.merge(sctevt_enrollment_inst_2022, sctevt_dropouts_inst_2022, on="institute_name", how="left")
sctevt_enrollment_dropouts_inst_2022["dropout_rate"] = sctevt_enrollment_dropouts_inst_2022["dropouts"].div(sctevt_enrollment_dropouts_inst_2022["enrollment"]).mul(100).round(1)
sctevt_enrollment_dropouts_inst_2022 = sctevt_enrollment_dropouts_inst_2022.sort_values(by="enrollment", ascending=False)
sctevt_enrollment_dropouts_inst_2022

In [None]:
# Dropouts by trade and gender, public
sctevt_dropouts_trade_gender_public = pd.pivot_table(
    sctevt_df[sctevt_df["type_of_institute"] == "Govt."],
    index="trade",
    columns="gender",
    values="roll_num",
    aggfunc="nunique"
)

sctevt_dropouts_trade_gender_public = sctevt_dropouts_trade_gender_public.astype("Int64")
sctevt_dropouts_trade_gender_public["Total"] = sctevt_dropouts_trade_gender_public.sum(axis=1)
sctevt_dropouts_trade_gender_public.sort_values(by="Total", ascending=False, inplace=True)

In [None]:
sctevt_dropouts_trade_gender_private = pd.pivot_table(
    sctevt_df[sctevt_df["type_of_institute"] == "Pvt."],
    index="trade",
    columns="gender",
    values="roll_num",
    aggfunc="nunique"
)

sctevt_dropouts_trade_gender_private = sctevt_dropouts_trade_gender_private.astype("Int64")
sctevt_dropouts_trade_gender_private = sctevt_dropouts_trade_gender_private.drop(columns=["O"])
sctevt_dropouts_trade_gender_private = sctevt_dropouts_trade_gender_private.fillna(0)
sctevt_dropouts_trade_gender_private["Total"] = sctevt_dropouts_trade_gender_private.sum(axis=1)
sctevt_dropouts_trade_gender_private.sort_values(by="Total", ascending=False, inplace=True)


In [None]:
sctevt_dropouts_trade_gender_private


gender,Female,Male,Total
trade,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Electrician (NSQF),6909,79658,86567
Fitter (NSQF),4245,77222,81467
Computer Operator and Programming Assistant (NSQF),1181,1285,2466
Welder (NSQF),86,1869,1955
Sewing Technology (NSQF),1651,106,1757
Mechanic (Motor Vehicle) (NSQF),9,1101,1110
Electronics Mechanic (NSQF),272,586,858
Plumber (NSQF),56,512,568
Mechanic Diesel (NSQF),18,483,501
Machinist (NSQF),10,464,474


In [31]:
from sams.config import OUTPUT_DIR
dfs = [
    sctevt_enrollment_trades_gender_2022,
    sctevt_dropouts_trades_gender_2022,
    sctevt_dropouts_trade_gender_public,
    sctevt_dropouts_trade_gender_private,
]
sheet_names = [
    "SCTEVT Enrollments by Trade and Gender (2022)",
    "SCTEVT Dropouts by Trade and Gender (2022)",
    "SCTEVT Dropouts by Trade and Gender (Public)",
    "SCTEVT Dropouts by Trade and Gender (Private)"
]
index = [True, True, True, True]
save_table_excel(dfs, sheet_names, index, outfile= OUTPUT_DIR / "sctevt_dropouts_trade_gender_institute.xlsx")

[32m2025-07-08 11:49:08.017[0m | [1mINFO    [0m | [36msams.analysis.utils[0m:[36msave_table_excel[0m:[36m123[0m - [1mSaving DataFrame to sheet: SCTEVT Enrollments by Trade and Gender (2022) to /Users/ymohanty/Documents/GitHub/sams/output/sctevt_dropouts_trade_gender_institute.xlsx[0m
[32m2025-07-08 11:49:08.022[0m | [1mINFO    [0m | [36msams.analysis.utils[0m:[36msave_table_excel[0m:[36m123[0m - [1mSaving DataFrame to sheet: SCTEVT Dropouts by Trade and Gender (2022) to /Users/ymohanty/Documents/GitHub/sams/output/sctevt_dropouts_trade_gender_institute.xlsx[0m
[32m2025-07-08 11:49:08.030[0m | [1mINFO    [0m | [36msams.analysis.utils[0m:[36msave_table_excel[0m:[36m123[0m - [1mSaving DataFrame to sheet: SCTEVT Dropouts by Trade and Gender (Public) to /Users/ymohanty/Documents/GitHub/sams/output/sctevt_dropouts_trade_gender_institute.xlsx[0m
[32m2025-07-08 11:49:08.037[0m | [1mINFO    [0m | [36msams.analysis.utils[0m:[36msave_table_excel[0m:[36

