### Importing libraries and loading the CSV files

In [16]:
import pandas as pd 
import numpy as np

In [17]:
# Let pandas render up to 30 columns before truncating a displayed DataFrame
pd.options.display.max_columns = 30

In [18]:
# Read the four "Juveniles arrested" CSV files (education, economic set-up,
# family background, recidivism) from the working directory, one DataFrame each.
df_education, df_economic, df_family, df_recidivism = (
    pd.read_csv(csv_name)
    for csv_name in (
        '18_01_Juveniles_arrested_Education.csv',
        '18_02_Juveniles_arrested_Economic_setup.csv',
        '18_03_Juveniles_arrested_Family_background.csv',
        '18_04_Juveniles_arrested_Recidivism.csv',
    )
)

In [19]:
# Outer-join the four tables on (Area_Name, Year), starting from the education table.
# At each step a clashing column keeps its left-hand name unchanged and the
# right-hand copy is tagged ('_econ', '_fam', '_rec'), so nothing is overwritten.
df_merged = (
    df_education
    .merge(df_economic, how="outer", on=["Area_Name", "Year"], suffixes=('', '_econ'))
    .merge(df_family, how="outer", on=["Area_Name", "Year"], suffixes=('', '_fam'))
    .merge(df_recidivism, how="outer", on=["Area_Name", "Year"], suffixes=('', '_rec'))
)

# Per-column count of missing values in the merged frame (stored, not displayed)
missing_values = df_merged.isnull().sum()


In [20]:
# Preview the first five rows of the merged frame
df_merged.head()

Unnamed: 0,Area_Name,Year,Sub_Group_Name,Education_Above_Primary_but_below_Matric_or_Higher_Secondary,Education_Illiterate,Education_Matric_or_Higher_Secondary_&_above,Education_Total,Education_Upto_primary,Sub_Group_Name_econ,Economic_Set_up_Annual_Income_250001_to_50000,Economic_Set_up_Annual_Income_upto_Rs_25000,Economic_Set_up_Middle_income_from_100001_to_200000,Economic_Set_up_Middle_income_from_50001_to_100000,Economic_Set_up_Total,Economic_Set_up_Upper_income_above_Rs_300000,Economic_Set_up_Upper_middle_income_from_200001_to_300000,Sub_Group_Name_fam,Family_back_ground_Homeless,Family_back_ground_Living_with_guardian,Family_back_ground_Living_with_parents,Family_back_ground_Total,Sub_Group_Name_rec,Recidivism_New_Delinquent,Recidivism_Old_Delinquent,Recidivism_Total
0,Andaman & Nicobar Islands,2001,1. Education,12,0,0,16,4,2. Economic Setup,12,4,0,0,16,0,0,3. Family Background,0,0,16,16,4. Recidivism,16,0,16
1,Andaman & Nicobar Islands,2002,1. Education,4,0,0,6,2,2. Economic Setup,5,1,0,0,6,0,0,3. Family Background,0,0,6,6,4. Recidivism,6,0,6
2,Andaman & Nicobar Islands,2003,1. Education,7,0,0,9,2,2. Economic Setup,3,2,0,4,9,0,0,3. Family Background,0,0,9,9,4. Recidivism,9,0,9
3,Andaman & Nicobar Islands,2004,1. Education,6,0,0,13,7,2. Economic Setup,1,12,0,0,13,0,0,3. Family Background,0,5,8,13,4. Recidivism,13,0,13
4,Andaman & Nicobar Islands,2005,1. Education,6,0,0,10,4,2. Economic Setup,8,2,0,0,10,0,0,3. Family Background,0,3,7,10,4. Recidivism,10,0,10


In [21]:
# List the column labels of the merged frame (shows which names received a merge suffix)
df_merged.columns

Index(['Area_Name', 'Year', 'Sub_Group_Name',
       'Education_Above_Primary_but_below_Matric_or_Higher_Secondary',
       'Education_Illiterate', 'Education_Matric_or_Higher_Secondary_&_above',
       'Education_Total', 'Education_Upto_primary', 'Sub_Group_Name_econ',
       'Economic_Set_up_Annual_Income_250001_to_50000',
       'Economic_Set_up_Annual_Income_upto_Rs_25000',
       'Economic_Set_up_Middle_income_from_100001_to_200000',
       'Economic_Set_up_Middle_income_from_50001_to_100000',
       'Economic_Set_up_Total', 'Economic_Set_up_Upper_income_above_Rs_300000',
       'Economic_Set_up_Upper_middle_income_from_200001_to_300000',
       'Sub_Group_Name_fam', 'Family_back_ground_Homeless',
       'Family_back_ground_Living_with_guardian',
       'Family_back_ground_Living_with_parents', 'Family_back_ground_Total',
       'Sub_Group_Name_rec', 'Recidivism_New_Delinquent',
       'Recidivism_Old_Delinquent', 'Recidivism_Total'],
      dtype='object')

In [22]:
# Columns to remove: in the rows previewed above, each Sub_Group_Name* column
# only repeats the label of its source table (e.g. "1. Education").
columns_to_drop = ['Sub_Group_Name_econ', 'Sub_Group_Name_fam', 'Sub_Group_Name_rec','Sub_Group_Name']

# Rebind instead of dropping in place, and ignore labels that are already gone,
# so re-running this cell does not raise KeyError after the first run.
df_merged = df_merged.drop(columns=columns_to_drop, errors='ignore')

# Display the DataFrame to confirm the columns have been removed
df_merged.head()

Unnamed: 0,Area_Name,Year,Education_Above_Primary_but_below_Matric_or_Higher_Secondary,Education_Illiterate,Education_Matric_or_Higher_Secondary_&_above,Education_Total,Education_Upto_primary,Economic_Set_up_Annual_Income_250001_to_50000,Economic_Set_up_Annual_Income_upto_Rs_25000,Economic_Set_up_Middle_income_from_100001_to_200000,Economic_Set_up_Middle_income_from_50001_to_100000,Economic_Set_up_Total,Economic_Set_up_Upper_income_above_Rs_300000,Economic_Set_up_Upper_middle_income_from_200001_to_300000,Family_back_ground_Homeless,Family_back_ground_Living_with_guardian,Family_back_ground_Living_with_parents,Family_back_ground_Total,Recidivism_New_Delinquent,Recidivism_Old_Delinquent,Recidivism_Total
0,Andaman & Nicobar Islands,2001,12,0,0,16,4,12,4,0,0,16,0,0,0,0,16,16,16,0,16
1,Andaman & Nicobar Islands,2002,4,0,0,6,2,5,1,0,0,6,0,0,0,0,6,6,6,0,6
2,Andaman & Nicobar Islands,2003,7,0,0,9,2,3,2,0,4,9,0,0,0,0,9,9,9,0,9
3,Andaman & Nicobar Islands,2004,6,0,0,13,7,1,12,0,0,13,0,0,0,5,8,13,13,0,13
4,Andaman & Nicobar Islands,2005,6,0,0,10,4,8,2,0,0,10,0,0,0,3,7,10,10,0,10


In [23]:
# Read the IPC and SLL CSV files and rename their 'STATE/UT' column to
# 'Area_Name', the key column name used by df_merged.
# NOTE(review): neither df_ipc nor df_sll is referenced by the cells visible here.
df_ipc = pd.read_csv('08_01_Juvenile_apprehended_state_IPC.csv').rename(columns={'STATE/UT': 'Area_Name'})
df_sll = pd.read_csv('08_02_Juvenile_apprehended_state_SLL.csv').rename(columns={'STATE/UT': 'Area_Name'})



In [27]:
# Look at the first five rows of the merged frame once more before saving it
df_merged.head(n=5)

Unnamed: 0,Area_Name,Year,Education_Above_Primary_but_below_Matric_or_Higher_Secondary,Education_Illiterate,Education_Matric_or_Higher_Secondary_&_above,Education_Total,Education_Upto_primary,Economic_Set_up_Annual_Income_250001_to_50000,Economic_Set_up_Annual_Income_upto_Rs_25000,Economic_Set_up_Middle_income_from_100001_to_200000,Economic_Set_up_Middle_income_from_50001_to_100000,Economic_Set_up_Total,Economic_Set_up_Upper_income_above_Rs_300000,Economic_Set_up_Upper_middle_income_from_200001_to_300000,Family_back_ground_Homeless,Family_back_ground_Living_with_guardian,Family_back_ground_Living_with_parents,Family_back_ground_Total,Recidivism_New_Delinquent,Recidivism_Old_Delinquent,Recidivism_Total
0,Andaman & Nicobar Islands,2001,12,0,0,16,4,12,4,0,0,16,0,0,0,0,16,16,16,0,16
1,Andaman & Nicobar Islands,2002,4,0,0,6,2,5,1,0,0,6,0,0,0,0,6,6,6,0,6
2,Andaman & Nicobar Islands,2003,7,0,0,9,2,3,2,0,4,9,0,0,0,0,9,9,9,0,9
3,Andaman & Nicobar Islands,2004,6,0,0,13,7,1,12,0,0,13,0,0,0,5,8,13,13,0,13
4,Andaman & Nicobar Islands,2005,6,0,0,10,4,8,2,0,0,10,0,0,0,3,7,10,10,0,10


In [28]:
# Write the merged frame to 'df_merged.csv' in the working directory, omitting the row index
df_merged.to_csv(path_or_buf='df_merged.csv', index=False)