# School-based Analysis

## Configuration

In [None]:
# Report system memory usage in mebibytes (`free -m`), run through IPython's
# %system magic; assumes the host provides the `free` utility
%system free -m

In [None]:
# Import the necessary libraries

# Basic python libraries
import os
import numpy as np

# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Google cloud libraries
from google.cloud import bigquery
from google.cloud import storage

# Pandas and BigQuery
import pandas_gbq as pdg
import pandas as pd

In [None]:
# Show the current working directory: the relative paths used in this notebook
# are resolved against it. It is printed explicitly because Jupyter only echoes
# a bare expression when it is the last statement of a cell.
print(os.getcwd())

# Display all columns when a DataFrame is rendered (no truncation)
pd.set_option('display.max_columns', None)

In [None]:
# Data folders for each processing stage (relative paths)
raw_data, interim_data, processed_data = (
    "../data/raw/",
    "../data/interim/",
    "../data/processed/",
)

# Folders for generated figures and for configuration files
figures, config = "../reports/figures/", "../config/"

## Data Loading

### neet_chd

In [None]:
# File name of the dataset and its full path inside the processed-data folder
csv_filename = "neet_chd.csv"
csv_filepath = os.path.join(processed_data, csv_filename)

# Read the CSV into memory
neet_chd_df = pd.read_csv(csv_filepath)

# Render the DataFrame as the cell output
neet_chd_df

## School-based Analysis

### Data Filtering

In [None]:
# Row mask 1: the 'home_lsoa_in_bradford' flag itself
# (assumes the column holds booleans - TODO confirm against the CSV)
lives_in_bradford = neet_chd_df['home_lsoa_in_bradford']

# Row mask 2: 'la_estab_at_age_15', read as text, begins with '380'
# (presumably the Bradford local-authority prefix - verify)
attended_380_school = (
    neet_chd_df["la_estab_at_age_15"].astype(str).str.startswith('380')
)

# Keep only the rows that satisfy both conditions
neet_school = neet_chd_df[lives_in_bradford & attended_380_school]

# Render the filtered DataFrame as the cell output
neet_school

### Count of individuals by Schools in Bradford

In [None]:
# Index the filtered frame by 'school_code'. The guard keeps this cell re-runnable:
# after the first run 'school_code' is already the index (no longer a column), and
# calling set_index on it again would raise a KeyError.
if 'school_code' in neet_school.columns:
    neet_school = neet_school.set_index('school_code')

# One row per school: sum the two NEET status columns (presumably 0/1 or boolean
# flags - verify) and count the non-null person_id values.
# groupby resolves 'school_code' against the index level of that name.
aggregated_data = neet_school.groupby('school_code').agg(
    ever_neet_status=('ever_neet_status', 'sum'),
    persistent_neet_status=('persistent_neet_status', 'sum'),
    total_persons=('person_id', 'count')
).reset_index()

# Share of each school's counted persons that were ever / persistently NEET,
# expressed as a percentage rounded to 2 decimal places
aggregated_data['percentage_ever_neet'] = (
    aggregated_data['ever_neet_status'] / aggregated_data['total_persons'] * 100
).round(2)
aggregated_data['percentage_persistent_neet'] = (
    aggregated_data['persistent_neet_status'] / aggregated_data['total_persons'] * 100
).round(2)

In [None]:
# Order schools from highest to lowest ever-NEET percentage, using the
# persistent-NEET percentage (also descending) to break ties
ranking_columns = ['percentage_ever_neet', 'percentage_persistent_neet']
aggregated_data = aggregated_data.sort_values(by=ranking_columns, ascending=False)

# Render the ranked table as the cell output
aggregated_data

### Ordered Bar Chart

In [None]:
# Series to plot, one entry per school in the table's current row order
schools = aggregated_data['school_code']
percentage_ever_neet = aggregated_data['percentage_ever_neet']
percentage_persistent_neet = aggregated_data['percentage_persistent_neet']

# Each school gets two bars of this width, placed either side of an integer x position
bar_width = 0.4
positions = np.arange(len(schools))

# Draw on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(16, 10))

# Ever NEET to the left of each position, persistent NEET to the right
ax.bar(positions - bar_width/2, percentage_ever_neet, bar_width, label='Ever NEET')
ax.bar(positions + bar_width/2, percentage_persistent_neet, bar_width, color='orange', label='Persistent NEET')

# One tick per school, labelled with its code and rotated to stay legible
ax.set_xticks(positions)
ax.set_xticklabels(schools, rotation=90, fontsize=14)

# Axis labels, title and legend
ax.set_xlabel('Schools', fontsize=16)
ax.set_ylabel('Percentage', fontsize=16)
ax.set_title('Percentage of Ever NEET and Persistent NEET by School', fontsize=18)
ax.legend(fontsize=14)

# Hide the top and right spines and keep only horizontal grid lines
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
ax.grid(axis="y", linestyle="--", alpha=0.5)

fig.tight_layout()
plt.show()