In [10]:
# Importing libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

## Loading the dataset

In [11]:
# Read the institutions CSV (path is relative to the notebook's working
# directory) into the working DataFrame, then preview its first five rows.
df = pd.read_csv(filepath_or_buffer='pakistan_education_institutions_detailed.csv')
df.head(n=5)

Unnamed: 0,Province,School Level,Regular Private Schools,Elite Private Schools,Total Institutions
0,Punjab,Primary,25650,1350,27000
1,Punjab,Middle,7650,450,8100
2,Punjab,Secondary,5100,300,5400
3,Punjab,Higher Secondary,3850,150,4000
4,Punjab,Colleges,500,0,500


In [12]:
# Remove the rows whose Province is 'Total' (presumably pre-aggregated summary
# rows - confirm against the CSV) so only per-province rows remain.
# The explicit .copy() makes the result an independent DataFrame, so adding a
# column to it later does not raise pandas' SettingWithCopyWarning.
df = df.loc[df['Province'] != 'Total'].copy()

# Number of cells (rows x columns) remaining after the filter.
df.size

150

## Grouping by Province and School Level

In [13]:
# The two count columns that are summed in both summaries below.
count_columns = ['Regular Private Schools', 'Elite Private Schools']

# Per-province totals, ordered from the largest regular-private count down.
province_totals = df.groupby('Province')[count_columns].sum()
df_province_schools = province_totals.sort_values(
    by='Regular Private Schools',
    ascending=False,
)

# Per-school-level totals, ordered by level name in descending (Z -> A) order.
level_totals = df.groupby('School Level')[count_columns].sum()
df_schools_levels = level_totals.sort_index(ascending=False)

# Print both summary tables, separated by a dashed rule.
print(" Total Number of Schools by Province:\n")
print(df_province_schools)
print("\n" + "---" * 20 + "\n")
print(" Total Number of Schools by Type of School Level:\n")
print(df_schools_levels)

 Total Number of Schools by Province:

             Regular Private Schools  Elite Private Schools
Province                                                   
Punjab                         42750                   2250
Sindh                          23800                   1250
KPK                            14250                    750
Balochistan                     7600                    400
AJK                             3800                    200
GB                              2479                    130

------------------------------------------------------------

 Total Number of Schools by Type of School Level:

                  Regular Private Schools  Elite Private Schools
School Level                                                    
Secondary                           11312                    662
Primary                             56776                   2988
Middle                              16934                    996
Higher Secondary                     8517 

In [14]:
# Combined (regular + elite) private institution count for each school level.
# Sum both count columns per level in one groupby, then add them row-wise.
per_level_counts = df.groupby('School Level')[
    ['Regular Private Schools', 'Elite Private Schools']
].sum()
tot_colleges = per_level_counts.sum(axis=1)
tot_colleges

School Level
Colleges             1140
Higher Secondary     8851
Middle              17930
Primary             59764
Secondary           11974
dtype: int64

In [15]:
# Sum regular and elite private school counts per province; reset_index turns
# 'Province' back into a regular column so Plotly can use it for the x axis.
df_bar = (
    df.groupby('Province')[['Regular Private Schools', 'Elite Private Schools']]
    .sum()
    .reset_index()
)

# Grouped bar chart: one bar per school type, side by side for each province.
# The values are sums (not averages), so the title says "Total" to agree with
# the y-axis label.
fig = px.bar(
    df_bar,
    x='Province',
    y=['Regular Private Schools', 'Elite Private Schools'],
    barmode='group',
    title='Total Number of Schools by Type and Province'
)

fig.update_layout(
    xaxis_title='Province',
    yaxis_title='Total Number of Schools'
)

# Save the chart as a PNG next to the notebook. The file name is left as-is
# (it still says "average") so anything that already references it keeps working.
fig.write_image("average_number_of_school_types_by_province.png")
fig.show()

In [16]:
# 3D scatter of the three count columns, one point per row, coloured by
# province. This is a Plotly figure, so no matplotlib figure is created here:
# plt.figure(figsize=...) does not size a Plotly chart and only leaves an empty
# "<Figure ... with 0 Axes>" in the output.
fig = px.scatter_3d(
    df,
    x='Regular Private Schools',
    y='Elite Private Schools',
    z='Total Institutions',
    color='Province',
    title='3D Scatter Plot of School Types'
)

# Explicit axis titles for the 3D scene.
fig.update_layout(scene=dict(
    xaxis_title='Regular Private Schools',
    yaxis_title='Elite Private Schools',
    zaxis_title='Total Institutions'
))
fig.show()

<Figure size 1200x800 with 0 Axes>

In [17]:
# Sunburst with provinces as the inner ring and school levels as the outer
# ring. This is a Plotly figure, so no matplotlib figure is created here:
# plt.figure(figsize=...) does not size a Plotly chart and only leaves an empty
# "<Figure ... with 0 Axes>" in the output.
# NOTE(review): slice sizes use only 'Regular Private Schools' while the title
# refers to educational institutions in general - confirm that
# 'Total Institutions' was not the intended `values` column.
fig = px.sunburst(
    df,
    path=['Province', 'School Level'],
    values='Regular Private Schools',
    title='Sunburst Chart of Educational Institutions in Pakistan'
)

# Save an interactive copy of the chart next to the notebook.
fig.write_html("sunburst_chart.html")
fig.show()

<Figure size 1200x800 with 0 Axes>

In [18]:
# Flat fee (PKR) applied to every counted institution.
# NOTE(review): the original comment described this as a per-student fee, but
# the multiplier below is the number of institutions, not students - confirm
# which unit is intended.
FLAT_FEE_PKR = 1500

# Add the 'Total Fee' column through assign() so a new frame is produced
# instead of writing into a filtered frame in place; this avoids
# SettingWithCopyWarning and gives the same result when the cell is re-run.
df = df.assign(**{'Total Fee': df['Total Institutions'] * FLAT_FEE_PKR})

# Sum the fee per province; reset_index makes 'Province' a plain column.
df_bar = df.groupby('Province')['Total Fee'].sum().reset_index()

# Bar chart of the per-province fee total (plotly.express is already imported
# as `px` in the notebook's first cell, so it is not re-imported here).
fig = px.bar(df_bar, x='Province', y='Total Fee',
             title='Total  Fee (PKR) by Province',
             labels={'Total Fee': 'Total Fee (PKR)', 'Province': 'Province'},
             text='Total Fee')

fig.update_layout(xaxis_title='Province', yaxis_title='Total Fee (PKR)',
                  title_font_size=18, yaxis_tickprefix='PKR ')
# Show each bar's value above it, abbreviated to two significant digits with
# an SI suffix (d3 format '.2s').
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')

# Save the chart as a PNG next to the notebook, then render it inline.
fig.write_image("total_fee_by_province.png")

fig.show()