In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from numpy.lib.recfunctions import append_fields
from function import functionNeed
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*' and
# 3.8 removed the old names, so use whichever name this installation provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

In [2]:
# Singapore: one row per year and institution type with its recurrent expenditure.
expenditureSG = np.genfromtxt(
    './datasets/Expenditure(SG).csv',
    delimiter=',',
    names=True,
    dtype=['i8', 'U50', 'i8'],
)
# UK: the file starts with a UTF-8 byte-order mark. Decoding with 'utf-8-sig'
# strips it, so the first field is named 'location' instead of 'ï»¿location'.
expenditureUK = np.genfromtxt(
    './datasets/Expenditure(UK).csv',
    delimiter=',',
    names=True,
    usecols=(0, 1, 2, 3, 4, 5, 6, 7),
    dtype=['U50', 'U8', 'U8', 'U8', 'U50', 'U8', 'U8', 'f8'],
    encoding='utf-8-sig',
)

In [3]:
# Keep only the per-level rows for the five periods that are plotted later.
# Singapore: drop the 'Others' and 'total' categories and keep 2015-2019 inclusive.
# The upper bound matters: the plots use a fixed five-label year axis, so rows from
# later years (if the CSV is ever refreshed) would make the series lengths mismatch.
filter1 = (
    (expenditureSG['type_of_educational_institutions'] != 'Others')
    & (expenditureSG['type_of_educational_institutions'] != 'total')
    & (expenditureSG['year'] >= 2015)
    & (expenditureSG['year'] <= 2019)
)
expenditureSG = expenditureSG[filter1]
# UK: drop the 2020-21 period and the aggregated 'total' rows.
filter2 = (expenditureUK['time_period'] != '2020-21') & (expenditureUK['education_function'] != 'total')
expenditureUK = expenditureUK[filter2]

# Data Analysis 

In [4]:
def data(j,x):
    """Print a summary of the structured array `j` and statistics for its field `x`.

    The work is delegated to the project's `functionNeed` helpers:
    `columnInArr` and `uniqueValues` receive every field name together with
    the array, and `characteristics` receives the values of the field `x`.
    """
    field_names = j.dtype.names
    print('\nThe names of the columns are:')
    functionNeed.columnInArr(field_names, j)
    functionNeed.uniqueValues(field_names, j)
    functionNeed.characteristics(j[x])


In [5]:
# Singapore: print the column overview, the unique-value counts and the
# descriptive statistics of the 'recurrent_expenditure' field.
data(expenditureSG,'recurrent_expenditure')



The names of the columns are:
- year <class 'numpy.dtype[int64]'> isnumeric: True, null values: False 
- type_of_educational_institutions <class 'numpy.dtype[str_]'> isnumeric: False, null values: False 
- recurrent_expenditure <class 'numpy.dtype[int64]'> isnumeric: True, null values: False 

The unique values of each column are:
5 unique values in year
6 unique values in type_of_educational_institutions
30 unique values in recurrent_expenditure
The number of elements is:              30
The min is:          376956
The max is:         3243605
The mean is:      1769797.00
The variance is: 1212919897580.60
The median is     1904286.500
The standard deviation is:     1101326.426
The lower quartile is      470299.000
The upper quartile is     2782988.500



In [6]:
# UK: print the column overview, the unique-value counts and the
# descriptive statistics of the 't_expenditure_millions' field.
data(expenditureUK,'t_expenditure_millions')


The names of the columns are:
- ï»¿location <class 'numpy.dtype[str_]'> isnumeric: False, null values: False 
- location_code <class 'numpy.dtype[str_]'> isnumeric: False, null values: False 
- geographic_level <class 'numpy.dtype[str_]'> isnumeric: False, null values: False 
- time_period <class 'numpy.dtype[str_]'> isnumeric: False, null values: False 
- education_function <class 'numpy.dtype[str_]'> isnumeric: False, null values: False 
- expenditure_level <class 'numpy.dtype[str_]'> isnumeric: False, null values: False 
- expenditure_type <class 'numpy.dtype[str_]'> isnumeric: False, null values: False 
- t_expenditure_millions <class 'numpy.dtype[float64]'> isnumeric: False, null values: False 

The unique values of each column are:
1 unique values in ï»¿location
1 unique values in location_code
1 unique values in geographic_level
5 unique values in time_period
4 unique values in education_function
1 unique values in expenditure_level
1 unique values in expenditure_type
20 unique

In [7]:
# Split the Singapore data into one expenditure series per education level.
# The series are combined position by position further down, which assumes every
# level lists its years in the same order — TODO confirm against the CSV.
sg_level = expenditureSG['type_of_educational_institutions']
sg_spending = expenditureSG['recurrent_expenditure']
# np.unique yields the sorted distinct years as a plain 1-D array.
years = np.unique(expenditureSG['year'])
primary_school = sg_spending[sg_level == 'Primary Schools']
secondary_school = sg_spending[sg_level == 'Secondary Schools']
Junior_college = sg_spending[sg_level == 'Junior Colleges / Centralised Institute']
ite = sg_spending[sg_level == 'Institute of Technical Education']
poly = sg_spending[sg_level == 'Polytechnics']
uni = sg_spending[sg_level == 'Universities']
# Total across all six levels, one value per position (year) in the series.
total_expenditure = [np.sum(x) for x in zip(primary_school,secondary_school,Junior_college,ite,poly,uni)]

In [8]:
# Split the UK data into one expenditure series per education level.
# The source field is named as being in millions; every value is scaled by 1000
# (presumably to match the unit of the Singapore figures — TODO confirm).
uk_function = expenditureUK['education_function']
uk_millions = expenditureUK['t_expenditure_millions']
primary_schoolUK = 1000 * uk_millions[uk_function == 'Primary education']
secondary_schoolUK = 1000 * uk_millions[uk_function == 'Secondary education']
tertiraryUK = 1000 * uk_millions[uk_function == 'Tertiary education']
# Total across the three levels, one value per position (period) in the series.
total_expenditureUK = list(map(np.sum, zip(primary_schoolUK, secondary_schoolUK, tertiraryUK)))

In [9]:
%matplotlib qt
# 3x2 grid: row 0 = stacked spending per level, row 1 = donut charts of the mean
# spending per level, row 2 = detail plots that stay hidden until a wedge is clicked.
fig,ax = plt.subplots(3,2,figsize=(15,20))
fig.suptitle('Spending comparison between UK and SG',fontsize=30,color='Navy')
# Primary + secondary UK spending as a plain list (not used by the plots in this cell).
bar1UK = np.add(primary_schoolUK,secondary_schoolUK).tolist()
# Shared x-axis labels for both countries.
years = [str(i) for i in range(2015,2020)]
# Singapore has no single tertiary series, so ITE, universities, polytechnics and
# junior colleges are added together position by position.
tertiary_sg = [(a+b+j+k) for a,b,j,k in zip(ite,uni,poly,Junior_college)]

# --- Row 0: stack plots with the overall total drawn on top as a red line ---
stack = ax[0,0]
stack.stackplot(years,[primary_school,secondary_school,tertiary_sg])
stack.plot(years,total_expenditure,color='r')
stack.set_title('Proportion of spending based on level in SG')
ax[0,1].stackplot(years,[primary_schoolUK,secondary_schoolUK,tertiraryUK])
ax[0,1].plot(years,total_expenditureUK,color='r')
ax[0,1].set_title('Proportion of spending based on level in UK')
# Remove the spines of both stack plots.
for stacked_axis in ax[0]:
    for side in ('bottom','left','right','top'):
        stacked_axis.spines[side].set_visible(False)

# --- Row 1: donut charts of the mean spending per level ---
labels = ['primary_school','secondary_school','tertiary']
pie_style = {'linewidth': 3}

piechart1 = ax[1,0]
sg_means = [
    np.mean(primary_school),
    np.mean(secondary_school),
    np.mean(ite)+np.mean(Junior_college)+np.mean(poly)+np.mean(uni),
]
# The throwaway label texts get real names so IPython's `_` is not overwritten.
wedges,sg_label_texts = piechart1.pie(sg_means,wedgeprops=pie_style,labels=labels)
piechart1.text(-0.40,-0.0,'expenditure')
# One white wedge at half the radius covers the centre and turns the pie into a donut.
donut,sg_hole_texts = piechart1.pie([1],colors=['white'],radius=0.5)
piechart1.set_title('Proportion of mean amount of money spent on each level in SG')

piechart2 = ax[1,1]
uk_means = [np.mean(primary_schoolUK),np.mean(secondary_schoolUK),np.mean(tertiraryUK)]
wedges1,uk_label_texts = piechart2.pie(uk_means,wedgeprops=pie_style,labels=labels)
piechart2.text(-0.40,-0.0,'expenditure')
donut2,uk_hole_texts = piechart2.pie([1],colors=['white'],radius=0.5)
piechart2.set_title('Proportion of mean amount of money spent on each level in UK')

# --- Row 2: detail axes, hidden until the click handler fills them ---
box = ax[2,0]
box2 = ax[2,1]
box.set_visible(False)
box2.set_visible(False)
def _show_trend(axis, label, series):
    """Replace the contents of `axis` with the line plot of `series` over `years`."""
    axis.clear()
    axis.set_title(f'{label} spending from 2015 to 2019')
    axis.plot(years, series)
    axis.set_visible(True)
    axis.set_ylim(bottom=0)
    # Redraw the figure that owns this axis; plt.draw() would target pyplot's
    # current figure, which is not necessarily the one that was clicked.
    axis.figure.canvas.draw_idle()


def onclick(event):
    """Show the yearly spending behind the pie wedge that was clicked.

    A click on a wedge of the Singapore pie (`piechart1`) fills `box`; a click
    on a wedge of the UK pie (`piechart2`) fills `box2`. Clicks anywhere else
    are ignored.

    Parameters
    ----------
    event : matplotlib mouse event
        Supplies `inaxes` (the axes under the cursor) and the display
        coordinates `x`, `y` that are tested against each wedge.
    """
    if event.inaxes is piechart1:
        pie_wedges, detail_axis = wedges, box
        series_by_label = {
            'primary_school': primary_school,
            'secondary_school': secondary_school,
            # Singapore's tertiary figure is the sum of its four post-secondary series.
            'tertiary': [(a+b+j+k) for a,b,j,k in zip(ite,uni,poly,Junior_college)],
        }
    elif event.inaxes is piechart2:
        pie_wedges, detail_axis = wedges1, box2
        series_by_label = {
            'primary_school': primary_schoolUK,
            'secondary_school': secondary_schoolUK,
            'tertiary': tertiraryUK,
        }
    else:
        return

    for wedge in pie_wedges:
        if not wedge.contains_point([event.x, event.y]):
            continue
        label = wedge.get_label()
        if label in series_by_label:
            _show_trend(detail_axis, label, series_by_label[label])
# Register onclick for mouse-button presses on this figure; the value returned by
# mpl_connect is kept in `cid`.
cid = fig.canvas.mpl_connect('button_press_event', onclick)
# Display the figure.
plt.show()
