## Libraries

In [19]:
import pandas as pd
from sklearn.linear_model import LinearRegression
import statsmodels
import statsmodels.api as sm

import numpy as np

import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt
import seaborn as sn

# 设置全局选项
pd.set_option('display.max_rows', 300) # specifies number of rows to show
pd.options.display.float_format = '{:40,.4f}'.format # specifies default number format to 4 decimal places
plt.style.use('ggplot') # specifies that graphs should use ggplot styling
# plot in the Notebook
%matplotlib inline

# Read in Datasets

## London School Dataset

In [25]:
# pandas is already imported as `pd` in the libraries cell at the top of the
# notebook, so it is not imported again here.

# Source CSV of school records, hosted on GitHub.
github_csv_url = "https://raw.githubusercontent.com/EthanLi1922/QM_Individual_Work/main/results_788.csv"

# The file is decoded as latin1 instead of pandas' default UTF-8. If characters
# come out wrong, 'cp1252' is the alternative to try ('ISO-8859-1' is just
# another name for the latin1 codec, so switching to it changes nothing).
London_School = pd.read_csv(github_csv_url, encoding='latin1')
London_School

Unnamed: 0,URN,LA (code),LA (name),EstablishmentNumber,EstablishmentName,TypeOfEstablishment (name),EstablishmentTypeGroup (name),EstablishmentStatus (name),ReasonEstablishmentOpened (name),OpenDate,...,QABReport,CHNumber,MSOA (code),LSOA (code),FSM,AccreditationExpiryDate,Linked establishments,Unnamed: 112,Unnamed: 113,Unnamed: 114
0,100049,202,Camden,4104,Haverstock School,Community school,Local authority maintained schools,Open,Not applicable,,...,,,E02000177,E01000902,457.0000,,Does not have links,,,
1,100050,202,Camden,4166,Parliament Hill School,Community school,Local authority maintained schools,Open,Not applicable,,...,,,E02000166,E01000912,317.0000,,132838 Sixth Form Centre Link,,,
2,100051,202,Camden,4196,Regent High School,Community school,Local authority maintained schools,Open,Not applicable,,...,,,E02000187,E01000952,559.0000,,Does not have links,,,
3,100052,202,Camden,4275,Hampstead School,Community school,Local authority maintained schools,Open,Not applicable,,...,,,E02000170,E01000871,432.0000,,Does not have links,,,
4,100053,202,Camden,4285,Acland Burghley School,Community school,Local authority maintained schools,Open,Not applicable,,...,,,E02000168,E01000928,311.0000,,132838 Sixth Form Centre Link,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
783,150055,306,Croydon,4016,Harris Academy Beulah Hill,Academy converter,Academies,Open,Split school,01-09-2023,...,,,E02000195,E01001159,,,135249 Predecessor - Split School,,,
784,150318,209,Lewisham,4323,Prendergast Ladywell School,Academy converter,Academies,Open,Academy Converter,01-01-2024,...,,,E02000667,E01003225,,,100747 Predecessor,,,
785,150319,209,Lewisham,4646,Prendergast School,Academy converter,Academies,Open,Academy Converter,01-01-2024,...,,,E02000666,E01003276,,,100750 Predecessor,,,
786,150351,209,Lewisham,5201,Prendergast Vale School,Academy converter,Academies,Open,Academy Converter,01-01-2024,...,,,E02007008,E01034395,,,135843 Predecessor,,,


In [32]:
# Print the column labels, then display the local-authority name held in each row
print(London_School.columns)
London_School.loc[:, 'LA (name)']

Index(['URN', 'LA (code)', 'LA (name)', 'EstablishmentNumber',
       'EstablishmentName', 'TypeOfEstablishment (name)',
       'EstablishmentTypeGroup (name)', 'EstablishmentStatus (name)',
       'ReasonEstablishmentOpened (name)', 'OpenDate',
       ...
       'QABReport', 'CHNumber', 'MSOA (code)', 'LSOA (code)', 'FSM',
       'AccreditationExpiryDate', 'Linked establishments', 'Unnamed: 112',
       'Unnamed: 113', 'Unnamed: 114'],
      dtype='object', length=115)


0             Camden
1             Camden
2             Camden
3             Camden
4             Camden
           ...      
783          Croydon
784         Lewisham
785         Lewisham
786         Lewisham
787    Hertfordshire
Name: LA (name), Length: 788, dtype: object

In [34]:
# Distinct local-authority names, in order of first appearance
unique_values = pd.unique(London_School['LA (name)'])
unique_values

array(['Camden', 'Greenwich', 'Hackney', 'Islington',
       'Kensington and Chelsea', 'Lambeth', 'Lewisham', 'Southwark',
       'Tower Hamlets', 'Westminster', 'Barking and Dagenham', 'Barnet',
       'Brent', 'Bromley', 'Croydon', 'Ealing', 'Enfield', 'Haringey',
       'Harrow', 'Hillingdon', 'Hounslow', 'Merton', 'Newham',
       'Redbridge', 'Richmond upon Thames', 'Sutton', 'Waltham Forest',
       'Luton', 'Bracknell Forest', 'Slough', 'Windsor and Maidenhead',
       'Buckinghamshire', 'Essex', 'Thurrock', 'Hertfordshire', 'Kent',
       'Medway', 'Surrey', 'West Sussex', 'Hammersmith and Fulham',
       'Wandsworth', 'Bexley', 'Havering', 'Kingston upon Thames'],
      dtype=object)

In [36]:
# Local authorities to drop from the London school data.
boroughs_to_remove = ["Luton", "Bracknell Forest", "Slough", "Windsor and Maidenhead", 
                      "Buckinghamshire", "Essex", "Thurrock", "Hertfordshire", 
                      "Kent", "Medway", "Surrey", "West Sussex"]

# Keep only the rows whose local authority is NOT in boroughs_to_remove.
# .copy() makes the result an independent DataFrame, so assigning new columns
# to London_School in later cells does not raise SettingWithCopyWarning.
London_School = London_School[~London_School['LA (name)'].isin(boroughs_to_remove)].copy()

# Check which local authorities remain after the filter.
London_School['LA (name)'].unique()

array(['Camden', 'Greenwich', 'Hackney', 'Islington',
       'Kensington and Chelsea', 'Lambeth', 'Lewisham', 'Southwark',
       'Tower Hamlets', 'Westminster', 'Barking and Dagenham', 'Barnet',
       'Brent', 'Bromley', 'Croydon', 'Ealing', 'Enfield', 'Haringey',
       'Harrow', 'Hillingdon', 'Hounslow', 'Merton', 'Newham',
       'Redbridge', 'Richmond upon Thames', 'Sutton', 'Waltham Forest',
       'Hammersmith and Fulham', 'Wandsworth', 'Bexley', 'Havering',
       'Kingston upon Thames'], dtype=object)

In [38]:
# (rows, columns) of London_School after the local-authority filter above
London_School.shape

(500, 115)

In [39]:
# Column labels of London_School (pandas abbreviates the middle of a long Index with '...')
London_School.columns

Index(['URN', 'LA (code)', 'LA (name)', 'EstablishmentNumber',
       'EstablishmentName', 'TypeOfEstablishment (name)',
       'EstablishmentTypeGroup (name)', 'EstablishmentStatus (name)',
       'ReasonEstablishmentOpened (name)', 'OpenDate',
       ...
       'QABReport', 'CHNumber', 'MSOA (code)', 'LSOA (code)', 'FSM',
       'AccreditationExpiryDate', 'Linked establishments', 'Unnamed: 112',
       'Unnamed: 113', 'Unnamed: 114'],
      dtype='object', length=115)

## School workforce in England Dataset