# Charity Commission for England and Wales

Links:
- Data Definition Document: https://register-of-charities.charitycommission.gov.uk/documents/34602/422354/Data+Definition.docx/f0a342ce-ef45-1401-ee75-26225f6f0d4f?t=1617010186385
- Full Register Download: https://register-of-charities.charitycommission.gov.uk/register/full-register-download

File names and descriptions on the website:
- publicextract.charity.zip: Contains charity information like name, registration status, activities, and address.
- publicextract.charityclass.zip: Contains information on charity classifications and categories.
- publicextract.charity_area_of_operation.zip: Contains information about the areas of operation for the charities.

Potentially useful variables:
1. charity_name: The name of the charity.
2. register_status: Indicates whether the charity is currently registered or removed.
3. charity_activities: Describes the activities the charity is involved in.
4. charity_contact_address: Contains the address of the charity.
5. area_of_operation: Indicates the area where the charity operates.

charity_name:
File: publicextract.charity.json
URL: https://ccewuksprdoneregsadata1.blob.core.windows.net/data/json/publicextract.charity.zip

register_status:
File: publicextract.charityregisterstatus.json
URL: This information is already available in the 'charity' file under the column 'charity_registration_status'

charity_activities:
File: publicextract.charityactivities.json
URL: This information is already available in the 'charity' file under the column 'charity_activities'

area_of_operation:
File: publicextract.charity_area_of_operation.json
URL: https://ccewuksprdoneregsadata1.blob.core.windows.net/data/json/publicextract.charity_area_of_operation.zip



In [1]:
# https://ccewuksprdoneregsadata1.blob.core.windows.net/data/json/publicextract.charity.zip

import requests
import zipfile
import io
import os

# Download the zip file.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error (404, 5xx, ...) into an immediate,
# readable exception instead of a confusing BadZipFile further down.
url = "https://ccewuksprdoneregsadata1.blob.core.windows.net/data/json/publicextract.charity.zip"
response = requests.get(url, timeout=60)
response.raise_for_status()

# Make sure the 01_Data_Collection folder exists (no error if it already does)
data_collection_folder = "01_Data_Collection"
os.makedirs(data_collection_folder, exist_ok=True)

# Extract the JSON file to the 01_Data_Collection folder.
# The archive is opened from the in-memory download and closed as soon as the
# member has been written, so no file handle is left open in the kernel.
json_filename = "publicextract.charity.json"
with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
    zip_file.extract(json_filename, data_collection_folder)

print(f"JSON file has been extracted to the {data_collection_folder} folder.")



JSON file has been extracted to the 01_Data_Collection folder.


In [2]:
import pandas as pd
import json

# Read the extracted charity JSON as text and parse it.
# The 'utf-8-sig' codec drops a leading byte-order mark if the file has one.
with open('01_Data_Collection/publicextract.charity.json', mode='r', encoding='utf-8-sig') as file:
    data = json.loads(file.read())

# Normalize the parsed JSON into a flat pandas DataFrame
charity_data = pd.json_normalize(data)

# Render the DataFrame in the notebook output
display(charity_data)


Unnamed: 0,date_of_extract,organisation_number,registered_charity_number,linked_charity_number,charity_name,charity_type,charity_registration_status,date_of_registration,date_of_removal,charity_reporting_status,...,charity_insolvent,charity_in_administration,charity_previously_excepted,charity_is_cdf_or_cif,charity_is_cio,cio_is_dissolved,date_cio_dissolution_notice,charity_activities,charity_gift_aid,charity_has_land
0,2023-04-15T00:00:00,1,200027,1,POTTERNE MISSION ROOM AND TRUST,,Removed,1962-05-17T00:00:00,2014-04-16T00:00:00,,...,False,False,,,,,,,,
1,2023-04-15T00:00:00,2,200027,2,HITCHAM FREE CHURCH,,Registered,1962-05-17T00:00:00,,,...,False,False,,,,,,,,
2,2023-04-15T00:00:00,3,200028,1,TOWN LANDS CHARITY FOR THE POOR,,Removed,1961-10-19T00:00:00,1997-09-17T00:00:00,,...,False,False,,,,,,,,
3,2023-04-15T00:00:00,4,200028,2,TOWN LANDS CHARITY FOR THE CHURCH,,Removed,1961-10-19T00:00:00,1997-09-17T00:00:00,,...,False,False,,,,,,,,
4,2023-04-15T00:00:00,5,200034,1,CLOPHILL RELIEF IN NEED CHARITY,,Registered,1972-07-19T00:00:00,,,...,False,False,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
381567,2023-04-15T00:00:00,5217722,1202697,0,BUDE CANCER SUPPORT,CIO,Registered,2023-04-12T00:00:00,,New,...,False,False,False,,True,False,,,,
381568,2023-04-15T00:00:00,5217798,1202715,0,OH SENSORY ME,CIO,Registered,2023-04-13T00:00:00,,New,...,False,False,False,,True,False,,,,
381569,2023-04-15T00:00:00,5217805,1202698,0,AN-NAKHLAH AID,CIO,Registered,2023-04-12T00:00:00,,New,...,False,False,False,,True,False,,Poverty,,False
381570,2023-04-15T00:00:00,5217881,1202699,0,SAVE THE HUMANITY,CIO,Registered,2023-04-12T00:00:00,,New,...,False,False,False,,True,False,,Poverty,,False


In [11]:
import urllib.request
import zipfile

# Fetch the charity_area_of_operation archive and store it next to the notebook
url_area_of_operation = "https://ccewuksprdoneregsadata1.blob.core.windows.net/data/json/publicextract.charity_area_of_operation.zip"
urllib.request.urlretrieve(
    url=url_area_of_operation,
    filename="charityareaofoperation.zip",
)

# Unpack every member of the downloaded archive into the data collection folder
with zipfile.ZipFile(file="charityareaofoperation.zip", mode="r") as area_of_operation_zip:
    area_of_operation_zip.extractall(path='01_Data_Collection')


In [15]:
import json

# Read the extracted charity_area_of_operation JSON as text and parse it.
# The 'utf-8-sig' codec drops a leading byte-order mark if the file has one.
with open('01_Data_Collection/publicextract.charity_area_of_operation.json', mode='r', encoding='utf-8-sig') as area_of_operation_file:
    area_of_operation_data = json.loads(area_of_operation_file.read())

# Normalize the parsed JSON into a flat pandas DataFrame
area_of_operation_df = pd.json_normalize(area_of_operation_data)

# Render the DataFrame in the notebook output
display(area_of_operation_df)

Unnamed: 0,date_of_extract,organisation_number,registered_charity_number,linked_charity_number,geographic_area_type,geographic_area_description,parent_geographic_area_type,parent_geographic_area_description,welsh_ind
0,2023-04-15T00:00:00,200001,200001,0,Region,Throughout England And Wales,,,False
1,2023-04-15T00:00:00,200002,200002,0,Region,Throughout England And Wales,,,False
2,2023-04-15T00:00:00,200002,200002,0,Country,Italy,Continent,Europe,False
3,2023-04-15T00:00:00,200002,200002,0,Country,Spain,Continent,Europe,False
4,2023-04-15T00:00:00,200003,200003,0,Region,Throughout England And Wales,,,False
...,...,...,...,...,...,...,...,...,...
512728,2023-04-15T00:00:00,5006592,1137314,0,Country,India,Continent,Asia,False
512729,2023-04-15T00:00:00,5209141,1201451,0,Country,Bangladesh,Continent,Asia,False
512730,2023-04-15T00:00:00,5209141,1201451,0,Country,India,Continent,Asia,False
512731,2023-04-15T00:00:00,5140416,1185433,0,Country,United States,Continent,North America,False


In [None]:
# It doesn't look like the register data would be useful for my project,
# but it was good to explore this option.