# Census of Agriculture - DAERA, Northern Ireland

In [1]:
import os
from datetime import datetime, timezone
import pandas as pd
import pooch

In [2]:
# --- local storage ---
# directory, relative to the working directory, that holds the download;
# it is created up front so later cells can write into it
SUB_DIR = os.path.join("data", "agricultural_census", "DAERA")
os.makedirs(SUB_DIR, exist_ok=True)

# name and full path of the local copy of the census CSV
FILE_NAME = "farm-census-ward2014.csv"
DATA_FILE = os.path.join(SUB_DIR, FILE_NAME)

# --- remote source ---
# address of the CSV resource (split over several literals to keep lines short)
URL = (
    "https://admin.opendatani.gov.uk/dataset/"
    "2a936744-dd04-457d-99b5-0000450af4fb/resource/"
    "7c3fa1e0-fadf-4cd3-b3d4-7d500274f226/download/"
    "farm-census---ward2014-2015-19-sup.csv"
)

# no checksum is pinned, so the download is not compared against a known
# hash; the SHA256 printed by pooch on download could be set here instead
KNOWN_HASH = None

In [3]:
# download data if necessary
# (DATA_FILE is the path of the local copy; nothing is fetched when it exists)
if not os.path.isfile(DATA_FILE):
    pooch.retrieve(
        url=URL,
        known_hash=KNOWN_HASH,
        fname=FILE_NAME,
        path=SUB_DIR
    )

    # record the download time (UTC) and source URL in a text file that
    # shares the data file's base name; splitext is used instead of slicing
    # off four characters so the name stays right for any extension length
    metadata_file = f"{os.path.splitext(DATA_FILE)[0]}.txt"
    with open(metadata_file, "w", encoding="utf-8") as outfile:
        outfile.write(
            f"Data downloaded on: {datetime.now(tz=timezone.utc)}\n"
            f"Download URL: {URL}"
        )

Downloading data from 'https://admin.opendatani.gov.uk/dataset/2a936744-dd04-457d-99b5-0000450af4fb/resource/7c3fa1e0-fadf-4cd3-b3d4-7d500274f226/download/farm-census---ward2014-2015-19-sup.csv' to file '/run/media/nms/Backup/Documents/Git/ClimAg/ClimAg/data/agricultural_census/DAERA/farm-census-ward2014.csv'.
SHA256 hash of downloaded file: 5c7add70b1e2c9b2d334a200e300b7fa84bacdb3226683a442ce387d87a9aa16
Use this value as the 'known_hash' argument of 'pooch.retrieve' to ensure that the file hasn't changed if it is downloaded again in the future.


In [3]:
# The file is not valid UTF-8: read as UTF-8, the pound sign in the
# "Average SO/farm" header is replaced by U+FFFD. Decode it as Windows-1252
# instead; encoding_errors="replace" is kept so that a byte undefined in
# that code page still cannot abort the load.
# NOTE(review): encoding inferred from the garbled header - confirm
coa = pd.read_csv(DATA_FILE, encoding="cp1252", encoding_errors="replace")

In [4]:
# preview the first five rows of the table as read from the CSV file
coa.head(n=5)

Unnamed: 0,Ward2014 Code,Ward2014 Name,Year,Number of Farms,Number of lfa farms,Number of non lfa farms,Total area farmed in hectares,Average SO/farm (� '000 ),Total crops in hectares,Total grass & rough grazing in hectares,Total number of cattle,Total number of sheep,Total number of pigs,Total number of poultry ('000),Number of farmers,Number of self employed,Number of spouses,Number of Other workers,Total number of Agricultural labour
0,N08000101,Abbey (Antrim and Newtownabbey),2015,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
1,N08000102,Aldergrove,2015,103,5,98,6452,114,237,5933,10899,12756,1139,137,129,87,44,90,263
2,N08000103,Antrim Centre,2015,5,3,3,41,164,0,40,42,0,3368,0,5,3,0,0,5
3,N08000104,Ballyclare East,2015,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
4,N08000105,Ballyclare West,2015,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3


In [5]:
# keep the ward identifiers and the year, together with the grass, cattle,
# and sheep columns (the trailing spaces belong to the column labels)
coa = coa.loc[
    :,
    [
        "Ward2014 Code",
        "Ward2014 Name",
        "Year",
        "Total grass & rough grazing in hectares  ",
        "Total number of cattle  ",
        "Total number of sheep  ",
    ],
]

In [6]:
# preview the first five rows after the column selection
coa.head(n=5)

Unnamed: 0,Ward2014 Code,Ward2014 Name,Year,Total grass & rough grazing in hectares,Total number of cattle,Total number of sheep
0,N08000101,Abbey (Antrim and Newtownabbey),2015,3,3,3
1,N08000102,Aldergrove,2015,5933,10899,12756
2,N08000103,Antrim Centre,2015,40,42,0
3,N08000104,Ballyclare East,2015,3,3,3
4,N08000105,Ballyclare West,2015,3,3,3


In [7]:
# restrict the table to the rows whose census year is 2018
coa = coa.loc[coa["Year"].eq(2018)]

In [8]:
# rename columns
# The result is assigned back instead of renaming with inplace=True, so the
# frame produced by the year filter is not mutated in place and the cell
# reads as an explicit "old frame -> new frame" step. The keys keep the
# trailing spaces of the original column labels.
coa = coa.rename(
    columns={
        "Ward2014 Code": "ward_2014_code",
        "Ward2014 Name": "ward_2014_name",
        "Year": "year",
        "Total grass & rough grazing in hectares  ": "total_grass_hectares",
        "Total number of cattle  ": "total_cattle",
        "Total number of sheep  ": "total_sheep"
    }
)

In [9]:
# preview the first five rows of the 2018 table with the new column names
coa.head(n=5)

Unnamed: 0,ward_2014_code,ward_2014_name,year,total_grass_hectares,total_cattle,total_sheep
1386,N08000101,Abbey (Antrim and Newtownabbey),2018,3,3,3
1387,N08000102,Aldergrove,2018,5676,10784,15053
1388,N08000103,Antrim Centre,2018,3,3,3
1389,N08000104,Ballyclare East,2018,3,3,3
1390,N08000105,Ballyclare West,2018,148,207,152


In [21]:
# list the index labels of rows holding a missing value in any column
# (an empty index means there is nothing missing)
coa.index[coa.isna().any(axis="columns")]

Int64Index([], dtype='int64')

In [11]:
# write the table to a CSV file in the data directory, leaving out the index
coa.to_csv(
    path_or_buf=os.path.join(SUB_DIR, "daera_agricultural_census.csv"),
    index=False,
)