# ANALYSIS OF ARABLE PRODUCTION OF CEREAL CROPS IN IRELAND


## A CRISP-DM approach was used in this research

### STEP 1: Importing Relevant Libraries for Data Exploration and Analysis

In [1]:
# Importing the relevant libraries
import pandas as pd
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt 
%matplotlib inline
sns.set(color_codes=True)
import statistics
import warnings
warnings.filterwarnings('ignore')

### STEP 2: Loading & Exploring Data on Cereal Crops in Ireland (source: Central Statistics Office)

In [2]:
# Load the cereal-crop data set into a DataFrame.
# The file name is relative, so the CSV is looked up in the notebook's working directory.
# NOTE(review): "AQA04" looks like the source table code and the suffix like an export timestamp - confirm.
df_cereals = pd.read_csv("AQA04.20221222T221238.csv")

In [3]:
# Preview the first five records of the raw data
df_cereals.head(n=5)

Unnamed: 0,Statistic Label,Year,Type of Crop,UNIT,VALUE
0,Area under Crops,2008,Winter wheat,000 Hectares,87.5
1,Area under Crops,2008,Spring wheat,000 Hectares,23.2
2,Area under Crops,2008,Winter oats,000 Hectares,18.7
3,Area under Crops,2008,Spring oats,000 Hectares,4.2
4,Area under Crops,2008,Winter barley,000 Hectares,21.1


In [4]:
# Preview the final five records of the raw data
df_cereals.tail(n=5)

Unnamed: 0,Statistic Label,Year,Type of Crop,UNIT,VALUE
247,Crop Production,2021,Spring wheat,000 Tonnes,52.7
248,Crop Production,2021,Winter oats,000 Tonnes,126.9
249,Crop Production,2021,Spring oats,000 Tonnes,111.4
250,Crop Production,2021,Winter barley,000 Tonnes,638.8
251,Crop Production,2021,Spring barley,000 Tonnes,917.6


In [5]:
# Summarise the data set: column names, non-null counts and dtypes.
# The dtypes are checked because numeric variables are sometimes stored as strings (object dtype).
df_cereals.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Statistic Label  252 non-null    object 
 1   Year             252 non-null    int64  
 2   Type of Crop     252 non-null    object 
 3   UNIT             252 non-null    object 
 4   VALUE            252 non-null    float64
dtypes: float64(1), int64(1), object(3)
memory usage: 10.0+ KB


In [6]:
# Count the missing (null) values in each column
print(df_cereals.isnull().sum()) 

Statistic Label    0
Year               0
Type of Crop       0
UNIT               0
VALUE              0
dtype: int64


In [7]:
# Helper: tabulate how often each distinct value occurs in a column
def count_values(df, column):
    """Return the frequency of every distinct value in one column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to tabulate (the callers in this notebook
        pass DataFrames).
    column : str
        Label of the column in ``df``.

    Returns
    -------
    pandas.Series
        The result of ``df[column].value_counts()``: distinct values as the
        index, their counts as the values.
    """
    return df[column].value_counts()

# Frequency of each 'Statistic Label' value in the raw (long-format) data
# NOTE: `df` is an alias of df_cereals, not a copy; it is rebound in a later cell.
df = df_cereals
counts = count_values(df, 'Statistic Label')
print(counts)


Area under Crops          84
Crop Yield per Hectare    84
Crop Production           84
Name: Statistic Label, dtype: int64


In [8]:
# Reshape the long table to wide form: one row per
# ("Statistic Label", "Year", "UNIT") combination, one column per
# "Type of Crop", with the 'VALUE' column supplying the cell values.
pivot_index = ["Statistic Label", "Year", "UNIT"]

# pivot_table aggregates with the mean by default, so a repeated
# (index, crop) combination would be averaged silently. Fail loudly instead.
duplicate_rows = df_cereals.duplicated(subset=pivot_index + ["Type of Crop"])
assert not duplicate_rows.any(), (
    "duplicate (Statistic Label, Year, UNIT, Type of Crop) rows would be "
    "silently averaged by pivot_table"
)

irish_cer = df_cereals.pivot_table(index=pivot_index, columns="Type of Crop", values="VALUE")



In [9]:
# Preview the first five rows of the pivoted table
irish_cer.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Type of Crop,Spring barley,Spring oats,Spring wheat,Winter barley,Winter oats,Winter wheat
Statistic Label,Year,UNIT,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Area under Crops,2008,000 Hectares,166.0,4.2,23.2,21.1,18.7,87.5
Area under Crops,2009,000 Hectares,174.3,11.3,20.2,19.3,9.1,64.3
Area under Crops,2010,000 Hectares,146.0,9.4,18.0,28.8,10.3,59.8
Area under Crops,2011,000 Hectares,144.8,12.4,16.4,35.9,9.0,77.7
Area under Crops,2012,000 Hectares,151.8,13.8,13.5,41.0,9.9,84.6


In [10]:
# Move the index levels ("Statistic Label", "Year", "UNIT") back into ordinary
# columns so the data can be viewed and manipulated.
# The guard keeps this cell re-runnable: resetting an already-reset frame
# would insert a spurious "index" column on every additional run.
if "Statistic Label" not in irish_cer.columns:
    irish_cer = irish_cer.reset_index()
irish_cer

Type of Crop,Statistic Label,Year,UNIT,Spring barley,Spring oats,Spring wheat,Winter barley,Winter oats,Winter wheat
0,Area under Crops,2008,000 Hectares,166.0,4.2,23.2,21.1,18.7,87.5
1,Area under Crops,2009,000 Hectares,174.3,11.3,20.2,19.3,9.1,64.3
2,Area under Crops,2010,000 Hectares,146.0,9.4,18.0,28.8,10.3,59.8
3,Area under Crops,2011,000 Hectares,144.8,12.4,16.4,35.9,9.0,77.7
4,Area under Crops,2012,000 Hectares,151.8,13.8,13.5,41.0,9.9,84.6
5,Area under Crops,2013,000 Hectares,183.5,21.3,15.2,36.0,5.4,45.4
6,Area under Crops,2014,000 Hectares,155.6,8.5,6.5,60.1,10.1,65.1
7,Area under Crops,2015,000 Hectares,133.0,12.1,10.1,69.8,11.4,55.3
8,Area under Crops,2016,000 Hectares,114.6,10.0,7.5,74.6,13.2,60.4
9,Area under Crops,2017,000 Hectares,115.2,10.0,6.8,65.0,14.4,60.3


In [11]:
# Summarise the pivoted table: column names, non-null counts and dtypes
irish_cer.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42 entries, 0 to 41
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Statistic Label  42 non-null     object 
 1   Year             42 non-null     int64  
 2   UNIT             42 non-null     object 
 3   Spring barley    42 non-null     float64
 4   Spring oats      42 non-null     float64
 5   Spring wheat     42 non-null     float64
 6   Winter barley    42 non-null     float64
 7   Winter oats      42 non-null     float64
 8   Winter wheat     42 non-null     float64
dtypes: float64(6), int64(1), object(2)
memory usage: 3.1+ KB


In [12]:
# Frequency of each value in the "Statistic Label" column of the pivoted table
# NOTE: `df` now refers to the same object as irish_cer (no copy is made).
df = irish_cer
counts = count_values(df, 'Statistic Label')
print(counts)

Area under Crops          14
Crop Production           14
Crop Yield per Hectare    14
Name: Statistic Label, dtype: int64


In [13]:
# Relabel each 'Statistic Label' value so that it carries its unit of measure
irish_cer['Statistic Label'] = irish_cer['Statistic Label'].replace(
    {
        'Area under Crops': 'Area under Crops(000ha)',
        'Crop Production': 'Crop Production(000tonnes)',
        'Crop Yield per Hectare': 'Crop Yield per ha(tonnes)',
    }
)


In [14]:
# Display the table to check the renamed 'Statistic Label' values
irish_cer

Type of Crop,Statistic Label,Year,UNIT,Spring barley,Spring oats,Spring wheat,Winter barley,Winter oats,Winter wheat
0,Area under Crops(000ha),2008,000 Hectares,166.0,4.2,23.2,21.1,18.7,87.5
1,Area under Crops(000ha),2009,000 Hectares,174.3,11.3,20.2,19.3,9.1,64.3
2,Area under Crops(000ha),2010,000 Hectares,146.0,9.4,18.0,28.8,10.3,59.8
3,Area under Crops(000ha),2011,000 Hectares,144.8,12.4,16.4,35.9,9.0,77.7
4,Area under Crops(000ha),2012,000 Hectares,151.8,13.8,13.5,41.0,9.9,84.6
5,Area under Crops(000ha),2013,000 Hectares,183.5,21.3,15.2,36.0,5.4,45.4
6,Area under Crops(000ha),2014,000 Hectares,155.6,8.5,6.5,60.1,10.1,65.1
7,Area under Crops(000ha),2015,000 Hectares,133.0,12.1,10.1,69.8,11.4,55.3
8,Area under Crops(000ha),2016,000 Hectares,114.6,10.0,7.5,74.6,13.2,60.4
9,Area under Crops(000ha),2017,000 Hectares,115.2,10.0,6.8,65.0,14.4,60.3


In [16]:
# Drop the "UNIT" column; the 'Statistic Label' values were relabelled above
# to include the unit. errors="ignore" keeps the cell re-runnable: without it
# a second run raises KeyError because "UNIT" has already been removed.
irish_cer = irish_cer.drop(columns=["UNIT"], errors="ignore")
irish_cer

Type of Crop,Statistic Label,Year,Spring barley,Spring oats,Spring wheat,Winter barley,Winter oats,Winter wheat
0,Area under Crops(000ha),2008,166.0,4.2,23.2,21.1,18.7,87.5
1,Area under Crops(000ha),2009,174.3,11.3,20.2,19.3,9.1,64.3
2,Area under Crops(000ha),2010,146.0,9.4,18.0,28.8,10.3,59.8
3,Area under Crops(000ha),2011,144.8,12.4,16.4,35.9,9.0,77.7
4,Area under Crops(000ha),2012,151.8,13.8,13.5,41.0,9.9,84.6
5,Area under Crops(000ha),2013,183.5,21.3,15.2,36.0,5.4,45.4
6,Area under Crops(000ha),2014,155.6,8.5,6.5,60.1,10.1,65.1
7,Area under Crops(000ha),2015,133.0,12.1,10.1,69.8,11.4,55.3
8,Area under Crops(000ha),2016,114.6,10.0,7.5,74.6,13.2,60.4
9,Area under Crops(000ha),2017,115.2,10.0,6.8,65.0,14.4,60.3
