Data analysis of all the bee keepers around Slovakia

In [1]:
#import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#get dataframe
df = pd.read_csv("Bees2023.csv", encoding='utf-8')

Parameters of dataset:

In [3]:
num_rows, num_columns = df.shape

print("Number of rows:", num_rows)
print("Number of columns:", num_columns)

Number of rows: 26248
Number of columns: 5


In [4]:
column_headers = df.columns

print("Column Headers:", column_headers)
df.head()

Column Headers: Index(['Cadastral territory', 'Municipality', 'Region', 'County', 'Amount'], dtype='object')


Unnamed: 0,Cadastral territory,Municipality,Region,County,Amount
0,Vinohrady nad Váhom,Vinohrady nad Váhom,GALANTA,Galanta,30
1,Vinohrady nad Váhom,Vinohrady nad Váhom,GALANTA,Galanta,16
2,Vinohrady nad Váhom,Vinohrady nad Váhom,GALANTA,Galanta,5
3,Vinohrady nad Váhom,Vinohrady nad Váhom,GALANTA,Galanta,0
4,Vinohrady nad Váhom,Vinohrady nad Váhom,GALANTA,Galanta,14


Configuration of dataset:

In [5]:
column_headers = df.columns

print("Column Headers:", column_headers)

Column Headers: Index(['Cadastral territory', 'Municipality', 'Region', 'County', 'Amount'], dtype='object')


Create new data frame (df1) with unique values:

In [6]:
df1 = df.drop_duplicates(subset=["Cadastral territory", "Municipality", "Region", "County"])
df1.shape
print(df1)

       Cadastral territory         Municipality   Region   County  Amount
0      Vinohrady nad Váhom  Vinohrady nad Váhom  GALANTA  Galanta      30
21                Vozokany             Vozokany  GALANTA  Galanta      10
24          Zemianske Sady       Zemianske Sady  GALANTA  Galanta      10
29                 Šoporňa              Šoporňa  GALANTA  Galanta       4
35              Tomášikovo           Tomášikovo  GALANTA  Galanta     100
...                    ...                  ...      ...      ...     ...
26189               Bzovík               Bzovík  KRUPINA   Zvolen       1
26202      Bzovská Lehôtka      Bzovská Lehôtka   ZVOLEN   Zvolen      18
26204               Cerovo               Cerovo  KRUPINA   Zvolen       9
26206    Čabradský Vrbovok    Čabradský Vrbovok  KRUPINA   Zvolen       4
26209              Čekovce              Čekovce  KRUPINA   Zvolen      11

[3377 rows x 5 columns]


In [7]:
#Dropping amounts
df1.drop('Amount', axis=1)

Unnamed: 0,Cadastral territory,Municipality,Region,County
0,Vinohrady nad Váhom,Vinohrady nad Váhom,GALANTA,Galanta
21,Vozokany,Vozokany,GALANTA,Galanta
24,Zemianske Sady,Zemianske Sady,GALANTA,Galanta
29,Šoporňa,Šoporňa,GALANTA,Galanta
35,Tomášikovo,Tomášikovo,GALANTA,Galanta
...,...,...,...,...
26189,Bzovík,Bzovík,KRUPINA,Zvolen
26202,Bzovská Lehôtka,Bzovská Lehôtka,ZVOLEN,Zvolen
26204,Cerovo,Cerovo,KRUPINA,Zvolen
26206,Čabradský Vrbovok,Čabradský Vrbovok,KRUPINA,Zvolen


Calculate number of bee hives per cadastral territory: (Here we count the number of occurences of each row from df1 in df (original data frame))

In [8]:
merged_df = pd.merge(df, df1, on=['Cadastral territory', 'Municipality', 'Region', 'County'])
count_bee_keepers_df = merged_df.groupby(['Cadastral territory', 'Municipality', 'Region', 'County']).size().reset_index(name='Bee keepers')

# Fill NaN values in the 'Sum' column with 0
count_bee_keepers_df['Bee keepers'].fillna(0, inplace=True)

print("\nCount of occurrences in df:")
print(count_bee_keepers_df)


Count of occurrences in df:
     Cadastral territory Municipality              Region           County  \
0                Abrahám      Abrahám             GALANTA          Galanta   
1            Abrahámovce  Abrahámovce            BARDEJOV         Bardejov   
2            Abrahámovce  Abrahámovce            KEŽMAROK           Poprad   
3               Abramová     Abramová  TURČIANSKE TEPLICE           Martin   
4              Abranovce    Abranovce              PREŠOV           Prešov   
...                  ...          ...                 ...              ...   
3372             Žlkovce      Žlkovce            HLOHOVEC           Trnava   
3373              Župkov       Župkov           ŽARNOVICA  Žiar nad Hronom   
3374             Župčany      Župčany              PREŠOV           Prešov   
3375                 Žíp          Žíp     RIMAVSKÁ SOBOTA  Rimavská Sobota   
3376             Žírovce      Herľany       KOŠICE-OKOLIE  Košice - okolie   

      Bee keepers  
0             

FINAL DATA FRAME WITH NUMBER OF BEE KEEPERS PER CADASTRAL TERRITORY

In [12]:
count_bee_keepers_df.drop("Cadastral territory",axis=1)
count_bee_keepers_df.drop("Municipality",axis=1)
count_bee_keepers_df.drop("Region",axis=1)
count_bee_keepers_df.drop("County",axis=1)

Unnamed: 0,Cadastral territory,Municipality,Region,Bee keepers
0,Abrahám,Abrahám,GALANTA,14
1,Abrahámovce,Abrahámovce,BARDEJOV,1
2,Abrahámovce,Abrahámovce,KEŽMAROK,4
3,Abramová,Abramová,TURČIANSKE TEPLICE,1
4,Abranovce,Abranovce,PREŠOV,1
...,...,...,...,...
3372,Žlkovce,Žlkovce,HLOHOVEC,6
3373,Župkov,Župkov,ŽARNOVICA,13
3374,Župčany,Župčany,PREŠOV,8
3375,Žíp,Žíp,RIMAVSKÁ SOBOTA,5


Calculate number of bee hives per cadastral territory: (count amount for each instance)

In [13]:
sum_df = df.groupby(['Cadastral territory', 'Municipality', 'Region', 'County'])['Amount'].sum().reset_index(name='Bee hives')

# Merge the sum information back into the DataFrame with unique instances
count_bee_hives_df = pd.merge(df1, sum_df, on=['Cadastral territory', 'Municipality', 'Region', 'County'], how='left')

# Fill NaN values in the 'Bee hives' column with 0
count_bee_hives_df['Bee hives'].fillna(0, inplace=True)

print(count_bee_hives_df)

      Cadastral territory         Municipality   Region   County  Amount  \
0     Vinohrady nad Váhom  Vinohrady nad Váhom  GALANTA  Galanta      30   
1                Vozokany             Vozokany  GALANTA  Galanta      10   
2          Zemianske Sady       Zemianske Sady  GALANTA  Galanta      10   
3                 Šoporňa              Šoporňa  GALANTA  Galanta       4   
4              Tomášikovo           Tomášikovo  GALANTA  Galanta     100   
...                   ...                  ...      ...      ...     ...   
3372               Bzovík               Bzovík  KRUPINA   Zvolen       1   
3373      Bzovská Lehôtka      Bzovská Lehôtka   ZVOLEN   Zvolen      18   
3374               Cerovo               Cerovo  KRUPINA   Zvolen       9   
3375    Čabradský Vrbovok    Čabradský Vrbovok  KRUPINA   Zvolen       4   
3376              Čekovce              Čekovce  KRUPINA   Zvolen      11   

      Bee hives  
0           295  
1            85  
2            61  
3           205

FINAL DATA FRAME WITH NUMBER OF BEE HIVES PER CADASTRAL TERRITORY:

In [14]:
count_bee_hives_df.drop("Amount",axis=1)

Unnamed: 0,Cadastral territory,Municipality,Region,County,Bee hives
0,Vinohrady nad Váhom,Vinohrady nad Váhom,GALANTA,Galanta,295
1,Vozokany,Vozokany,GALANTA,Galanta,85
2,Zemianske Sady,Zemianske Sady,GALANTA,Galanta,61
3,Šoporňa,Šoporňa,GALANTA,Galanta,205
4,Tomášikovo,Tomášikovo,GALANTA,Galanta,124
...,...,...,...,...,...
3372,Bzovík,Bzovík,KRUPINA,Zvolen,143
3373,Bzovská Lehôtka,Bzovská Lehôtka,ZVOLEN,Zvolen,28
3374,Cerovo,Cerovo,KRUPINA,Zvolen,10
3375,Čabradský Vrbovok,Čabradský Vrbovok,KRUPINA,Zvolen,39


RESULT DATAFRAME:

In [20]:
#Combine bee keepers and bee hives dfs
count_bee_keepers_df['Bee hives'] = count_bee_hives_df['Bee hives']
result_df = count_bee_keepers_df
count_bee_keepers_df.drop('Bee hives', axis=1)

print(result_df)
result_df.to_excel('per_cadastral_territory_excel.xlsx', index=False, encoding='utf-8')
result_df.to_csv('per_cadastral_territory_csv.csv', index=False, encoding='utf-8')

     Cadastral territory Municipality              Region           County  \
0                Abrahám      Abrahám             GALANTA          Galanta   
1            Abrahámovce  Abrahámovce            BARDEJOV         Bardejov   
2            Abrahámovce  Abrahámovce            KEŽMAROK           Poprad   
3               Abramová     Abramová  TURČIANSKE TEPLICE           Martin   
4              Abranovce    Abranovce              PREŠOV           Prešov   
...                  ...          ...                 ...              ...   
3372             Žlkovce      Žlkovce            HLOHOVEC           Trnava   
3373              Župkov       Župkov           ŽARNOVICA  Žiar nad Hronom   
3374             Župčany      Župčany              PREŠOV           Prešov   
3375                 Žíp          Žíp     RIMAVSKÁ SOBOTA  Rimavská Sobota   
3376             Žírovce      Herľany       KOŠICE-OKOLIE  Košice - okolie   

      Bee keepers  Bee hives  
0              14        295  
1

  return func(*args, **kwargs)
