In [1]:
import pickle
import geopandas as gpd
import pandas as pd
import os
import math as m

In [2]:
# Change the current working directory to the project root so that the
# relative paths used later ('data/interim/...', 'reports/statistics/...')
# resolve correctly.
# The location can be overridden with the NIEDERSACHSEN_DIR environment
# variable; when it is not set, the original machine-specific path is used,
# so behaviour on the author's machine is unchanged.
project_dir = os.environ.get(
    'NIEDERSACHSEN_DIR',
    'C:/Users/aladesuru/sciebo/StormLab/Research/Damilola/DataAnalysis/Lab/Niedersachsen',
)
os.chdir(project_dir)
# Print the current working directory to verify the change
print(os.getcwd())

C:\Users\aladesuru\sciebo\StormLab\Research\Damilola\DataAnalysis\Lab\Niedersachsen


# Load pickle file

In [3]:
# Load the pickled object into `gld` and show its structure and first rows.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
gld_path = 'data/interim/gld.pkl'
with open(gld_path, 'rb') as pkl_file:
    gld = pickle.load(pkl_file)
gld.info()
gld.head()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 10883342 entries, 2 to 480626
Data columns (total 9 columns):
 #   Column     Dtype   
---  ------     -----   
 0   id         int64   
 1   FLIK       object  
 2   year       int32   
 3   area_m2    float64 
 4   peri_m     float64 
 5   shp_index  float64 
 6   fract      float64 
 7   CELLCODE   object  
 8   geometry   geometry
dtypes: float64(4), geometry(1), int32(1), int64(1), object(2)
memory usage: 788.8+ MB


Unnamed: 0,id,FLIK,year,area_m2,peri_m,shp_index,fract,CELLCODE,geometry
2,2,DENILI0414780096,2012,15703.427395,578.467014,1.302196,1.316626,10kmE434N334,"POLYGON ((586867.422 5897421.974, 586647.781 5..."
3,3,DENILI1022670011,2012,245.197084,123.057997,2.216906,1.749401,10kmE430N332,"POLYGON ((553668.492 5875236.581, 553684.023 5..."
4,4,DENILI0402500114,2012,10863.828285,440.057366,1.191003,1.309971,10kmE418N339,"POLYGON ((428044.838 5941127.279, 428044.115 5..."
5,5,DENILI0919270076,2012,3678.626252,283.778694,1.319873,1.375881,10kmE421N330,"POLYGON ((462855.306 5854847.840, 462754.791 5..."
6,6,DENILI0825740002,2012,2413.421262,227.650693,1.307216,1.393748,10kmE435N330,"POLYGON ((604705.953 5858831.705, 604729.937 5..."


# Field / landscape-level descriptive statistics

In [None]:
    # total number of fields per year
    # min, max and mean value of field size, peri and shape index
    # per year across landscape. We could have a box plot of these values
    # across years.
# Summarise the four field metrics for each year with describe().
ldscp1_metrics = ['area_m2', 'peri_m', 'shp_index', 'fract']
ldscp1_desc_stats = gld.groupby('year')[ldscp1_metrics].describe()
# Save the per-year summary table to csv.
ldscp1_desc_stats.to_csv('reports/statistics/ldscp1_desc_stats.csv')

# Grid level descriptive statistics

In [4]:
# total number of grids within the geographic boundaries of the study area
print("gridcount =", gld['CELLCODE'].nunique())

# Create table of year, grid id, number of fields in grid, mean field size, sd_fs, mean peri, sd_peri, mean shape index, sd_shape index.
# gld holds one row per field, so the (year, CELLCODE) pairs are de-duplicated
# here. Without this, griddf keeps one row per field and every merge below
# repeats each grid's statistics once for every field in that grid.
# drop_duplicates keeps the first occurrence, so the grid order is unchanged.
griddf = gld[['year', 'CELLCODE']].drop_duplicates().reset_index(drop=True)

# Before we continue, first check if the number of non-null entries for
# area_m2, peri_m, shp_index and fract within each cellcode is the same.
# If it is, the area_m2 count can serve as the number of fields for every metric.
metric_cols = ['area_m2', 'peri_m', 'shp_index', 'fract']
counts = gld.groupby('CELLCODE')[metric_cols].count()
# True for a cell when every metric has the same count as area_m2.
same_counts = counts.eq(counts['area_m2'], axis=0).all(axis=1)
# Cells whose counts disagree; an empty table means the check passed.
different_counts = counts[~same_counts]
different_counts

gridcount = 1019


Unnamed: 0_level_0,area_m2,peri_m,shp_index,fract
CELLCODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


1. Number of fields per grid

In [5]:
# Number of fields per (year, grid cell): count of non-null area_m2 values.
fields = (
    gld.groupby(['year', 'CELLCODE'])['area_m2']
    .count()
    .rename('fields')
    .reset_index()
)
# Attach the count to the grid table (inner join on year and cell).
griddf = griddf.merge(fields, on=['year', 'CELLCODE'])

2. Sum of field size per grid

In [6]:
# Total field area (m2) per (year, grid cell).
fs_sum = (
    gld.groupby(['year', 'CELLCODE'])['area_m2']
    .sum()
    .rename('fs_sum')
    .reset_index()
)
# Attach the area total to the grid table (inner join on year and cell).
griddf = griddf.merge(fs_sum, on=['year', 'CELLCODE'])


3. Mean field size in the grid

In [7]:

# Mean field size per grid: total area divided by the number of fields,
# then scaled from m2 to hectares (1 ha = 10000 m2).
griddf['mfs_ha'] = griddf['fs_sum'].div(griddf['fields']).mul(1/10000)


4. Standard deviation of field size in the grid (ha)

In [8]:
# Standard deviation of field area per (year, grid cell), scaled from m2 to
# hectares. pandas' std() uses ddof=1 by default, so a group with a single
# field yields NaN.
sdfs_ha = (
    gld.groupby(['year', 'CELLCODE'])['area_m2']
    .std()
    .mul(1/10000)
    .rename('sdfs_ha')
    .reset_index()
)
# Attach the result to the grid table (inner join on year and cell).
griddf = griddf.merge(sdfs_ha, on=['year', 'CELLCODE'])



5. Sum of field perimeters per grid (since the counts are the same for every metric, the `fields` column can be used as the number of fields in the grid)

In [9]:

# Total field perimeter (m) per (year, grid cell).
peri_sum = (
    gld.groupby(['year', 'CELLCODE'])['peri_m']
    .sum()
    .rename('peri_sum')
    .reset_index()
)
# Attach the perimeter total to the grid table (inner join on year and cell).
griddf = griddf.merge(peri_sum, on=['year', 'CELLCODE'])


6. Mean perimeter in the grids

In [10]:
# Mean perimeter per grid: total perimeter divided by the number of fields.
griddf['mperi'] = griddf['peri_sum'].div(griddf['fields'])


7. Standard deviation of perimeter in the grids

In [11]:
# Standard deviation of field perimeter per (year, grid cell)
# (pandas default ddof=1).
sdperi = (
    gld.groupby(['year', 'CELLCODE'])['peri_m']
    .std()
    .rename('sdperi')
    .reset_index()
)
# Attach the result to the grid table (inner join on year and cell).
griddf = griddf.merge(sdperi, on=['year', 'CELLCODE'])

8. Mean shape index in the grids

In [12]:
# Mean shape index per (year, grid cell).
mean_shp = (
    gld.groupby(['year', 'CELLCODE'])['shp_index']
    .mean()
    .rename('mean_shp')
    .reset_index()
)
# Attach the result to the grid table (inner join on year and cell).
griddf = griddf.merge(mean_shp, on=['year', 'CELLCODE'])


9. Standard deviation of shape index in the grids

In [13]:
# Standard deviation of the shape index per (year, grid cell)
# (pandas default ddof=1).
sd_shp = (
    gld.groupby(['year', 'CELLCODE'])['shp_index']
    .std()
    .rename('sd_shp')
    .reset_index()
)
# Attach the result to the grid table (inner join on year and cell).
griddf = griddf.merge(sd_shp, on=['year', 'CELLCODE'])


10. Mean fractal dimension in the grids

In [14]:
# Mean fractal dimension per (year, grid cell).
mean_fract = (
    gld.groupby(['year', 'CELLCODE'])['fract']
    .mean()
    .rename('mean_fract')
    .reset_index()
)
# Attach the result to the grid table (inner join on year and cell).
griddf = griddf.merge(mean_fract, on=['year', 'CELLCODE'])


11. Standard deviation of fractal dimension in the grids

In [15]:
# Standard deviation of the fractal dimension per (year, grid cell)
# (pandas default ddof=1).
sd_fract = (
    gld.groupby(['year', 'CELLCODE'])['fract']
    .std()
    .rename('sd_fract')
    .reset_index()
)
# Attach the result to the grid table (inner join on year and cell).
griddf = griddf.merge(sd_fract, on=['year', 'CELLCODE'])

In [17]:
# Preview the first five rows of the assembled grid table.
griddf.head(n=5)

Unnamed: 0,year,CELLCODE,fields,fs_sum,mfs_ha,sdfs_ha,peri_sum,mperi,sdperi,mean_shp,sd_shp,mean_fract,sd_fract
0,2012,10kmE434N334,1341,43147500.0,3.217562,4.046473,1093294.0,815.282567,504.445713,1.60892,0.6945,1.358011,0.102878
1,2012,10kmE430N332,1724,53437850.0,3.099643,3.129916,1327503.0,770.013466,393.914503,1.461039,0.587853,1.335254,0.096564
2,2012,10kmE418N339,2881,83937720.0,2.913492,2.751341,2231098.0,774.418028,427.116027,1.371696,0.362438,1.316281,0.050521
3,2012,10kmE421N330,2113,68564580.0,3.244892,3.222869,1646686.0,779.311949,396.365678,1.394458,0.38,1.323164,0.069188
4,2012,10kmE435N330,880,27289400.0,3.101068,3.196208,672583.1,764.298951,371.213426,1.442224,0.45613,1.332436,0.079198


In [16]:
# Write the grid table to disk as csv; the DataFrame index is written as the
# first column because to_csv is called with its defaults.
griddf_csv_path = 'data/interim/griddf.csv'
griddf.to_csv(griddf_csv_path)

In [None]:
# Interactive view of the grid table using the qgrid widget.
# The original `import qgrid.enable()` was a SyntaxError: an import statement
# cannot contain a call. Importing the module is all show_grid needs;
# qgrid.enable() only changes the default rendering of DataFrames and is not
# required for an explicit show_grid call.
# qgrid is an optional third-party package, so it is imported in this cell
# rather than at the top, keeping the rest of the notebook runnable without it.
import qgrid

qgrid_widget = qgrid.show_grid(griddf, show_toolbar=True)
qgrid_widget