In [2]:
import pyodbc 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats

### 1. Reading input data

In [4]:
## Reading AGB and transforming the df into a new schema:
print ("connect to engine......")
engine = sa.create_engine('mssql+pyodbc://' + "GREENMONKEY" + '/' + "Climate_Impact" + '?trusted_connection=yes&driver=ODBC+Driver+13+for+SQL+Server')

print ("connection to greenmonkey found")
print ("Reading wilfdire table from greenmonkey")
query3=('''
        SELECT *  
     from   
       [Climate_Impact].wild_fire.[F_AGBflux_sum]
        ''')  
df_F_AGBflux_sum = pd.read_sql(query3, engine)
print(df_F_AGBflux_sum.head(26))

connect to engine......
connection to greenmonkey found
Reading wilfdire table from greenmonkey
          GridNum10km   01-11-2014   01-11-2015   01-11-2016   01-11-2017  \
0   22821536360562688          0.0          0.0          0.0          0.0   
1    9860785350115328          0.0          0.0          0.0          0.0   
2   27660980955643904          0.0          0.0          0.0          0.0   
3   18883523796533248          0.0          0.0          0.0          0.0   
4   14521696744636416          0.0          0.0          0.0          0.0   
5    4842545561468928          0.0          0.0          0.0          0.0   
6    5207660731301888          0.0          0.0          0.0          0.0   
7   14657941697200128          0.0          0.0          0.0          0.0   
8    5144713690611712          0.0          0.0          0.0          0.0   
9    9958586050412544          0.0          0.0          0.0          0.0   
10  13814440185036800          0.0          0.0          

In [5]:
df_transformed_AGBflux =df_F_AGBflux_sum.melt(id_vars=["GridNum10km","AreaHa"], 
     var_name="year", 
        value_name="GDMP")
## update date format:
df_transformed_AGBflux['year'] = pd.to_datetime(df_transformed_AGBflux['year'], format='%d-%m-%Y')
print(df_transformed_AGBflux.head(20))

          GridNum10km   AreaHa       year         GDMP
0   22821536360562688  10000.0 2014-11-01          0.0
1    9860785350115328  10000.0 2014-11-01          0.0
2   27660980955643904  10000.0 2014-11-01          0.0
3   18883523796533248   9429.0 2014-11-01          0.0
4   14521696744636416  10000.0 2014-11-01          0.0
5    4842545561468928  10000.0 2014-11-01          0.0
6    5207660731301888  10000.0 2014-11-01          0.0
7   14657941697200128  10000.0 2014-11-01          0.0
8    5144713690611712  10000.0 2014-11-01          0.0
9    9958586050412544  10000.0 2014-11-01          0.0
10  13814440185036800  10000.0 2014-11-01          0.0
11  10176495511142400  10000.0 2014-11-01          0.0
12  27731023282307072  10000.0 2014-11-01          0.0
13   5507681376796672  10000.0 2014-11-01          0.0
14   6021290745921536  10000.0 2014-11-01          0.0
15  27977696139018240  10000.0 2014-11-01   97412981.0
16  14185147402289152  10000.0 2014-11-01  762397386.0
17  189657

In [3]:
import sqlalchemy as sa
from sqlalchemy import create_engine, event
from sqlalchemy.engine.url import URL

## Reading WILDFIRE data at 10km:
print ("connect to engine......")
engine = sa.create_engine('mssql+pyodbc://' + "GREENMONKEY" + '/' + "Climate_Impact" + '?trusted_connection=yes&driver=ODBC+Driver+13+for+SQL+Server')

print ("connection to greenmonkey found")
print ("Reading wilfdire table from greenmonkey")
query=('''
SELECT     
      [GridNum10km]   
      ,[fire_date]
      ,sum([burned_area]) as [burned_area]
  FROM [Climate_Impact].[wild_fire].[wild_fires_2000_2022_light]
      
group by 
    [GridNum10km]
    ,[fire_date]
''')

df_wildfire10km = pd.read_sql(query, engine)
print(df_wildfire10km.head())

connect to engine......
connection to greenmonkey found
Reading wilfdire table from greenmonkey
    GridNum10km  fire_date  burned_area
0  9.714198e+15 2005-01-01          106
1  2.308425e+16 2020-02-01          334
2  1.840844e+16 2020-07-01          221
3  9.694897e+15 2011-10-01          674
4  9.695021e+15 2011-10-01            7


### 2. Computing variables

#### 2.1. GDMP anomalies

In [6]:
from  scipy.stats import zscore
def z_score(x):
   z = np.around(zscore(x),1)
   return z

#df['Z_SCORE'] = df.groupby('GROUP')['VALUE'].transform(z_score)
df_transformed_AGBflux['zscore'] = df_transformed_AGBflux.groupby('GridNum10km')['GDMP'].transform(z_score)

In [7]:
GDMP = df_transformed_AGBflux[df_transformed_AGBflux['GDMP']>0]
GDMP.head()

Unnamed: 0,GridNum10km,AreaHa,year,GDMP,zscore
15,27977696139018240,10000.0,2014-11-01,97412981.0,0.8
16,14185147402289152,10000.0,2014-11-01,762397386.0,1.4
17,18965789600120832,10000.0,2014-11-01,588245574.0,0.3
18,27603260890152960,10000.0,2014-11-01,290296014.0,-1.0
19,9731949216137216,10000.0,2014-11-01,757097776.0,0.3


In [8]:
# create a list of our conditions
conditions = [
    (GDMP['zscore'] <= -1),
    (GDMP['zscore'] > -1) & (GDMP['zscore'] <= 1),
    (GDMP['zscore'] > 1)
    ]
     
# create a list of the values we want to assign for each condition
values = ['-1', '0', '+1']

# create a new column and use np.select to assign values to it using our lists as arguments
GDMP['ANOMALIES'] = np.select(conditions, values)

# display updated DataFrame
GDMP.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':


Unnamed: 0,GridNum10km,AreaHa,year,GDMP,zscore,ANOMALIES
15,27977696139018240,10000.0,2014-11-01,97412981.0,0.8,0
16,14185147402289152,10000.0,2014-11-01,762397386.0,1.4,1
17,18965789600120832,10000.0,2014-11-01,588245574.0,0.3,0
18,27603260890152960,10000.0,2014-11-01,290296014.0,-1.0,-1
19,9731949216137216,10000.0,2014-11-01,757097776.0,0.3,0


##### Export table

In [9]:
name_of_table ="GDMP_Anomalies_10k"
     
print ("connect to greenmonkey engine.....GREENMONKEY.")  
engine = sa.create_engine('mssql+pyodbc://' + "GREENMONKEY" + '/' + "Climate_Impact" + '?trusted_connection=yes&driver=ODBC+Driver+13+for+SQL+Server')
#print ("send table togreenmonkey MS-SQL:")
GDMP.to_sql(name_of_table, engine, if_exists='replace', index = False, schema=None)


connect to greenmonkey engine.....GREENMONKEY.


##### QC GDMP anomalies

In [10]:
print (df_transformed_AGBflux.head())

         GridNum10km   AreaHa       year  GDMP  zscore
0  22821536360562688  10000.0 2014-11-01   0.0     NaN
1   9860785350115328  10000.0 2014-11-01   0.0     NaN
2  27660980955643904  10000.0 2014-11-01   0.0     NaN
3  18883523796533248   9429.0 2014-11-01   0.0     NaN
4  14521696744636416  10000.0 2014-11-01   0.0     NaN


In [11]:
df_transformed_AGBflux[df_transformed_AGBflux['GridNum10km']== 22821536360562688]

Unnamed: 0,GridNum10km,AreaHa,year,GDMP,zscore
0,22821536360562688,10000.0,2014-11-01,0.0,
302889,22821536360562688,10000.0,2015-11-01,0.0,
605778,22821536360562688,10000.0,2016-11-01,0.0,
908667,22821536360562688,10000.0,2017-11-01,0.0,
1211556,22821536360562688,10000.0,2018-11-01,0.0,
1514445,22821536360562688,10000.0,2019-11-01,0.0,
1817334,22821536360562688,10000.0,2020-11-01,0.0,
2120223,22821536360562688,10000.0,2021-11-01,0.0,


In [12]:
df_transformed_AGBflux[df_transformed_AGBflux['GridNum10km']== 18446609658413056]

Unnamed: 0,GridNum10km,AreaHa,year,GDMP,zscore
114716,18446609658413056,10000.0,2014-11-01,567420786.0,-0.7
417605,18446609658413056,10000.0,2015-11-01,554946035.0,-1.1
720494,18446609658413056,10000.0,2016-11-01,591739566.0,0.2
1023383,18446609658413056,10000.0,2017-11-01,556189307.0,-1.1
1326272,18446609658413056,10000.0,2018-11-01,626023208.0,1.4
1629161,18446609658413056,10000.0,2019-11-01,597534185.0,0.4
1932050,18446609658413056,10000.0,2020-11-01,634120631.0,1.6
2234939,18446609658413056,10000.0,2021-11-01,565236683.0,-0.7


In [13]:
GDMP_grid1=df_transformed_AGBflux[df_transformed_AGBflux['GridNum10km']== 18446609658413056]
avg = GDMP_grid1['GDMP'].mean()
std = GDMP_grid1['GDMP'].std()
#GDMP_grid2['zscore'] = round(((GDMP_grid2['GDMP']-avg)/std),1)
GDMP_grid1.loc[:,'zscore'] = round(((GDMP_grid1['GDMP']-avg)/std),1)
GDMP_grid1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


Unnamed: 0,GridNum10km,AreaHa,year,GDMP,zscore
114716,18446609658413056,10000.0,2014-11-01,567420786.0,-0.6
417605,18446609658413056,10000.0,2015-11-01,554946035.0,-1.0
720494,18446609658413056,10000.0,2016-11-01,591739566.0,0.2
1023383,18446609658413056,10000.0,2017-11-01,556189307.0,-1.0
1326272,18446609658413056,10000.0,2018-11-01,626023208.0,1.3
1629161,18446609658413056,10000.0,2019-11-01,597534185.0,0.4
1932050,18446609658413056,10000.0,2020-11-01,634120631.0,1.5
2234939,18446609658413056,10000.0,2021-11-01,565236683.0,-0.7


In [14]:
zsc= stats.zscore(GDMP_grid['GDMP'])
zsc

NameError: name 'GDMP_grid' is not defined

In [None]:
GDMP_grid2=df_transformed_AGBflux[df_transformed_AGBflux['GridNum10km']== 9731949216137216]
avg = GDMP_grid2['GDMP'].mean()
std = GDMP_grid2['GDMP'].std()
#GDMP_grid2['zscore'] = round(((GDMP_grid2['GDMP']-avg)/std),1)
GDMP_grid2.loc[:,'zscore'] = round(((GDMP_grid2['GDMP']-avg)/std),1)
GDMP_grid2

In [None]:
GDMP_grid2=df_transformed_AGBflux[df_transformed_AGBflux['GridNum10km']== 9731949216137216]
avg = GDMP_grid2['GDMP'].mean()
std = GDMP_grid2['GDMP'].std()
GDMP_grid2['zscore'] = round(((GDMP_grid2['GDMP']-avg)/std),1)
GDMP_grid2

In [None]:
GDMP_grid3=df_transformed_AGBflux[df_transformed_AGBflux['GridNum10km']== 27603260890152960]
avg = GDMP_grid3['GDMP'].mean()
std = GDMP_grid3['GDMP'].std()
GDMP_grid3['zscore'] = round(((GDMP_grid3['GDMP']-avg)/std),1)
GDMP_grid3

In [None]:
avg = GDMP_grid['GDMP'].mean()
std = GDMP_grid['GDMP'].std()
GDMP_grid['zscore'] = round(((GDMP_grid['GDMP']-avg)/std),1)
GDMP_grid

In [None]:
from  scipy.stats import zscore

In [None]:
z = np.around(zscore(GDMP_grid1['GDMP']),1)
z

In [None]:
z = np.round_(zscore(GDMP_grid1['GDMP']),1)
z

In [None]:
GDMP_grid=df_transformed_AGBflux[(df_transformed_AGBflux['GridNum10km']== 18446609658413056) | (df_transformed_AGBflux['GridNum10km']== 9731949216137216) | (df_transformed_AGBflux['GridNum10km']== 27603260890152960)]
GDMP_grid

In [None]:
def z_score(x):
   z = np.around(zscore(x),1)
   return z

#df['Z_SCORE'] = df.groupby('GROUP')['VALUE'].transform(z_score)
GDMP_grid['zscore'] = GDMP_grid.groupby('GridNum10km')['GDMP'].transform(z_score)

print (GDMP_grid)

In [None]:
# create a list of our conditions
conditions = [
    (GDMP_grid['zscore'] <= -1),
    (GDMP_grid['zscore'] > -1) & (GDMP_grid['zscore'] <= 1),
    (GDMP_grid['zscore'] > 1)
    ]
     
# create a list of the values we want to assign for each condition
values = ['-1', '0', '+1']

# create a new column and use np.select to assign values to it using our lists as arguments
GDMP_grid['ANOMALIES'] = np.select(conditions, values)

# display updated DataFrame
GDMP_grid.head()

In [None]:
GDMP_grid