<a href="https://colab.research.google.com/github/Max-FM/SPRINT-Colombia/blob/main/Visualising_District_Time_Series_Gradient.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Visualising the Gradient of Time Series Data for Each District

## Installing/Upgrading Packages

In [1]:
%%capture

# %%capture (above) suppresses the output produced by the commands in this cell.
!pip install plotly --upgrade

# Installing Orca - required to save to png using Google Colab.
# The AppImage is downloaded directly to /usr/local/bin/orca and then marked
# as executable.
!wget https://github.com/plotly/orca/releases/download/v1.2.1/orca-1.2.1-x86_64.AppImage -O /usr/local/bin/orca
!chmod +x /usr/local/bin/orca
# System packages installed alongside Orca (presumably its runtime
# dependencies for running without a display - TODO confirm).
!apt-get install xvfb libgtk2.0-0 libgconf-2-4

## Import Packages

In [2]:
import numpy as np
import pandas as pd
import plotly.graph_objs as go
from plotly.subplots import make_subplots

## Importing Table of Extreme Weather Data

In [3]:
extreme_weather_table = pd.read_excel('/content/drive/Shared drives/Colombia SPRINT/Test Districts/Colombia extreme weather in test districts.xlsx')

# Filling in empty values for dates: a missing day or month defaults to 1.
# The filled frame is assigned back instead of calling fillna(inplace=True) on
# a column selection; the latter mutates a temporary object and does not
# update the table under pandas Copy-on-Write.
extreme_weather_table = extreme_weather_table.fillna({'Day': 1,
                                                      'Month': 1,
                                                      'End Day': 1,
                                                      'End Month': 1})

# Dealing with duplicate dates.
extreme_weather_table.loc[0, 'End Year'] = 1999 # Assuming 1998 El Nino drought lasts about a year.
extreme_weather_table.loc[0, 'End Day'] = 26 # Assuming 1998 El Nino drought lasts about a year.
extreme_weather_table.loc[31, 'End Month'] = 7 # Assuming Jun 2020 flood lasts about a month.

# Converting date columns into a single datetime column.
# Each date is assembled as a 'year-month-day' string and parsed by pandas.
extreme_weather_table['Start_Datetime'] = pd.to_datetime(extreme_weather_table['Start Year'].astype(str) + '-' +
                                          extreme_weather_table['Month'].astype(int).astype(str) + '-' +
                                          extreme_weather_table['Day'].astype(int).astype(str))

extreme_weather_table['End_Datetime'] = pd.to_datetime(extreme_weather_table['End Year'].astype(str) + '-' +
                                        extreme_weather_table['End Month'].astype(int).astype(str) + '-' +
                                        extreme_weather_table['End Day'].astype(int).astype(str))

# Adding in missing events.
# DataFrame.append was removed in pandas 2.0, so the extra row is wrapped in a
# one-row DataFrame and concatenated instead. Columns the new row does not
# provide are left as missing values.
extreme_weather_table = pd.concat([extreme_weather_table,
                                   pd.DataFrame([{'Disaster': 'Drought',
                                                  'Start_Datetime': pd.to_datetime('2009-01-01'),
                                                  'End_Datetime': pd.to_datetime('2010-01-01')}])],
                                  ignore_index=True) \
                          .sort_values(by='Start_Datetime') \
                          .reset_index() # 2009 El Nino drought.

# Display only the columns used for shading the plots.
extreme_weather_table[['Disaster', 'Start_Datetime', 'End_Datetime']]

Unnamed: 0,Disaster,Start_Datetime,End_Datetime
0,Drought,1998-01-01,1999-01-26
1,Flood,1999-01-10,1999-05-19
2,Earthquake,1999-01-25,1999-01-25
3,Flood,1999-10-28,1999-12-31
4,Flood,2000-05-18,2000-05-24
5,Wildfire,2001-08-01,2001-08-01
6,Drought,2002-01-01,2003-01-01
7,Flood,2002-04-24,2002-04-29
8,Flood,2003-08-01,2003-12-01
9,Flood,2004-01-01,2004-06-28


## Define Import Functions for CSV Data

A series of functions to import CSV data acquired from the [Colombia Farms Time Series Data](https://colab.research.google.com/drive/1-_YUqAHe102ILURNULWhR1eBCMyB050-?usp=sharing) IPython notebook. I haven't made a generalised import function as the datasets are not formatted uniformly.

In [4]:
def import_MODIS_TERRA_AQUA(district):
    """Load the MODIS 16-day TERRA and AQUA vegetation CSVs for a district.

    For each satellite the CSV is read, its 'Timestamp' column is parsed to
    datetimes, the EVI/NDVI median and stdDev columns are rescaled by 0.0001
    and a '<column>_dt' derivative column is added for every column whose name
    contains 'median'. The two per-satellite tables are then concatenated
    (TERRA rows first, then AQUA rows).

    Parameters
    ----------
    district : str
        District name; used both as the folder name and the file-name prefix.

    Returns
    -------
    pandas.DataFrame
        The concatenated TERRA and AQUA data including the derivative columns.
    """
    directory = f'/content/drive/Shared drives/Colombia SPRINT/Test Districts/{district}/CSV Data'

    # Columns stored as large integers that need rescaling.
    columns_of_interest = ['EVI_median', 'EVI_stdDev','NDVI_median', 'NDVI_stdDev']

    # Imports NDVI/EVI data for both the MODIS TERRA and AQUA satellites.
    satellites = ['TERRA', 'AQUA']
    df_list = []
    for satellite in satellites:
        satellite_df = pd.read_csv(f'{directory}/{district}_MODIS_16D_{satellite}_VEG.csv', index_col=0)

        # Convert timestamp to datetime object.
        satellite_df['Timestamp'] = satellite_df['Timestamp'].apply(pd.to_datetime)

        # Rescaling data columns from large integers to correct floating point value.
        satellite_df[columns_of_interest] *= 0.0001

        # The derivative is taken per satellite, *before* concatenating.
        # Differentiating the concatenated table would difference the last
        # TERRA rows against the first AQUA rows and corrupt the gradient at
        # the boundary between the two series.
        satellite_df = calculate_time_series_derivative(satellite_df, 'median')

        df_list.append(satellite_df)

    return pd.concat(df_list)

def import_MODIS_COMBINED(district):
    """Load the combined MODIS 16-day NDVI CSV for a district.

    The 'Timestamp' column is parsed to datetimes and also used as the row
    index. A '<column>_dt' derivative column is then added for every column
    whose name contains 'median'.
    """
    csv_folder = f'/content/drive/Shared drives/Colombia SPRINT/Test Districts/{district}/CSV Data'
    ndvi = pd.read_csv(f'{csv_folder}/{district}_MODIS_16D_COMBINED_NDVI.csv', index_col=0)

    # Parse the timestamps, then index by them while keeping the column.
    ndvi['Timestamp'] = ndvi['Timestamp'].apply(pd.to_datetime)
    ndvi = ndvi.set_index('Timestamp', drop=False)

    return calculate_time_series_derivative(ndvi, 'median')


def import_CHIRPS(district):
    """Load the daily CHIRPS CSV for a district.

    The 'Timestamp' column is parsed to datetimes and also used as the row
    index. A '<column>_dt' derivative column is then added for every column
    whose name contains 'median'.
    """
    csv_folder = f'/content/drive/Shared drives/Colombia SPRINT/Test Districts/{district}/CSV Data'
    chirps = pd.read_csv(f'{csv_folder}/{district}_CHIRPS_DAILY.csv', index_col=0)

    # Parse the timestamps, then index by them while keeping the column.
    chirps['Timestamp'] = chirps['Timestamp'].apply(pd.to_datetime)
    chirps = chirps.set_index('Timestamp', drop=False)

    return calculate_time_series_derivative(chirps, 'median')

def import_PERSIANN(district):
    """Load the daily PERSIANN CSV for a district.

    The 'Timestamp' column is parsed to datetimes and also used as the row
    index. A '<column>_dt' derivative column is then added for every column
    whose name contains 'median'.
    """
    csv_folder = f'/content/drive/Shared drives/Colombia SPRINT/Test Districts/{district}/CSV Data'
    persiann = pd.read_csv(f'{csv_folder}/{district}_PERSIANN_DAILY.csv', index_col=0)

    # Parse the timestamps, then index by them while keeping the column.
    persiann['Timestamp'] = persiann['Timestamp'].apply(pd.to_datetime)
    persiann = persiann.set_index('Timestamp', drop=False)

    return calculate_time_series_derivative(persiann, 'median')

def import_TerraClimate(district):
    """Load the monthly TerraClimate CSV for a district.

    The 'Timestamp' column is parsed to datetimes and also used as the row
    index. The '<variable>_median' and '<variable>_stdDev' columns are
    rescaled from large integers into floating point values, and a
    '<column>_dt' derivative column is then added for every column whose name
    contains 'median'.
    """
    csv_folder = f'/content/drive/Shared drives/Colombia SPRINT/Test Districts/{district}/CSV Data'
    climate = pd.read_csv(f'{csv_folder}/{district}_TERRA_CLIMATE_MONTHLY.csv', index_col=0)

    # Parse the timestamps, then index by them while keeping the column.
    climate['Timestamp'] = climate['Timestamp'].apply(pd.to_datetime)
    climate = climate.set_index('Timestamp', drop=False)

    # Scaling factor applied to each variable's median and stdDev columns.
    #
    # In the documentation for TerraClimate monthly the pdsi scaling relation
    # is given as 0.01, however this puts the min/max values to be between
    # approximately -40 and 40 whereas it should be between -4 and 4. So the
    # pdsi scaling factor is set to 0.001 instead.
    scale_by_variable = {
        'aet': 0.1,
        'def': 0.1,
        'pet': 0.1,
        'soil': 0.1,
        'srad': 0.1,
        'tmmn': 0.1,
        'tmmx': 0.1,
        'vpd': 0.01,
        'vs': 0.01,
        'pdsi': 0.001,
        'vap': 0.001,
    }

    for variable, scale in scale_by_variable.items():
        climate[[f'{variable}_median', f'{variable}_stdDev']] *= scale

    return calculate_time_series_derivative(climate, 'median')

def import_IMERG(district):
    """Load the monthly IMERG CSV for a district.

    The 'Timestamp' column is parsed to datetimes and also used as the row
    index. A '<column>_dt' derivative column is then added for every column
    whose name contains 'median'.
    """
    csv_folder = f'/content/drive/Shared drives/Colombia SPRINT/Test Districts/{district}/CSV Data'
    imerg = pd.read_csv(f'{csv_folder}/{district}_IMERG_MONTHLY.csv', index_col=0)

    # Parse the timestamps, then index by them while keeping the column.
    imerg['Timestamp'] = imerg['Timestamp'].apply(pd.to_datetime)
    imerg = imerg.set_index('Timestamp', drop=False)

    return calculate_time_series_derivative(imerg, 'median')

# Calculates the time derivative for an aggregated quantity.
def calculate_time_series_derivative(dataframe, aggregation):
    """Add a '<column>_dt' gradient column for every matching column.

    Every column whose name contains `aggregation` is differentiated with
    ``np.gradient`` using its default unit spacing, i.e. the gradient is taken
    per row rather than per unit of time. The result is stored in a new column
    named '<column>_dt'.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table to differentiate. It is modified in place.
    aggregation : str
        Substring selecting the columns to differentiate (e.g. 'median').

    Returns
    -------
    pandas.DataFrame
        The same `dataframe` object, with the derivative columns added.
    """
    # filter() returns a separate frame, so adding columns to `dataframe`
    # inside the loop does not affect the iteration.
    filtered = dataframe.filter(like=f'{aggregation}', axis=1)

    # DataFrame.iteritems() was removed in pandas 2.0; items() is the
    # equivalent that works on both old and new versions.
    for column_name, column_data in filtered.items():
        # Assign the gradient positionally. The rows are already in the
        # frame's own order, so no index alignment is needed.
        dataframe[f'{column_name}_dt'] = np.gradient(column_data.values)

    return dataframe

## Define Timeseries Plotting Function

In [5]:
def plot_interactive_time_series(district):
    """Build a three-row Plotly figure of time-series gradients for a district.

    Row 1 shows the NDVI median gradient for each MODIS image collection,
    row 2 the 14-day resampled median of the CHIRPS and PERSIANN precipitation
    gradients, and row 3 the TerraClimate PDSI gradient. The background is
    shaded for every event in the module-level `extreme_weather_table`.

    Parameters
    ----------
    district : str
        District name passed to the import functions and used as the title.

    Returns
    -------
    plotly.graph_objs.Figure
        The assembled figure.
    """
    MODIS_AQUA_TERRA_df = import_MODIS_TERRA_AQUA(district)
    CHIRPS_df = import_CHIRPS(district)
    PERSIANN_df = import_PERSIANN(district)
    TerraClimate_df = import_TerraClimate(district)

    # Raw string: '\p' is not a valid escape sequence, so a plain string
    # literal triggers a warning on recent Python versions.
    fig = make_subplots(rows=3, cols=1,
                        shared_xaxes=True,
                        vertical_spacing=0.02,
                        y_title=r'$\partial/\partial t$')

    ### Plotting MODIS 16D NDVI data ###
    nameDict ={'MODIS_16D_AQUA_VEG': 'Modis Aqua 250m 16 Day NDVI',
               'MODIS_16D_TERRA_VEG': 'Modis Terra 250m 16 Day NDVI'}

    # Grouping NDVI data by satellite (TERRA and AQUA) and plotting.
    for collection_name, collection in MODIS_AQUA_TERRA_df.groupby('Image_Collection'):

        # Create scatter plot.
        NDVI_scatter = go.Scatter(
                                  x=collection['Timestamp'],
                                  y=collection['NDVI_median_dt'],
                                  name=nameDict[collection_name],
                                  mode='lines+markers'
                                  )
        # Add scatter plot to figure.
        fig.add_trace(NDVI_scatter, row=1, col=1)

    ### Plotting CHIRPS Precipitation data ###
    # Resample once and reuse the result for both the x and y values.
    CHIRPS_resampled = CHIRPS_df['precipitation_median_dt'].resample('14D').median()
    CHIRPS_scatter = go.Scatter(
                                x=CHIRPS_resampled.index,
                                y=CHIRPS_resampled,
                                marker_color='green',
                                name='CHIRPS Precipitation - 14 Day Rolling Median (mm)',
                                mode='lines+markers'
                                )

    fig.add_trace(CHIRPS_scatter, row=2, col=1)

    ### Plotting PERSIANN Precipitation data ###
    PERSIANN_resampled = PERSIANN_df['precipitation_median_dt'].resample('14D').median()
    PERSIANN_scatter = go.Scatter(
                                  x=PERSIANN_resampled.index,
                                  y=PERSIANN_resampled,
                                  marker_color='black',
                                  name='PERSIANN Precipitation - 14 Day Rolling Median (mm)',
                                  mode='lines+markers'
                                  )

    fig.add_trace(PERSIANN_scatter, row=2, col=1)

    ### Plotting TerraClimate PDSI data ###
    TerraClimate_PDSI_Scatter = go.Scatter(
                                           x=TerraClimate_df.index,
                                           y=TerraClimate_df['pdsi_median_dt'],
                                           marker_color='purple',
                                           name='TerraClimate PDSI',
                                           mode='lines+markers'
                                          )

    fig.add_trace(TerraClimate_PDSI_Scatter, row=3, col=1)


    ### Shading plot background to correspond to a disaster. ###
    disasterColourDict = {'Drought': 'orange',
                          'Earthquake': 'brown',
                          'Flood': 'darkturquoise',
                          'Wildfire': 'maroon',
                          'Storm': 'grey'}

    # Generating a shape to be used as background shading for each disaster
    # using list comprehension. yref="paper" with y0=0 and y1=1 spans the
    # full height of the figure.
    shapes=[
            dict(type="rect",
                 yref="paper",
                 x0=disaster['Start_Datetime'].strftime('%Y-%m-%d'),
                 y0=0,
                 x1=disaster['End_Datetime'].strftime('%Y-%m-%d'),
                 y1=1,
                 fillcolor=disasterColourDict[disaster['Disaster']],
                 opacity=1,
                 layer="below",
                 line_width=0.2)
                 for _, disaster in extreme_weather_table.iterrows()
           ]

    # Add shapes to figure.
    fig.update_layout(shapes=shapes)

    ### Updating axes, title and legend. ###
    # Fixing y-axis range.
    fig.update_yaxes(fixedrange=True)
    # Adding title.
    fig.update_layout(title=f'{district}')
    # Updating legend layout.
    fig.update_layout(legend=dict(
                                  orientation="h"
                                 ))

    return fig

## Creating a Test Interactive Plot using Plotly

In [6]:
# District used for the test plot.
district = 'Dosquebradas'

# Build the gradient figure for that district.
fig = plot_interactive_time_series(district)

# Leaving `fig` as the last expression makes the notebook display it.
fig

## Creating Interactive Time Series Visualisations for Each District


In [7]:
# Build the interactive figure for each district and save it as a standalone
# HTML file inside that district's folder.
for district_name in ('Dosquebradas', 'Versailles'):
    html_path = f'/content/drive/Shared drives/Colombia SPRINT/Test Districts/{district_name}/{district_name}_time_series_gradient.html'
    fig = plot_interactive_time_series(district_name)
    fig.write_html(html_path)

## Creating Static Plots for Each District by Year

In [8]:
# Years to export: 2000 to 2020 inclusive.
years = np.arange(2000, 2021)

for district_name in ('Dosquebradas', 'Versailles'):
    # Show which district is currently being processed.
    display(district_name)

    # The figure is built once per district; only its x-axis window changes
    # from one exported image to the next.
    fig = plot_interactive_time_series(district_name)

    for year in years:
        filepath = f'/content/drive/Shared drives/Colombia SPRINT/Test Districts/{district_name}/Yearly Static Plots/{district_name}_{year}_gradient.png'

        # Restrict the x-axis to the current calendar year; constrain="domain"
        # compresses the axis by decreasing its domain.
        year_start = f'{year}-01-01'
        year_end = f'{year+1}-01-01'
        fig.update_xaxes(range=[year_start, year_end], constrain="domain")

        fig.write_image(filepath)

'Dosquebradas'

'Versailles'