In [None]:
# mount drive
# Makes Google Drive visible under /drive so that later cells can read files
# from "/drive/My Drive/...". Needs the Colab runtime (google.colab module).
from google.colab import drive
drive.mount("/drive") 

Drive already mounted at /drive; to attempt to forcibly remount, call drive.mount("/drive", force_remount=True).


In [None]:
# imports
import numpy as np
import pandas as pd

In [None]:
# imports for plots
from plotly import graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio

# set "plotly_white" as the default plotly template for the figures built in this notebook
pio.templates.default = "plotly_white"

In [None]:
# detailed plot of a decomposition: one stacked panel per component
def plot_decompostion(decomposition_object, y, line_space):
  """Display a three-row plotly figure describing a time-series decomposition.

  Row 1 overlays the original series with its trend, row 2 shows the
  seasonal component and row 3 shows the residual (noise).

  Args:
    decomposition_object: object exposing ``trend``, ``seasonal`` and
      ``resid`` attributes (one value per point of ``line_space``).
    y: values of the original time series.
    line_space: x-axis values used for every trace.

  Returns:
    None. The figure is rendered with ``fig.show()``.
  """
  fig = make_subplots(rows=3, cols=1,
                      subplot_titles=("Trend", "Seasonality", "Noise"))

  # (values, legend name, subplot row) -- added in this exact order
  panels = (
      (y, "initial time series", 1),
      (decomposition_object.trend, "trend", 1),
      (decomposition_object.seasonal, "seasonal", 2),
      (decomposition_object.resid, "Noise", 3),
  )

  for values, label, row_index in panels:
    fig.add_trace(
        go.Scatter(x=line_space, y=values, name=label),
        row=row_index, col=1
    )

  fig.show()

In [None]:
# distribution plots of the noise component
def plot_noise(arr_noise):
  """Display a histogram and a box plot of ``arr_noise`` side by side.

  Args:
    arr_noise: values of the noise (residual) component to summarise.

  Returns:
    None. The figure is rendered with ``fig.show()``.
  """
  fig = make_subplots(rows=1, cols=2,
                      subplot_titles=("Histogram Noise", "Box plot of Noise"))

  # column 1: histogram, column 2: box plot -- both drawn from the same values
  for col_index, trace_cls in enumerate((go.Histogram, go.Box), start=1):
    fig.add_trace(trace_cls(x=arr_noise), row=1, col=col_index)

  fig.update_layout(showlegend=False)
  fig.show()

In [None]:
# analyse
# decomposition of a column into trend + seasonality + noise
# period: the default of 365 records analyses a yearly cycle,
# assuming one record per day
from statsmodels.tsa.seasonal import seasonal_decompose


def analyse_column(data_to_analyse, col_name, model="additive", freq=365, date_col="DATE"):
  """Decompose one column of a data frame and report on its components.

  The column is split into trend, seasonal and residual parts with
  ``statsmodels.tsa.seasonal.seasonal_decompose``. The function then shows
  the decomposition plot, shows a histogram / box plot of the residual and
  prints the mean and standard deviation of the trend and of the residual.

  Args:
    data_to_analyse: data frame holding ``col_name`` and ``date_col``.
    col_name: name of the column to decompose.
    model: decomposition model passed to statsmodels
      ("additive" or "multiplicative").
    freq: number of records in one seasonal cycle (the seasonal period).
      The name is kept for backward compatibility with existing callers.
    date_col: name of the column used as x axis in the decomposition plot.

  Returns:
    None. Results are plotted and printed.
  """
  series = data_to_analyse[col_name]

  # decompose time series
  # statsmodels renamed the `freq` keyword to `period` (deprecated in 0.11 and
  # removed in later releases), so the period has to be passed as `period`.
  # `extrapolate_trend='freq'` is unrelated to that rename and is still valid.
  decomposition = seasonal_decompose(series, model=model, period=freq, extrapolate_trend='freq')

  # decomposition plot
  plot_decompostion(decomposition, y=series, line_space=data_to_analyse[date_col])

  # histogram and box plot of resid
  plot_noise(decomposition.resid)

  # mean and std of trend and noise
  dict_data = { "trend": decomposition.trend, "noise": decomposition.resid }
  for key, arr in dict_data.items():
    print(f"""
      =========================
      {key}
      mean: {np.mean(arr)}
      std: {np.std(arr)}
    """)

In [None]:
# load data
# NOTE: the path points into the Google Drive mounted at /drive and is specific
# to the author's folder layout; adjust it when running elsewhere.
data = pd.read_csv("/drive/My Drive/Colab Notebooks/AgriEdge/weather_dataset/34.269355;-5.938411.csv")

# convert DATE from str to timestamp
# pd.to_datetime parses the whole column in one vectorized call instead of
# building a pd.Timestamp row by row through a Python lambda
data["DATE"] = pd.to_datetime(data["DATE"])

In [None]:
# select data to analyse
# analysis window: date_start is included, date_end is excluded
date_start, date_end = pd.Timestamp("2000-01-01"), pd.Timestamp("2015-01-01")

# keep only the rows whose DATE falls inside [date_start, date_end)
data_to_analyse = data.loc[data["DATE"].ge(date_start) & data["DATE"].lt(date_end)]

In [None]:
# example of max temperature analysis
# runs the decomposition plots, the noise plots and the printed trend/noise
# mean and std on the "T2M_MAX" column, with the default additive model and freq=365
analyse_column(data_to_analyse, col_name="T2M_MAX")


      trend
      mean: 26.467903518570772
      std: 0.5305591798000817
    

      noise
      mean: 0.018917217150585947
      std: 3.713041416286259
    
