# Time Series

---

In [23]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Data

In [14]:
# Load the raw Petrignano aquifer CSV and preview its first three rows
df = pd.read_csv(
    filepath_or_buffer="../data/Acea-Smart-Water/Aquifer_Pretignano/raw/Aquifer_Petrignano.csv"
)

df.head(n=3)

Unnamed: 0,Date,Rainfall_Bastia_Umbra,Depth_to_Groundwater_P24,Depth_to_Groundwater_P25,Temperature_Bastia_Umbra,Temperature_Petrignano,Volume_C10_Petrignano,Hydrometry_Fiume_Chiascio_Petrignano
0,14/03/2006,,-22.48,-22.18,,,,
1,15/03/2006,,-22.38,-22.14,,,,
2,16/03/2006,,-22.25,-22.04,,,,


In [15]:
# Report how many rows and columns the freshly loaded frame has

rows, columns = len(df.index), len(df.columns)

print(f"The dimensions of this dataset are: {rows} Rows and {columns} Columns")

The dimensions of this dataset are: 5223 Rows and 8 Columns


In [16]:
# Keep only the rows that have a rainfall reading (in the preview above the
# earliest dates have none), renumber the index from 0, and then discard the
# two columns that are not used in the rest of the analysis.
df = (
    df.dropna(subset=['Rainfall_Bastia_Umbra'])
    .reset_index(drop=True)
    .drop(columns=['Depth_to_Groundwater_P24', 'Temperature_Petrignano'])
)

In [17]:
# Report how many rows and columns remain after filtering

rows, columns = len(df.index), len(df.columns)

print(f"The dimensions of this dataset are: {rows} Rows and {columns} Columns")

The dimensions of this dataset are: 4199 Rows and 6 Columns


In [18]:
# Preview the first three rows of the filtered frame
df.head(n=3)

Unnamed: 0,Date,Rainfall_Bastia_Umbra,Depth_to_Groundwater_P25,Temperature_Bastia_Umbra,Volume_C10_Petrignano,Hydrometry_Fiume_Chiascio_Petrignano
0,01/01/2009,0.0,-31.14,5.2,-24530.688,2.4
1,02/01/2009,0.0,-31.11,2.3,-28785.888,2.5
2,03/01/2009,0.0,-31.07,4.4,-25766.208,2.4


In [19]:
# Summarize each column's dtype and non-null count; this shows which columns
# still contain missing values and which are not yet numeric/datetime typed.

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4199 entries, 0 to 4198
Data columns (total 6 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   Date                                  4199 non-null   object 
 1   Rainfall_Bastia_Umbra                 4199 non-null   float64
 2   Depth_to_Groundwater_P25              4172 non-null   float64
 3   Temperature_Bastia_Umbra              4199 non-null   float64
 4   Volume_C10_Petrignano                 4198 non-null   float64
 5   Hydrometry_Fiume_Chiascio_Petrignano  4199 non-null   float64
dtypes: float64(5), object(1)
memory usage: 197.0+ KB


In [None]:
# Change/simplify the column names.
# The names are assigned by position, so their order must match the frame's
# current column order.

df.columns = [
    'date',
    'rainfall',
    'depth_to_groundwater',
    'temperature',
    'drainage_volume',
    'river_hydrometry',
]
df.head(n=3)

Unnamed: 0,date,rainfall,depth_to_groundwater,temperature,drainage_volume,river_hydrometry
0,01/01/2009,0.0,-31.14,5.2,-24530.688,2.4
1,02/01/2009,0.0,-31.11,2.3,-28785.888,2.5
2,03/01/2009,0.0,-31.07,4.4,-25766.208,2.4


In [None]:
# The column(s) to predict; every other column is treated as a feature.
targets = ['depth_to_groundwater']
features = [name for name in df.columns if name not in targets]
# This is the same as writing the list out by hand:
# features = ['date', 'rainfall', 'temperature', 'drainage_volume', 'river_hydrometry']
features

['date', 'rainfall', 'temperature', 'drainage_volume', 'river_hydrometry']

---

## Data Visualization

Features:

- `Rainfall` indicates the quantity of rain falling (mm)

- `Temperature` indicates the temperature (°C)

- `Volume` indicates the volume of water taken from the drinking water treatment plant (m³)

- `Hydrometry` indicates the water level of the Chiascio river (m)


Target:

`Depth to Groundwater` indicates the groundwater level (m from ground level)

In [None]:
# Plot every measured series against time, one subplot per column.
#
# To complete the data for plotting we use forward fill as a naive method
# (each NaN is replaced by the last valid observation before it). The fill is
# applied only to the plotted values; `df` itself is not modified.

# The raw 'date' column holds day-first strings (dd/mm/YYYY). Parse it so the
# x axis is a real time axis rather than thousands of categorical labels, and
# so the date-based x limits below are meaningful.
dates = pd.to_datetime(df['date'], format='%d/%m/%Y')

plot_columns = [column for column in df.columns if column != 'date']

# Shared x range for all subplots. pd.Timestamp is used so that no extra
# import (datetime.date) is required by this cell.
x_limits = [pd.Timestamp(2009, 1, 1), pd.Timestamp(2020, 6, 30)]

# squeeze=False + ravel() keeps `ax` a 1-D array even when only one column is plotted.
f, ax = plt.subplots(
    nrows=len(plot_columns),
    ncols=1,
    figsize=(15, 5 * len(plot_columns)),
    squeeze=False,
)
ax = ax.ravel()

for i, column in enumerate(plot_columns):
    sns.lineplot(x=dates, y=df[column].ffill(), ax=ax[i], color='dodgerblue')
    ax[i].set_title('Feature: {}'.format(column), fontsize=14)
    ax[i].set_ylabel(ylabel=column, fontsize=14)
    ax[i].set_xlim(x_limits)

## Data Preprocessing