In [None]:
import pandas as pd
import mplfinance as mpf
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import Point
import rasterio
from rasterio.mask import mask
from rasterio.features import geometry_mask
from scipy import fftpack
import matplotlib.dates as mdates
from scipy.signal import periodogram
from scipy.fft import fft, fftfreq
from scipy.signal import periodogram, detrend
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller


In [None]:
# Load the soil-moisture CSV, index it by timestamp and keep a single grid point.
# NOTE(review): `path` is an empty placeholder - point it at the CSV before running.
path = ''
data = pd.read_csv(path, parse_dates=['date']).set_index('date')
data['soil_moisture'] = pd.to_numeric(data['soil_moisture'])

# .copy() detaches the selection from `data`: later cells add a column to
# `ldata`, which on a filtered slice raises SettingWithCopyWarning.
# NOTE(review): exact float equality on coordinates only matches if the CSV
# stores precisely these values - confirm.
at_site = (data['latitude'] == 41.948936) & (data['longitude'] == -93.687760)
ldata = data[at_site].copy()
print(ldata.head())

In [None]:
# Raw soil-moisture time series at the selected grid point.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(ldata.index, ldata['soil_moisture'], label='Soil Moisture', color='blue')
ax.set_title('Soil Moisture: Latitude 41.948936, Longitude -93.687760')
ax.set_xlabel('Date')
ax.set_ylabel('Soil Moisture')
ax.legend()
ax.grid(True)
plt.xticks(rotation=45)  # slanted date labels so they do not overlap
fig.tight_layout()
plt.show()

In [None]:
# Augmented Dickey-Fuller test on the raw series; the first two entries of the
# result are the test statistic and its p-value.
adf_test = adfuller(ldata['soil_moisture'])
adf_statistic, adf_p_value = adf_test[0], adf_test[1]
print(f"ADF : {adf_statistic}")
print(f"p-value: {adf_p_value}")

In [None]:
# Back-of-envelope conversion: 7000 steps * 3 hours -> days -> years.
# NOTE(review): presumably 7000 is a candidate lag count for the ACF cell below - confirm.
elapsed_hours = 7000 * 3
elapsed_days = elapsed_hours / 24
print(elapsed_days / 365)

In [None]:
# Half the number of observations in the selected series.
half_length = len(ldata['soil_moisture']) / 2
print(half_length)

In [None]:
# Autocorrelation of the raw series over a long lag horizon.
# plot_acf opens a brand-new figure unless it is handed an Axes, which left the
# original plt.figure(figsize=(10, 6)) blank and ignored the requested size.
# Creating the Axes first and passing it in fixes both.
ACF_MAX_LAG = 6700  # number of 3-hour lags to display
soil_moisture_series = ldata['soil_moisture']

fig, ax = plt.subplots(figsize=(10, 6))
plot_acf(
    soil_moisture_series,
    # never request more lags than the series can provide
    lags=min(ACF_MAX_LAG, len(soil_moisture_series) - 1),
    ax=ax,
)
ax.set_title('Autocorrelation Plot of Surface Soil Moisture')
ax.set_xlabel('3 Hour Lags')
ax.set_ylabel('Autocorrelation')
plt.show()

In [None]:
# First-difference the series and store it next to the raw values.
# The first row of the new column is NaN by construction: a .dropna() before the
# assignment has no effect because pandas re-aligns the result to the frame's
# index, so consumers must drop the NaN when they read the column.
# assign() returns a new frame, which also avoids writing into a filtered slice
# of `data` (SettingWithCopyWarning).
ldata = ldata.assign(soil_moisture_diff=ldata['soil_moisture'].diff())

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(ldata.index, ldata['soil_moisture_diff'], color='blue')
ax.set_title('Differenced Soil Moisture Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Differenced Soil Moisture')
ax.grid(True)
plt.show()


In [None]:
# Additive trend/seasonal/residual split of the raw series.
# NOTE(review): 120 samples would be 15 days if the data is 3-hourly - confirm
# this is the intended seasonal window.
seasonal_period = 120
decomposition = seasonal_decompose(
    ldata['soil_moisture'],
    model='additive',
    period=seasonal_period,
)

# Keep the individual components available for later cells.
trend_component, seasonal_component, residual_component = (
    decomposition.trend,
    decomposition.seasonal,
    decomposition.resid,
)

decomposition.plot()
plt.show()


In [None]:

# ACF and PACF of the differenced series; the NaN left by diff() is dropped once
# and the cleaned series is shared by both plots.
differenced_series = ldata['soil_moisture_diff'].dropna()
for correlogram in (plot_acf, plot_pacf):
    correlogram(differenced_series, lags=120)
plt.show()


In [None]:
# Spectrum of the differenced surface series: single-sided FFT magnitude plus a
# flattop-windowed periodogram.
#
# diff() leaves a NaN in the first row. It must be dropped before any
# arithmetic: otherwise the mean is NaN, the mean-removed signal is all NaN,
# and both the FFT and the periodogram come out entirely NaN.
diff_values = ldata['soil_moisture_diff'].dropna().to_numpy()
soil_moisture_diff = diff_values - diff_values.mean()  # remove the DC offset

timestep = 3               # hours between samples
Fs = 1 / timestep          # sampling rate in 1/hours
n = len(soil_moisture_diff)
t = np.arange(n) * timestep  # elapsed time in hours

y_fft = fft(soil_moisture_diff)
fr = fftfreq(n, d=timestep)[:n//2]   # non-negative frequencies only
y_m = 2/n * np.abs(y_fft[:n//2])     # single-sided amplitude

frequencies, power_spectral_density = periodogram(soil_moisture_diff, fs=Fs, window='flattop')

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

ax[0].plot(t, soil_moisture_diff)
ax[0].set_title('Time Series')
ax[0].set_xlabel('Time (hours)')  # t is already scaled by the 3-hour timestep
ax[0].set_ylabel('Differenced Soil Moisture')
ax[0].grid(True)

ax[1].stem(fr, y_m)
ax[1].set_title('Frequency Domain')
ax[1].set_xlabel('Frequency (1/Hours)')
ax[1].set_ylabel('Magnitude')
ax[1].grid(True)
fig.tight_layout()

plt.figure(figsize=(14, 7))
plt.semilogy(frequencies, power_spectral_density)
plt.title('Periodogram')
plt.xlabel('Frequency (1/Hours)')
plt.ylabel('Power Spectral Density')
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Find the dominant frequencies
# Pretty obvious jump at approx 0.018 freq corresponding to 55 hours - implying 2 day dry cycle, seasonality.
# "Dominant" = FFT magnitude above its own 95th percentile. The zero-frequency
# bin is excluded: it has no period (1/0) and would print as inf with a
# divide-by-zero RuntimeWarning.
dominant_mask = (y_m > np.percentile(y_m, 95)) & (fr > 0)
dominant_frequencies = fr[dominant_mask]
dominant_periods = 1 / dominant_frequencies
# NOTE(review): bins from index 40 upward are treated as the noise floor -
# the cut-off looks hand-picked; confirm.
print(f"Noise power : {np.mean(power_spectral_density[40:])}")
print("Dominant Frequencies and Corresponding Periods (in hours):")
for freq, period in zip(dominant_frequencies, dominant_periods):
    print(f"Frequency: {freq:.3f} 1/hours, Period: {period:.3f} hours")


In [None]:
# Periodogram bins whose power exceeds the 95th percentile of the spectrum.
# The zero-frequency bin is excluded because its period (1/0) is undefined and
# would otherwise print as inf with a divide-by-zero RuntimeWarning.
psd_threshold = np.percentile(power_spectral_density, 95)
dominant_mask = (power_spectral_density > psd_threshold) & (frequencies > 0)
dominant_frequencies = frequencies[dominant_mask]
dominant_periods = 1 / dominant_frequencies

print("Dominant Frequencies and Corresponding Periods (in hours):")
for freq, period in zip(dominant_frequencies, dominant_periods):
    print(f"Frequency: {freq:.3f} 1/hours, Period: {period:.3f} hours")

In [None]:
# Load the root-zone soil-moisture CSV and keep the same grid point as before.
# NOTE(review): absolute, machine-specific path - move it to a configurable
# data directory so the notebook runs on other machines.
path = '/Users/avalottig/Desktop/Computer-Science-MSc/ADIA-Project/Code/MScProject/data/SPL4SMAU_2020-2023/smap_iowa_2020_2023_rootzone.csv'
data = pd.read_csv(path, parse_dates=['date']).set_index('date')
data['soil_moisture'] = pd.to_numeric(data['soil_moisture'])

# .copy() detaches the selection from `data`; later cells add a column to
# `yeardata` and drop rows from it, which on a filtered slice raises
# SettingWithCopyWarning.
at_site = (data['latitude'] == 41.948936) & (data['longitude'] == -93.687760)
yeardata = data[at_site].copy()
print(yeardata.head())

In [None]:
# Raw root-zone soil-moisture series at the selected grid point.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(yeardata.index, yeardata['soil_moisture'], label='Soil Moisture', color='blue')
ax.set_title('Soil Moisture: Latitude 41.948936, Longitude -93.687760')
ax.set_xlabel('Date')
ax.set_ylabel('Soil Moisture')
ax.legend()
ax.grid(True)
plt.xticks(rotation=45)  # slanted date labels so they do not overlap
fig.tight_layout()
plt.show()


In [None]:
# First-difference the root-zone series (first row is NaN by construction).
# assign() builds a new frame instead of writing a column into a filtered slice
# of `data`, which would raise SettingWithCopyWarning.
yeardata = yeardata.assign(soil_moisture_diff=yeardata['soil_moisture'].diff())

fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(yeardata.index, yeardata['soil_moisture_diff'], label='Differenced Soil Moisture', color='red')
ax.set_title('Differenced Soil Moisture: Latitude 41.948936, Longitude -93.687760')
ax.set_xlabel('Date')
ax.set_ylabel('Differenced Soil Moisture')
ax.legend()
ax.grid(True)
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Spectrum of the differenced root-zone series: single-sided FFT magnitude plus
# a flattop-windowed periodogram.
#
# Rows containing NaN (at least the first row, from diff()) are removed by
# rebinding to a new frame rather than dropna(inplace=True), which mutated a
# filtered slice and made the cell non-idempotent. Later cells still see a
# NaN-free `yeardata`, as before.
yeardata = yeardata.dropna()

# The original computed the mean-removed signal and then immediately overwrote
# it with the raw differences. The mean-removed version is kept so the DC bin
# does not distort the FFT plot, matching the other FFT cells. The periodogram
# is unaffected because it removes the mean itself by default.
diff_values = yeardata['soil_moisture_diff'].to_numpy()
soil_moisture_diff = diff_values - diff_values.mean()

timestep = 3               # hours between samples
Fs = 1 / timestep          # sampling rate in 1/hours
n = len(soil_moisture_diff)
t = np.arange(n) * timestep  # elapsed time in hours

y_fft = fft(soil_moisture_diff)
fr = fftfreq(n, d=timestep)[:n//2]   # non-negative frequencies only
y_m = 2/n * np.abs(y_fft[:n//2])     # single-sided amplitude

frequencies, power_spectral_density = periodogram(soil_moisture_diff, fs=Fs, window='flattop')

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

ax[0].plot(t, soil_moisture_diff)
ax[0].set_title('Time Series')
ax[0].set_xlabel('Time (hours)')  # t is already scaled by the 3-hour timestep
ax[0].set_ylabel('Differenced Soil Moisture')
ax[0].grid(True)

ax[1].stem(fr, y_m)
ax[1].set_title('Frequency Domain')
ax[1].set_xlabel('Frequency (1/Hours)')
ax[1].set_ylabel('Magnitude')
ax[1].grid(True)
fig.tight_layout()

# NOTE(review): labels say "2020" although the source file name covers
# 2020-2023 - confirm which is intended.
plt.figure(figsize=(14, 7))
plt.semilogy(frequencies, power_spectral_density)
plt.title('2020 - Periodogram')
plt.xlabel('2020 - Frequency (1/Hours)')
plt.ylabel('2020 - Power Spectral Density')
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Reload the series, difference it and report the five strongest FFT peaks.
# NOTE(review): `path` is an empty placeholder - point it at the CSV before running.
path = ''
data = pd.read_csv(path, parse_dates=['date']).set_index('date')
data['soil_moisture'] = pd.to_numeric(data['soil_moisture'])

# This cell rebinds `ldata`; the differenced column is re-created here so later
# cells that read ldata['soil_moisture_diff'] do not hit a KeyError.
at_site = (data['latitude'] == 41.948936) & (data['longitude'] == -93.687760)
ldata = data[at_site].assign(
    soil_moisture_diff=lambda frame: frame['soil_moisture'].diff()
)

# Drop the leading NaN produced by diff() before transforming.
soil_moisture_diff = ldata['soil_moisture_diff'].dropna().to_numpy()

timestep = 3       # hours between samples
Fs = 1 / timestep  # sampling rate in 1/hours

n = len(soil_moisture_diff)
y_fft = fft(soil_moisture_diff)
fr = fftfreq(n, d=timestep)[:n//2]   # non-negative frequencies only
y_m = 2/n * np.abs(y_fft[:n//2])     # single-sided amplitude

# Rank by magnitude, skipping bin 0: the zero-frequency bin has no period
# (1/0 -> inf) and is not a cycle. The +1 maps back to indices into fr / y_m.
top_indices = np.argsort(y_m[1:])[-5:][::-1] + 1
top_frequencies = fr[top_indices]
top_magnitudes = y_m[top_indices]
top_periods = 1 / top_frequencies

print("Top 5 Frequencies and Their Corresponding Time Periods:")
for rank, (freq, period, magnitude) in enumerate(
    zip(top_frequencies, top_periods, top_magnitudes), start=1
):
    print(f"{rank}. Frequency: {freq:.6f} (1/hours) - Period: {period:.2f} hours - Magnitude: {magnitude:.6f}")

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

ax[0].plot(np.arange(n) * timestep, soil_moisture_diff, color='red')
ax[0].set_title('Differenced Soil Moisture Over Time')
ax[0].set_xlabel('Time (hours)')  # x values are sample index * 3 hours
ax[0].set_ylabel('Differenced Soil Moisture')
ax[0].grid(True)

ax[1].stem(fr, y_m)
ax[1].set_title('Frequency Domain')
ax[1].set_xlabel('Frequency (1/Hours)')
ax[1].set_ylabel('Magnitude')
ax[1].grid(True)

plt.tight_layout()
plt.show()


Notes for yearly Fourier decomposition/transform:
- Clear peaks at periods of approximately 24 hours and 12 hours
- Evidence of monthly and two-monthly seasonality
- No clear cycles at periods shorter than 12 hours

Notes for periodogram:
- Consistent seasonality and dominant frequencies observed.
- Strange peaks at frequencies of 0.1125 and approx. 0.13 (1/hours), i.e. periods of roughly 8.9 and 7.7 hours - TODO: investigate further


In [None]:
# Quick unit conversion.
# NOTE(review): presumably 1500 hours expressed in days - confirm.
hours_value = 1500
print(hours_value / 24)

In [None]:
# Periodogram bins whose power exceeds the 95th percentile of the spectrum.
# The zero-frequency bin is excluded because its period (1/0) is undefined and
# would otherwise print as inf with a divide-by-zero RuntimeWarning.
psd_threshold = np.percentile(power_spectral_density, 95)
dominant_mask = (power_spectral_density > psd_threshold) & (frequencies > 0)
dominant_frequencies = frequencies[dominant_mask]
dominant_periods = 1 / dominant_frequencies
print("Dominant Frequencies and Corresponding Periods (in hours):")
for freq, period in zip(dominant_frequencies, dominant_periods):
    print(f"Frequency: {freq:.3f} 1/hours, Period: {period:.3f} hours")


In [None]:


# Seasonal decomposition of the root-zone series.
# The model fitted here is additive; the original title and variable name said
# "multiplicative", which mislabelled the figure.
# NOTE(review): period=18 samples is 54 hours at the 3-hour timestep used in
# this notebook, presumably chosen to match the ~55 hour cycle noted earlier -
# confirm.
additive_decomp = seasonal_decompose(yeardata['soil_moisture'], model='additive', period=18)
multiplicative_decomp = additive_decomp  # legacy name, kept for any cell that still refers to it

# DecomposeResult.plot() builds its own figure, so a preceding plt.figure(...)
# only leaves a blank canvas behind. Size and annotate the returned figure.
fig = additive_decomp.plot()
fig.set_size_inches(14, 7)
fig.suptitle('Additive Decomposition', fontsize=16)
for panel in fig.axes:
    panel.grid(True)  # grid on every component, not just the last panel
fig.tight_layout()
plt.show()



Questions to cover in presentation:

- Why do we see a peak at zero frequency in the periodogram?
- Why does the flattop window work but not the hann, hamming and boxcar windows?


In [None]:
# Compare periodograms across window functions and detrending modes, then list
# the strongest frequencies found by any combination.
#
# The differences are recomputed from the raw column and the leading NaN from
# diff() is dropped. Reading the stored column kept that NaN, which made the
# mean and every spectrum NaN, and failed with KeyError whenever `ldata` had
# been reloaded without the column.
soil_moisture_diff = ldata['soil_moisture'].diff().dropna().to_numpy()
soil_moisture_diff_mean_removed = soil_moisture_diff - np.mean(soil_moisture_diff)

timestep = 3  # 3-hour intervals
Fs = 1 / timestep  # Sampling rate
n = len(soil_moisture_diff_mean_removed)
t = np.arange(n) * timestep  # time in hours

# Single-sided FFT amplitude (kept for interactive inspection).
y_fft = fft(soil_moisture_diff_mean_removed)
fr = fftfreq(n, d=timestep)[:n//2]
y_m = 2/n * np.abs(y_fft[:n//2])

windows = ['hann', 'hamming', 'flattop']
detrending = ['constant', 'linear']
all_frequencies = []
all_psd = []
# Results are stored window-major: index 0 is (windows[0], detrending[0]).
for window in windows:
    for method in detrending:
        frequencies, power_spectral_density = periodogram(
            soil_moisture_diff_mean_removed, fs=Fs, window=window, detrend=method
        )
        all_frequencies.append(frequencies)
        all_psd.append(power_spectral_density)

# Pool the bins above each spectrum's own 95th percentile. The zero-frequency
# bin is skipped because its period (1/0) is undefined.
dominant_frequencies = []
dominant_psd_values = []
for freqs, psd in zip(all_frequencies, all_psd):
    dominant_mask = (psd > np.percentile(psd, 95)) & (freqs > 0)
    dominant_frequencies.extend(freqs[dominant_mask])
    dominant_psd_values.extend(psd[dominant_mask])

dominant_frequencies = np.array(dominant_frequencies)
dominant_psd_values = np.array(dominant_psd_values)
sorted_indices = np.argsort(-dominant_psd_values)  # strongest first
sorted_frequencies = dominant_frequencies[sorted_indices]
sorted_psd_values = dominant_psd_values[sorted_indices]
sorted_periods = 1 / sorted_frequencies

print("Dominant Frequencies, Periods (in hours):")
for freq, period, psd in zip(sorted_frequencies, sorted_periods, sorted_psd_values):
    print(f"Frequency: {freq:.3f} 1/hours, Period: {period:.3f} hours, PSD: {psd:.3e}")

# Only the first combination is drawn; the others are kept in all_psd.
plt.figure(figsize=(14, 7))
plt.semilogy(all_frequencies[0], all_psd[0], label=f'{windows[0]} window, {detrending[0]} detrending')
plt.title('Periodogram')
plt.xlabel('Frequency (1/Hours)')
plt.ylabel('Power Spectral Density')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
