# Thomas Dairy Automatic Update of Continous Data

In [75]:
import requests
import os
import shutil
import pandas as pd

### Download and Replace Data

#### TD1 Soil Moisture 15 cm

In [76]:
# Define the URL of the new file to download
new_file_url_TD1_SoilMoisture_Top15cm = 'https://monitormywatershed.org/api/csv-values/?result_id=7638'

# Define the local folder for the PowerBI files
download_folder = 'U:/Research_and_Innovation/_ModellingGroupProjects/ThomasDairy_Continuous Data/ThomasDairy_Data/ThomasDairy1'

# Create the full path for the new file
new_file_path = os.path.join(download_folder, 'CWS-WQ-ThomasDairy-1_GroPoint_Profile_Moisture_7638.csv')

# Download the file
try:
    response = requests.get(new_file_url_TD1_SoilMoisture_Top15cm, stream=True)
    response.raise_for_status()
    
    # Save the new file to the local folder (overwrite if it exists)
    with open(new_file_path, 'wb') as file:
        for chunk in response.iter_content(chunk_size=8192):
            file.write(chunk)

    print(f'New file downloaded and saved to: {new_file_path}')

except Exception as e:
    print(f'Error downloading the new file: {e}')

New file downloaded and saved to: U:/Research_and_Innovation/_ModellingGroupProjects/ThomasDairy_Continuous Data/ThomasDairy_Data/ThomasDairy1\CWS-WQ-ThomasDairy-1_GroPoint_Profile_Moisture_7638.csv


### Read New File and Transform into a Dataframe

In [77]:
# Read the CSV file into a dataframe
df = pd.read_csv(new_file_path, skiprows=64)

In [78]:
# Remove the first and second columns
df = df.iloc[:, 2:]

# Rename the remaining columns
df.rename(columns={df.columns[0]: 'DateTimePST', df.columns[1]: 'TD1_SoilMoisture_Top15cm'}, inplace=True)

# Display the resulting DataFrame
print(df.head())

           DateTimePST  TD1_SoilMoisture_Top15cm
0  2023-05-10 13:38:00                       0.1
1  2023-05-10 13:40:00                       0.1
2  2023-05-10 13:42:00                       0.1
3  2023-05-10 13:44:00                       0.1
4  2023-05-10 13:46:00                       0.1


In [79]:
# Convert the 'DateTime' column to datetime data type
df['DateTimePST'] = pd.to_datetime(df['DateTimePST'])

### Read Excel Workbook with Site Data Copilation

In [80]:
# Define the path to the existing Excel workbook with the compiled data
TD1_existing_file_path = 'U:/Research_and_Innovation/_ModellingGroupProjects/ThomasDairy_Continuous Data/ThomasDairy_Data/ThomasDairy1/ThomasDairy1_all.xlsx'

# Read the existing data 
existing_df = pd.read_excel(TD1_existing_file_path)


In [81]:
# Find the oldest datetime in the 'DateTime' column
oldest_datetime_existing = existing_df['DateTime'].max()

### Compare Existing Data to New Data

In [82]:
# Filter the DataFrame based on the datetime to show only rows where the datetime is 
# newer than the last datatime in the existing data
filtered_df = df[df['DateTimePST'] > oldest_datetime_existing]


In [83]:
filtered_df

Unnamed: 0,DateTimePST,TD1_SoilMoisture_Top15cm
45652,2023-09-19 07:54:00,67.2
45653,2023-09-19 07:56:00,67.2
45654,2023-09-19 07:58:00,67.2
45655,2023-09-19 08:00:00,67.2
45656,2023-09-19 08:02:00,67.1
...,...,...
113087,2023-12-26 14:48:00,89.5
113088,2023-12-26 14:50:00,89.5
113089,2023-12-26 14:52:00,89.5
113090,2023-12-26 14:54:00,89.5


### Append New Data to the Existing Excel Workbook

In [85]:
# Rename columns in 'filtered_df' to match 'existing_df'
filtered_df.rename(columns={'DateTimePST': 'DateTime', 'TD1_SoilMoisture_Top15cm': 'SoilMoisture_top15cm'}, inplace=True)

# Append the filtered data to the existing data
combined_df = pd.concat([existing_df, filtered_df], ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.rename(columns={'DateTimePST': 'DateTime', 'TD1_SoilMoisture_Top15cm': 'SoilMoisture_top15cm'}, inplace=True)


In [87]:
combined_df.tail()

Unnamed: 0,DateTime,SoilMoisture_top15cm,SoilMoisture_15_30cm,SoilMoisture_30_45cm,SoilMoisture_45_60cm,SoilMoisture_60_75cm,SoilMoisture_75_90cm,SoilMoisture_90_105cm,SoilMoisture_105_120cm,SoilTemperature_3.5cm,...,SoilTemperature_80cm,SoilTemperature_90cm,SoilTemperature_100cm,SoilTemperature_110cm,SoilTemperature_120cm,gageHeight,pressureGauge,Keller_Temp,Y520-A_Cond,Y520-A_Temp
113095,2023-12-26 14:48:00,89.5,,,,,,,,,...,,,,,,,,,,
113096,2023-12-26 14:50:00,89.5,,,,,,,,,...,,,,,,,,,,
113097,2023-12-26 14:52:00,89.5,,,,,,,,,...,,,,,,,,,,
113098,2023-12-26 14:54:00,89.5,,,,,,,,,...,,,,,,,,,,
113099,2023-12-26 14:56:00,89.5,,,,,,,,,...,,,,,,,,,,


####TD1_Soil Moisture 15-30 cm

In [89]:
# Define the URL of the new file to download
new_file_url_TD1_SoilMoisture_15_30cm = 'https://monitormywatershed.org/api/csv-values/?result_id=7643'

# Define the local folder for the PowerBI files
download_folder = 'U:/Research_and_Innovation/_ModellingGroupProjects/ThomasDairy_Continuous Data/ThomasDairy_Data/ThomasDairy1'

# Create the full path for the new file
new_file_path = os.path.join(download_folder, 'CWS-WQ-ThomasDairy-1_GroPoint_Profile_Moisture_7643.csv')

# Download the file
try:
    response = requests.get(new_file_url_TD1_SoilMoisture_15_30cm, stream=True)
    response.raise_for_status()
    
    # Save the new file to the local folder (overwrite if it exists)
    with open(new_file_path, 'wb') as file:
        for chunk in response.iter_content(chunk_size=8192):
            file.write(chunk)

    print(f'New file downloaded and saved to: {new_file_path}')

except Exception as e:
    print(f'Error downloading the new file: {e}')

New file downloaded and saved to: U:/Research_and_Innovation/_ModellingGroupProjects/ThomasDairy_Continuous Data/ThomasDairy_Data/ThomasDairy1\CWS-WQ-ThomasDairy-1_GroPoint_Profile_Moisture_7643.csv


In [90]:
# Read the CSV file into a dataframe
df = pd.read_csv(new_file_path, skiprows=64)

In [91]:
# Remove the first and second columns
df = df.iloc[:, 2:]

# Rename the remaining columns
df.rename(columns={df.columns[0]: 'DateTimePST', df.columns[1]: 'TD1_SoilMoisture_15_30cm'}, inplace=True)

# Display the resulting DataFrame
print(df.head())

           DateTimePST  TD1_SoilMoisture_15_30cm
0  2023-05-10 13:38:00                       0.1
1  2023-05-10 13:40:00                       0.1
2  2023-05-10 13:42:00                       0.1
3  2023-05-10 13:44:00                       0.1
4  2023-05-10 13:46:00                       0.1


In [92]:
# Convert the 'DateTime' column to datetime data type
df['DateTimePST'] = pd.to_datetime(df['DateTimePST'])

In [95]:
# Match 'DateTimePST' values to 'DateTime' in 'combined_df' and update 'SoilMoisture_15_30cm'
for index, row in df.iterrows():
    datetime_pst = row['DateTimePST']
    value_15_30cm = row['TD1_SoilMoisture_15_30cm']
    
    # Find the row in 'combined_df' with matching 'DateTime' and update 'SoilMoisture_15_30cm'
    combined_df.loc[combined_df['DateTime'] == datetime_pst, 'SoilMoisture_15_30cm'] = value_15_30cm

### Save Updated File

In [97]:
# Save the 'combined_df' DataFrame to replace the existing Excel workbook
combined_df.to_excel('U:/Research_and_Innovation/_ModellingGroupProjects/ThomasDairy_Continuous Data/ThomasDairy_Data/ThomasDairy1/ThomasDairy1_all.xlsx', index=False)