# Thomas Dairy Automatic Update of Continous Data

In [2]:
import requests
import os
import pandas as pd
from io import StringIO

### Setting up Process and Functions

#### Download and Replace Data

In [5]:
def download_csv_to_dataframe(new_file_url, column_label):
    try:
        # Send a GET request to download the CSV data
        response = requests.get(new_file_url)
        response.raise_for_status()

        # Read the CSV content directly into a DataFrame using StringIO
        df = pd.read_csv(StringIO(response.text), skiprows=64, header=None)

        # Remove the first and second columns
        df = df.iloc[:, 2:]

        # Rename the remaining columns using the provided label
        df.rename(columns={df.columns[0]: 'DateTimePST', df.columns[1]: column_label}, inplace=True)

        # Convert the 'DateTime' column to datetime data type
        df['DateTimePST'] = pd.to_datetime(df['DateTimePST'])

        # Display the resulting DataFrame
        print(df.head())

        return df

    except Exception as e:
        print(f'Error downloading or processing the new file: {e}')



### Combine Data & Initialize DateTime Columns

In [10]:
def combine_data(existing_file_path, column_rename_dict,df):
    try:
        # Read the existing data from the Excel file
        existing_df = pd.read_excel(existing_file_path)

        # Find the oldest datetime in the 'DateTime' column
        oldest_datetime_existing = existing_df['DateTime'].max()

        # Filter the DataFrame based on the datetime to show only rows where the datetime is 
        # newer than the last datetime in the existing data
        filtered_df = df[df['DateTimePST'] > oldest_datetime_existing]

        # Rename columns in 'filtered_df' based on the provided column renaming dictionary
        filtered_df.rename(columns=column_rename_dict, inplace=True)

        # Append the filtered data to the existing data
        combined_df = pd.concat([existing_df, filtered_df], ignore_index=True)

        return combined_df

    except Exception as e:
        print(f'Error combining data: {e}')



### Combine Data with Datetime Already Initialized

In [23]:
def update_columns_in_combined_df(existing_df, downloaded_df, downloaded_column,existing_column ):
    try:
        for index, row in downloaded_df.iterrows():
            datetime_pst = row['DateTimePST']
            value = row[downloaded_column]

            # Find the row in 'existing_df' with matching 'DateTime' and update the specified columns
            existing_df.loc[existing_df['DateTime'] == datetime_pst, existing_column] = value

        return existing_df

    except Exception as e:
        print(f'Error updating columns: {e}')

### TD1 Soil Moisture 15 cm

In [6]:
# Site 1: Soil Moisture Top 15 cm
new_file_url = 'https://monitormywatershed.org/api/csv-values/?result_id=7638'
column_label = 'TD1_SoilMoisture_Top15cm'
downloaded_df = download_csv_to_dataframe(new_file_url, column_label)

          DateTimePST  TD1_SoilMoisture_Top15cm
0 2023-05-10 13:36:00                       0.1
1 2023-05-10 13:38:00                       0.1
2 2023-05-10 13:40:00                       0.1
3 2023-05-10 13:42:00                       0.1
4 2023-05-10 13:44:00                       0.1


In [11]:
# Apply function to combine data
existing_file_path = 'U:/Research_and_Innovation/_ModellingGroupProjects/ThomasDairy_Continuous Data/ThomasDairy_Data/ThomasDairy1/ThomasDairy1_all.xlsx'
column_rename_dict = {'DateTimePST': 'DateTime', 'TD1_SoilMoisture_Top15cm': 'SoilMoisture_top15cm'}
combined_data = combine_data(existing_file_path, column_rename_dict, downloaded_df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.rename(columns=column_rename_dict, inplace=True)


In [14]:
combined_data.tail()

Unnamed: 0,DateTime,SoilMoisture_top15cm,SoilMoisture_15_30cm,SoilMoisture_30_45cm,SoilMoisture_45_60cm,SoilMoisture_60_75cm,SoilMoisture_75_90cm,SoilMoisture_90_105cm,SoilMoisture_105_120cm,SoilTemperature_3.5cm,...,SoilTemperature_80cm,SoilTemperature_90cm,SoilTemperature_100cm,SoilTemperature_110cm,SoilTemperature_120cm,gageHeight,pressureGauge,Keller_Temp,Y520-A_Cond,Y520-A_Temp
118704,2024-01-03 10:06:00,78.0,,,,,,,,,...,,,,,,,,,,
118705,2024-01-03 10:08:00,78.1,,,,,,,,,...,,,,,,,,,,
118706,2024-01-03 10:10:00,78.2,,,,,,,,,...,,,,,,,,,,
118707,2024-01-03 10:12:00,78.3,,,,,,,,,...,,,,,,,,,,
118708,2024-01-03 10:14:00,78.4,,,,,,,,,...,,,,,,,,,,


In [15]:
# Save the 'combined_df' DataFrame to replace the existing Excel workbook
combined_data.to_excel('U:/Research_and_Innovation/_ModellingGroupProjects/ThomasDairy_Continuous Data/ThomasDairy_Data/ThomasDairy1/ThomasDairy1_all.xlsx', index=False)

### TD1_Soil Moisture 15-30 cm

In [16]:
# Site 1: Soil Moisture Top 15-30 cm
new_file_url = 'https://monitormywatershed.org/api/csv-values/?result_id=7643'
column_label = 'TD1_SoilMoisture_Top15_30cm'
downloaded_df = download_csv_to_dataframe(new_file_url, column_label)

          DateTimePST  TD1_SoilMoisture_Top15_30cm
0 2023-05-10 13:36:00                          0.1
1 2023-05-10 13:38:00                          0.1
2 2023-05-10 13:40:00                          0.1
3 2023-05-10 13:42:00                          0.1
4 2023-05-10 13:44:00                          0.1


In [25]:
existing_df = pd.read_excel('U:/Research_and_Innovation/_ModellingGroupProjects/ThomasDairy_Continuous Data/ThomasDairy_Data/ThomasDairy1/ThomasDairy1_all.xlsx')
downloaded_column = 'TD1_SoilMoisture_Top15_30cm'
existing_column = 'SoilMoisture_15_30cm'
new_df = update_columns_in_combined_df(existing_df, downloaded_df, downloaded_column,existing_column )

In [26]:
# Save the 'new_df' DataFrame to replace the existing Excel workbook
new_df.to_excel('U:/Research_and_Innovation/_ModellingGroupProjects/ThomasDairy_Continuous Data/ThomasDairy_Data/ThomasDairy1/ThomasDairy1_all.xlsx', index=False)