In [9]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.impute import SimpleImputer
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dense, Flatten
from tensorflow.keras.optimizers import Adam

# Load the dataset
data_path = 'filtered_ecosmart_pot_data-1.csv'  # Update this path
data = pd.read_csv(data_path)

In [10]:
data.head()

Unnamed: 0,nmme0-tmp2m-34w__cancm30,nmme0-tmp2m-34w__ccsm30,contest-pevpr-sfc-gauss-14d__pevpr,wind-vwnd-925-2010-1,startdate,day_temp_optimal,night_temp_optimal_foliage,night_temp_optimal_flowering,overall_temp_optimal,temp_stress,wind_stress,plant_stress_indicator
0,29.02,29.57,237.0,-107.46,2014-09-01,0,0,0,0,5.395,0,5.395
1,29.02,29.57,228.9,-105.73,2014-09-02,0,0,0,0,5.395,0,5.395
2,29.02,29.57,220.69,-102.51,2014-09-03,0,0,0,0,5.395,0,5.395
3,29.02,29.57,225.28,-96.11,2014-09-04,0,0,0,0,5.395,0,5.395
4,29.02,29.57,237.24,-89.19,2014-09-05,0,0,0,0,5.395,0,5.395


In [11]:
# Check for missing values in the dataset
missing_values = data.isnull().sum()
print("Missing values in each column:\n", missing_values)

Missing values in each column:
 nmme0-tmp2m-34w__cancm30              0
nmme0-tmp2m-34w__ccsm30               0
contest-pevpr-sfc-gauss-14d__pevpr    0
wind-vwnd-925-2010-1                  0
startdate                             0
day_temp_optimal                      0
night_temp_optimal_foliage            0
night_temp_optimal_flowering          0
overall_temp_optimal                  0
temp_stress                           0
wind_stress                           0
plant_stress_indicator                0
dtype: int64


In [12]:
# show the data types of the columns
data.dtypes

nmme0-tmp2m-34w__cancm30              float64
nmme0-tmp2m-34w__ccsm30               float64
contest-pevpr-sfc-gauss-14d__pevpr    float64
wind-vwnd-925-2010-1                  float64
startdate                              object
day_temp_optimal                        int64
night_temp_optimal_foliage              int64
night_temp_optimal_flowering            int64
overall_temp_optimal                    int64
temp_stress                           float64
wind_stress                             int64
plant_stress_indicator                float64
dtype: object

In [13]:
# Convert 'startdate' to datetime format
data['startdate'] = pd.to_datetime(data['startdate'])

# Convert 'startdate' to Unix time
data['unix_time'] = data['startdate'].astype('int64') // 10**9

# Display the DataFrame with the Unix time column
print(data.head())

   nmme0-tmp2m-34w__cancm30  nmme0-tmp2m-34w__ccsm30  \
0                     29.02                    29.57   
1                     29.02                    29.57   
2                     29.02                    29.57   
3                     29.02                    29.57   
4                     29.02                    29.57   

   contest-pevpr-sfc-gauss-14d__pevpr  wind-vwnd-925-2010-1  startdate  \
0                              237.00               -107.46 2014-09-01   
1                              228.90               -105.73 2014-09-02   
2                              220.69               -102.51 2014-09-03   
3                              225.28                -96.11 2014-09-04   
4                              237.24                -89.19 2014-09-05   

   day_temp_optimal  night_temp_optimal_foliage  night_temp_optimal_flowering  \
0                 0                           0                             0   
1                 0                           0         

In [14]:
data.dtypes

nmme0-tmp2m-34w__cancm30                     float64
nmme0-tmp2m-34w__ccsm30                      float64
contest-pevpr-sfc-gauss-14d__pevpr           float64
wind-vwnd-925-2010-1                         float64
startdate                             datetime64[ns]
day_temp_optimal                               int64
night_temp_optimal_foliage                     int64
night_temp_optimal_flowering                   int64
overall_temp_optimal                           int64
temp_stress                                  float64
wind_stress                                    int64
plant_stress_indicator                       float64
unix_time                                      int64
dtype: object

In [15]:
# Define the columns we need for the models
required_columns = [
    'nmme0-tmp2m-34w__cancm30', 'nmme0-tmp2m-34w__ccsm30',  # Temperature Forecasts
    'contest-pevpr-sfc-gauss-14d__pevpr',  # Potential Evaporation Rate
    'wind-vwnd-925-2010-1',  # Example wind speed column
    'startdate',  # Date/Time Information
    # Derived target variables
    'day_temp_optimal', 'night_temp_optimal_foliage', 'night_temp_optimal_flowering',
    'overall_temp_optimal', 'temp_stress', 'wind_stress', 'plant_stress_indicator'
]

# Filter the dataset to only include these columns
filtered_data = data[required_columns].copy()

# Display the first few rows to verify the filtering
print(filtered_data.head())

   nmme0-tmp2m-34w__cancm30  nmme0-tmp2m-34w__ccsm30  \
0                     29.02                    29.57   
1                     29.02                    29.57   
2                     29.02                    29.57   
3                     29.02                    29.57   
4                     29.02                    29.57   

   contest-pevpr-sfc-gauss-14d__pevpr  wind-vwnd-925-2010-1  startdate  \
0                              237.00               -107.46 2014-09-01   
1                              228.90               -105.73 2014-09-02   
2                              220.69               -102.51 2014-09-03   
3                              225.28                -96.11 2014-09-04   
4                              237.24                -89.19 2014-09-05   

   day_temp_optimal  night_temp_optimal_foliage  night_temp_optimal_flowering  \
0                 0                           0                             0   
1                 0                           0         

In [16]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Define the subset size
subset_size = 1000

# Take a random sample of the data
subset_data = filtered_data.sample(n=subset_size, random_state=42)

# Define the rolling window size
window_size = 7

# Initialize lists to store evaluation metrics
accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []
confusion_matrices = []

# Initialize a list to store predictions for each window iteration
all_predictions = []

# Initialize a list to store rolling mean values for each window iteration
rolling_means = []

# Iterate over the subset with the rolling window
for i in range(len(subset_data) - window_size + 1):
    # Extract data for the current rolling window
    window_data = subset_data.iloc[i:i+window_size]
    
    # Calculate rolling mean values
    rolling_mean = window_data.mean(axis=0)  # Assuming you want the mean of all columns
    rolling_means.append(rolling_mean)
    
    # Extract features and target variable for the current window
    X_train = window_data[['nmme0-tmp2m-34w__cancm30', 'nmme0-tmp2m-34w__ccsm30', 
                           'contest-pevpr-sfc-gauss-14d__pevpr', 'wind-vwnd-925-2010-1']]
    y_train = window_data['overall_temp_optimal']

    # Initialize the imputer with strategy 'mean'
    imputer = SimpleImputer(strategy='mean')

    # Fit the imputer on X_train
    imputer.fit(X_train)

    # Transform X_train with the imputer
    X_train_imputed = imputer.transform(X_train)
    
    # Initialize and train the Random Forest model
    model = RandomForestClassifier()
    model.fit(X_train_imputed, y_train)
    
    # Predict the target variable for the current window
    predictions = model.predict(X_train_imputed)
    
    # Store the predictions for this window iteration
    all_predictions.extend(predictions)
    
    # Calculate evaluation metrics
    accuracy = accuracy_score(y_train, predictions)
    precision = precision_score(y_train, predictions)
    recall = recall_score(y_train, predictions)
    f1 = f1_score(y_train, predictions)
    confusion = confusion_matrix(y_train, predictions)
    
    # Store the evaluation metrics
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)
    confusion_matrices.append(confusion)

# Print the evaluation metrics
print("Accuracy:", accuracy_scores)
print("Precision:", precision_scores)
print("Recall:", recall_scores)
print("F1 Score:", f1_scores)
print("Confusion Matrix:", confusion_matrices)

# Save the predictions and rolling means to a CSV file
predictions_df = pd.DataFrame({'Predictions': all_predictions})
rolling_means_df = pd.DataFrame(rolling_means, columns=subset_data.columns)  # Assuming columns are the same for rolling mean values
predictions_df.to_csv('rolling_window_predictions.csv', index=False)
rolling_means_df.to_csv('rolling_window_rolling_means.csv', index=False)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_

Accuracy: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,