# Install modules

In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow.keras.backend as K
import matplotlib.pyplot as plt
import os
from tensorflow.keras.utils import to_categorical

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score

from keras.models import Sequential
from keras.layers import Dense, Flatten, LSTM
from keras.optimizers import Adam
from tensorflow.keras.metrics import Precision, Recall, AUC
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Use GPU device 0

# Get data in right format

In [9]:
df = pd.read_csv("preprocesseddata.csv",index_col="datetime")

#splitting into target and input variables
X = df.drop(['classification_region1', 'classification_region2', 'classification_region3'], axis=1)
y1 = df[['classification_region1']]
y2 = df[['classification_region2']]
y3 = df[['classification_region3']]

# Define the number of splits for the TimeSeriesSplit
n_splits = 5

# Define the TimeSeriesSplit object
tscv = TimeSeriesSplit(n_splits=n_splits)

# Build and train model using timeseries split

In [10]:
# Define the parameter grid for hyperparameter search
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10]
}

# Split the data into training and testing sets using TimeSeriesSplit
for train_index, test_index in tscv.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y1_train, y1_test = y1.iloc[train_index], y1.iloc[test_index]
    y2_train, y2_test = y2.iloc[train_index], y2.iloc[test_index]
    y3_train, y3_test = y3.iloc[train_index], y3.iloc[test_index]

    # Perform hyperparameter search
    grid_search = GridSearchCV(
        estimator=RandomForestClassifier(random_state=42),
        param_grid=param_grid,
        scoring='f1_macro',
        n_jobs=-1,
        cv=3
    )
    grid_search.fit(X_train, y1_train)
    rf1 = grid_search.best_estimator_

    grid_search.fit(X_train, y2_train)
    rf2 = grid_search.best_estimator_

    grid_search.fit(X_train, y3_train)
    rf3 = grid_search.best_estimator_

    # Make predictions
    y1_pred = rf1.predict(X_test)
    y2_pred = rf2.predict(X_test)
    y3_pred = rf3.predict(X_test)

    # Calculate F1 scores
    f1_score1 = f1_score(y1_test, y1_pred)
    f1_score2 = f1_score(y2_test, y2_pred)
    f1_score3 = f1_score(y3_test, y3_pred)

    # Print final F1 scores
    print("F1 score for classification_region1: {:.2f}".format(f1_score1))
    print("F1 score for classification_region2: {:.2f}".format(f1_score2))
    print("F1 score for classification_region3: {:.2f}".format(f1_score3))

  self.best_estimator_.fit(X, y, **fit_params)
  self.best_estimator_.fit(X, y, **fit_params)
  self.best_estimator_.fit(X, y, **fit_params)


F1 score for classification_region1: 0.71
F1 score for classification_region2: 0.00
F1 score for classification_region3: 0.86


  self.best_estimator_.fit(X, y, **fit_params)
  self.best_estimator_.fit(X, y, **fit_params)
  self.best_estimator_.fit(X, y, **fit_params)


F1 score for classification_region1: 0.85
F1 score for classification_region2: 0.62
F1 score for classification_region3: 0.91


  self.best_estimator_.fit(X, y, **fit_params)
  self.best_estimator_.fit(X, y, **fit_params)
  self.best_estimator_.fit(X, y, **fit_params)


F1 score for classification_region1: 0.81
F1 score for classification_region2: 0.79
F1 score for classification_region3: 0.92


  self.best_estimator_.fit(X, y, **fit_params)
  self.best_estimator_.fit(X, y, **fit_params)
  self.best_estimator_.fit(X, y, **fit_params)


F1 score for classification_region1: 0.90
F1 score for classification_region2: 0.76
F1 score for classification_region3: 0.94


  self.best_estimator_.fit(X, y, **fit_params)
  self.best_estimator_.fit(X, y, **fit_params)
  self.best_estimator_.fit(X, y, **fit_params)


F1 score for classification_region1: 0.94
F1 score for classification_region2: 0.78
F1 score for classification_region3: 0.93


# Evaluate model using test data

In [15]:
df = pd.read_csv("test_preprocesseddata.csv",index_col="datetime")

X = df.drop(['classification_region1', 'classification_region2', 'classification_region3'], axis=1)
y1 = df[['classification_region1']]
y2 = df[['classification_region2']]
y3 = df[['classification_region3']]


# Make predictions
y1_pred = rf1.predict(X)
y2_pred = rf2.predict(X)
y3_pred = rf3.predict(X)

# Calculate F1 scores
f1_score1 = f1_score(y1, y1_pred)
f1_score2 = f1_score(y2, y2_pred)
f1_score3 = f1_score(y3, y3_pred)

# Calculate accuracy
accuracy1 = accuracy_score(y1, y1_pred)
accuracy2 = accuracy_score(y2, y2_pred)
accuracy3 = accuracy_score(y3, y3_pred)

# Calculate roc_auc_score (Assuming binary classification)
roc_auc1 = roc_auc_score(y1, y1_pred)
roc_auc2 = roc_auc_score(y2, y2_pred)
roc_auc3 = roc_auc_score(y3, y3_pred)

# Print final scores for test set
print("Accuracy for classification_region1: {:.2f}".format(accuracy1))
print("F1 score for classification_region1: {:.2f}".format(f1_score1))
print("ROC AUC score for classification_region1: {:.2f}".format(roc_auc1))

print("\nAccuracy for classification_region2: {:.2f}".format(accuracy2))
print("F1 score for classification_region2: {:.2f}".format(f1_score2))
print("ROC AUC score for classification_region2: {:.2f}".format(roc_auc2))

print("\nAccuracy for classification_region3: {:.2f}".format(accuracy3))
print("F1 score for classification_region3: {:.2f}".format(f1_score3))
print("ROC AUC score for classification_region3: {:.2f}".format(roc_auc3))

Accuracy for classification_region1: 0.90
F1 score for classification_region1: 0.78
ROC AUC score for classification_region1: 0.83

Accuracy for classification_region2: 0.92
F1 score for classification_region2: 0.77
ROC AUC score for classification_region2: 0.85

Accuracy for classification_region3: 0.96
F1 score for classification_region3: 0.92
ROC AUC score for classification_region3: 0.94
