Our dataset consists of 1 image (using ESP32-CAM, prob 800 X 600), 1 temperature reading, 1 humidity reading, 1 barometric reading, and 1 wind speed reading.
Output data will be whether it is raining at the timestep of the next 5 minutes.
A CNN model will be used for the image, then the result is used as together with the sensor data in a decision tree.

The below code is to mount to google drive for loading and saving purposes.

In [56]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [57]:
import os
os.chdir("/content/drive/MyDrive/CS3237_ML_Training")

In [58]:
from google.colab import output
output.enable_custom_widget_manager()

The next code below is for the CNN training

In [None]:
# Hyperparameters
input_size = (128, 128)
num_classes = 6
batch_size = 50
epochs = 100
learning_rate = 0.001

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

classes = ("A-Clear Sky", "B-Patterned Clouds", "C-Thin White Clouds", "D-Thick White Clouds", "E-Thick Dark Clouds", "F-Veil Clouds")

# Data transform for training
transform = transforms.Compose([
  transforms.Resize(input_size),
  transforms.ToTensor(),
  transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
  transforms.RandomHorizontalFlip(),
  transforms.RandomRotation(15),
])

# Load data
train_data = datasets.ImageFolder(root='Dataset2/train', transform=transform)
test_data = datasets.ImageFolder(root='Dataset2/test', transform=transform)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

NameError: name 'input_size' is not defined

In [None]:
# Define the CNN model
class WeatherModel(nn.Module):
  def __init__(self, num_classes):
    super(WeatherModel, self).__init__()
    self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
    self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
    self.conv4 = nn.Conv2d(128, 128, kernel_size=3, padding=1)

    self.fc1 = nn.Linear(128 * 8 * 8, 256)
    self.dropout = nn.Dropout(0.5)
    self.fc2 = nn.Linear(256, num_classes)

  def forward(self, x):
    x = self.pool(torch.relu(self.conv1(x)))
    x = self.pool(torch.relu(self.conv2(x)))
    x = self.pool(torch.relu(self.conv3(x)))
    x = self.pool(torch.relu(self.conv4(x)))

    x = torch.flatten(x, 1)
    x = torch.relu(self.fc1(x))
    x = self.dropout(x)
    x = self.fc2(x)

    return x

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = WeatherModel(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
model.train()

# Training loop
for epoch in range(epochs):
  running_loss = 0.0
  validation_loss = 0.0
  for images, labels in train_loader:
    images, labels = images.to(device), labels.to(device)
    optimizer.zero_grad()
    train_outputs = model(images)
    train_loss = criterion(train_outputs, labels)
    train_loss.backward()
    optimizer.step()
    running_loss += train_loss.item()
  for images, labels in test_loader:
    images, labels = images.to(device), labels.to(device)
    test_outputs = model(images)
    test_loss = criterion(test_outputs, labels)
    validation_loss += test_loss.item()
  print("Epoch", epoch, "Training Loss:", running_loss, " Test Loss:", validation_loss)
torch.save(model.state_dict(), 'WeatherModel.pth')

Epoch 0 Training Loss: 40.79841464757919  Test Loss: 5.637934118509293
Epoch 1 Training Loss: 27.446874856948853  Test Loss: 4.113233186304569
Epoch 2 Training Loss: 22.56625720858574  Test Loss: 3.5691803842782974
Epoch 3 Training Loss: 18.6502425968647  Test Loss: 2.842763312160969
Epoch 4 Training Loss: 14.975115582346916  Test Loss: 2.7511617336422205
Epoch 5 Training Loss: 13.802535817027092  Test Loss: 2.5490060755982995
Epoch 6 Training Loss: 9.011527478694916  Test Loss: 1.957669073715806
Epoch 7 Training Loss: 9.589469268918037  Test Loss: 2.089536768384278
Epoch 8 Training Loss: 7.805121347308159  Test Loss: 1.7476374595426023
Epoch 9 Training Loss: 7.357932902872562  Test Loss: 1.7223176965489984
Epoch 10 Training Loss: 6.694688588380814  Test Loss: 1.5368231385946274
Epoch 11 Training Loss: 6.192168101668358  Test Loss: 1.4252266734838486
Epoch 12 Training Loss: 5.071500509977341  Test Loss: 1.5087617486715317
Epoch 13 Training Loss: 5.456499453634024  Test Loss: 1.35328529

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = WeatherModel(num_classes=num_classes).to(device)
model.load_state_dict(torch.load("WeatherModel.pth", map_location=torch.device('cpu')))


  model.load_state_dict(torch.load("WeatherModel.pth", map_location=torch.device('cpu')))


<All keys matched successfully>

In [None]:
model.eval()
class_correct = list(0. for i in range(num_classes))
class_total = list(0. for i in range(num_classes))
with torch.no_grad():
  for images, labels in train_loader:
    images, labels = images.to(device), labels.to(device)
    outputs = model(images)
    _, predicted = torch.max(outputs, 1)
    c = (predicted == labels).squeeze()
    for i in range(len(labels)):
      label = labels[i]
      class_correct[label] += c[i].item()
      class_total[label] += 1

for i in range(num_classes):
  print('Train Accuracy of %5s: %2d%% (%2d/%2d)' % (classes[i], 100.0 * class_correct[i] / class_total[i], class_correct[i], class_total[i]))

print('\nTrain Accuracy (Overall): %2d%% (%2d/%2d)' % (100.0 * sum(class_correct) / sum(class_total), sum(class_correct), sum(class_total)))

Train Accuracy of A-Clear Sky: 100% (300/300)
Train Accuracy of B-Patterned Clouds: 100% (300/300)
Train Accuracy of C-Thin White Clouds: 99% (299/300)
Train Accuracy of D-Thick White Clouds: 99% (297/300)
Train Accuracy of E-Thick Dark Clouds: 99% (299/300)
Train Accuracy of F-Veil Clouds: 100% (300/300)

Train Accuracy (Overall): 99% (1795/1800)


In [None]:
model.eval()
class_correct = list(0. for i in range(num_classes))
class_total = list(0. for i in range(num_classes))
with torch.no_grad():
  for images, labels in test_loader:
    images, labels = images.to(device), labels.to(device)
    outputs = model(images)
    _, predicted = torch.max(outputs, 1)
    c = (predicted == labels).squeeze()
    for i in range(len(labels)):
      label = labels[i]
      class_correct[label] += c[i].item()
      class_total[label] += 1

for i in range(num_classes):
  print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (classes[i], 100.0 * class_correct[i] / class_total[i], class_correct[i], class_total[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (100.0 * sum(class_correct) / sum(class_total), sum(class_correct), sum(class_total)))

Test Accuracy of A-Clear Sky: 98% (49/50)
Test Accuracy of B-Patterned Clouds: 98% (49/50)
Test Accuracy of C-Thin White Clouds: 94% (47/50)
Test Accuracy of D-Thick White Clouds: 86% (43/50)
Test Accuracy of E-Thick Dark Clouds: 100% (50/50)
Test Accuracy of F-Veil Clouds: 96% (48/50)

Test Accuracy (Overall): 95% (286/300)


Below is the decision tree training code.

In [None]:
import pandas as pd
import torch
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import export_text
import pickle

# Load collected data (processed in DecisionTree.ipynb)
torch_data_5a = torch.load("dataset_11_5a.pt", weights_only=True)
target_data_5a = torch.load("target_11_5a.pt", weights_only=True)

X_5a = pd.DataFrame(torch_data_5a)
y_5a = pd.DataFrame(target_data_5a)
# X_5a.columns = ['cloudClass', 'windSpeed', 'bmpTemp', 'bmpPressure', 'dhtTemp', 'dhtHumidity']
X_5a.drop(X_5a.tail(5).index, inplace=True)
y_5a.drop(y_5a.head(5).index, inplace=True)

torch_data_5b = torch.load("dataset_11_5b.pt", weights_only=True)
target_data_5b = torch.load("target_11_5b.pt", weights_only=True)

X_5b = pd.DataFrame(torch_data_5b)
y_5b = pd.DataFrame(target_data_5b)
# X_5b.columns = ['cloudClass', 'windSpeed', 'bmpTemp', 'bmpPressure', 'dhtTemp', 'dhtHumidity']
X_5b.drop(X_5b.tail(5).index, inplace=True)
y_5b.drop(y_5b.head(5).index, inplace=True)

X = pd.concat([X_5a, X_5b], ignore_index=True)
y = pd.concat([y_5a, y_5b], ignore_index=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1, shuffle=True)

# Train decision tree classifier
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)

with open('decision_tree.pkl', 'wb') as file:
  pickle.dump(clf, file)

# Evaluate model
y_train_pred = clf.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f"Train Accuracy: {train_accuracy}")

y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy}")

tree_rules = export_text(clf, feature_names=X.columns[0:])
print(f"Decision Tree: \n{tree_rules}")

Train Accuracy: 1.0
Test Accuracy: 0.9583333333333334
Decision Tree: 
|--- 2 <= 28.90
|   |--- 5 <= 93.50
|   |   |--- 4 <= 24.45
|   |   |   |--- 3 <= 100861.50
|   |   |   |   |--- class: 1
|   |   |   |--- 3 >  100861.50
|   |   |   |   |--- 1 <= 0.84
|   |   |   |   |   |--- class: 0
|   |   |   |   |--- 1 >  0.84
|   |   |   |   |   |--- class: 1
|   |   |--- 4 >  24.45
|   |   |   |--- 2 <= 26.15
|   |   |   |   |--- 4 <= 24.85
|   |   |   |   |   |--- 3 <= 100812.00
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- 3 >  100812.00
|   |   |   |   |   |   |--- 3 <= 100818.00
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- 3 >  100818.00
|   |   |   |   |   |   |   |--- 3 <= 100820.50
|   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- 3 >  100820.50
|   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |--- 4 >  24.85
|   |   |   |   |   |--- class: 0
|   |   |   |--- 2 >  26.15
|   |   |   |   |--- class: 1
| 

In [60]:
import pickle

with open('decision_tree.pkl', 'rb') as file:
  clf = pickle.load(file)
  tree_rules = export_text(clf, feature_names=X.columns[0:])
  print(f"Decision Tree: \n{tree_rules}")

Decision Tree: 
|--- 2 <= 28.90
|   |--- 5 <= 93.50
|   |   |--- 4 <= 24.45
|   |   |   |--- 3 <= 100861.50
|   |   |   |   |--- class: 1
|   |   |   |--- 3 >  100861.50
|   |   |   |   |--- 1 <= 0.84
|   |   |   |   |   |--- class: 0
|   |   |   |   |--- 1 >  0.84
|   |   |   |   |   |--- class: 1
|   |   |--- 4 >  24.45
|   |   |   |--- 2 <= 26.15
|   |   |   |   |--- 4 <= 24.85
|   |   |   |   |   |--- 3 <= 100812.00
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- 3 >  100812.00
|   |   |   |   |   |   |--- 3 <= 100818.00
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- 3 >  100818.00
|   |   |   |   |   |   |   |--- 3 <= 100820.50
|   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- 3 >  100820.50
|   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |--- 4 >  24.85
|   |   |   |   |   |--- class: 0
|   |   |   |--- 2 >  26.15
|   |   |   |   |--- class: 1
|   |--- 5 >  93.50
|   |   |--- 1 <= 0.56
|   |   |   |