In [33]:
import os
import numpy as np
from numpy import array, zeros
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
from keras import layers, models, Model
from keras.models import load_model
from keras.layers import Input, Conv2D, MaxPooling2D, Dense, Flatten, LSTM, Dropout, TimeDistributed, Conv1D, MaxPooling1D, Concatenate
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from PIL import Image

In [45]:
# Path:

data_path = "path/to/final/csv/file"

n_features = 1
n_seq = 3
n_steps = 1

def process_image(image_path):
    image = Image.open(image_path)
    image.load()
    image = image.resize((224, 224))
    data = np.asarray(image, dtype="int32")
    return data

def string_to_array(string):
    values = string.split(',')
    array = np.array([float(value) for value in values])
    return array

def split_sequence(sequence, n_steps):
    x, y = list(), list()
    for i in range(len(sequence)):
        end_ix = i + n_steps
        if end_ix > len(sequence)-1:
            break
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        x.append(seq_x)
        y.append(seq_y)
    return array(x), array(y)

In [5]:
# Build & Train model

# Model 1 - Sequences
lstm_input = Input(shape=(None, n_steps, n_features))
conv1 = TimeDistributed(Conv1D(filters=64, kernel_size=1, activation='relu'))(lstm_input)
maxpool1 = TimeDistributed(MaxPooling1D(pool_size=1))(conv1)
flatten1 = TimeDistributed(Flatten())(maxpool1)
lstm_out = LSTM(50, activation='relu')(flatten1)
    
# Model 2 - Image
input_img_size = (224, 224, 4)
image_input = Input(shape=input_img_size)
conv2 = Conv2D(32, kernel_size=(3, 3), activation="relu")(image_input)
conv3 = Conv2D(64, (3, 3), activation="relu")(conv2)
maxpool2 = MaxPooling2D(pool_size=(2, 2))(conv3)
dropout = Dropout(0.25)(maxpool2)
flatten2 = Flatten()(dropout)
dense = Dense(64, activation="relu")(flatten2)
    
# Concatenate both models
concatenated = Concatenate()([lstm_out, dense])
output = Dense(1)(concatenated)

combined_model = Model(inputs=[lstm_input, image_input], outputs=output)
combined_model.compile(optimizer='adam', loss='mse')

#combined_model.save('D:/UIT/KLTN/my_model.h5')
#model = load_model('D:/UIT/KLTN/my_model.h5')

In [51]:
# prepare data
df = pd.read_csv(data_path)
sorted_df = df.sort_values(by=['City', 'Time'], ascending=True)
grouped_df = sorted_df.groupby('City')
city_dataframes = {}
for city, group in grouped_df:
    city_dataframes[city] = group
edited_dfs = []
for city, city_df in city_dataframes.items():
    if len(city_df) > 3:
        rain_array = city_df['Rain (mm)'].to_numpy()
        x, _ = split_sequence(rain_array, 3)
        city_df = city_df.iloc[3:]
        city_df.loc[:, 'Sequence'] = x.tolist()
        edited_dfs.append(city_df)
    else: 
        city_df = pd.DataFrame()
        
final_df = pd.concat(edited_dfs)  
print(final_df.to_string())
#combined_model.save('D:/UIT/KLTN/my_model.h5')

                                                                    Path  Rain (mm)                 Time                   City            Sequence
141                 D:/UIT/KLTN/train_img/2024-01-14-16-01-52/Ba Ria.png       0.00  2024-01-14-16-01-52                 Ba Ria     [0.0, 0.0, 0.0]
188                 D:/UIT/KLTN/train_img/2024-01-14-17-01-51/Ba Ria.png       0.00  2024-01-14-17-01-51                 Ba Ria     [0.0, 0.0, 0.0]
235                 D:/UIT/KLTN/train_img/2024-01-14-18-01-55/Ba Ria.png       0.00  2024-01-14-18-01-55                 Ba Ria     [0.0, 0.0, 0.0]
282                 D:/UIT/KLTN/train_img/2024-01-14-20-01-51/Ba Ria.png       0.00  2024-01-14-20-01-51                 Ba Ria     [0.0, 0.0, 0.0]
329                 D:/UIT/KLTN/train_img/2024-01-14-21-01-52/Ba Ria.png       0.00  2024-01-14-21-01-52                 Ba Ria     [0.0, 0.0, 0.0]
376                 D:/UIT/KLTN/train_img/2024-01-14-22-01-52/Ba Ria.png       0.00  2024-01-14-22-01-52        

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  city_df.loc[:, 'Sequence'] = x.tolist()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  city_df.loc[:, 'Sequence'] = x.tolist()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  city_df.loc[:, 'Sequence'] = x.tolist()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row

In [54]:
# train model
final_df['Sequence'] = list(final_df['Sequence'])
list_of_arrays = final_df['Sequence'].apply(lambda x: string_to_array(x))
X_1 = np.vstack(list_of_arrays)                                             
X_1 = X_1.reshape((X_1.shape[0], n_seq, n_steps, n_features))                       #Sequence

X_2 = np.array(list(final_df['Path'].apply(process_image)), dtype=np.float32)         #Image
Y = final_df["Rain (mm)"].to_numpy()                                                      #Label
            
# fit model
combined_model.fit([X_1, X_2], Y, epochs=1, verbose=True, validation_split=0.2, batch_size=16)

AttributeError: 'list' object has no attribute 'split'