In [77]:
import os
import numpy as np
from numpy import array
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
from keras import Sequential, layers, models, utils, Model
from keras.utils import Sequence, to_categorical
from keras.layers import Input, Conv2D, MaxPooling2D, Dense, Flatten, LSTM, Dropout, TimeDistributed, Conv1D, MaxPooling1D, Concatenate
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from PIL import Image
import csv
import cv2
import ast

In [69]:
# Path:

# Root folder of the dataset. The DACN_BASE_PATH environment variable overrides the
# default so the notebook can run on machines where the data lives elsewhere.
base_path = os.environ.get("DACN_BASE_PATH", "D:/UIT/DACN/")
# Folder holding the rain CSV files.
csv_folder_path = os.path.join(base_path, "train_csv/")
# Folder holding one image sub-folder per CSV file (matched by name).
img_folder_path = os.path.join(base_path, "train_img/")
img_path_test = ""
# Overwritten by prepare_data_to_train() with the most recently matched image folder.
img_path = ""

def process_image(image_path):
    """Load an image file as a 224x224 RGBA pixel array.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A numpy array of dtype int32 and shape (224, 224, 4).
    """
    # The context manager releases the file handle even if decoding fails.
    with Image.open(image_path) as image:
        # Force four channels so every image matches the (224, 224, 4) input of the
        # image branch in train_model(), whatever mode the file was saved in
        # (RGB, palette, greyscale, ...). Images that are already RGBA are unchanged.
        resized = image.convert("RGBA").resize((224, 224))
    return np.asarray(resized, dtype="int32")

def string_to_array(string):
    """Parse a comma-separated string of numbers into a 1-D float array.

    Args:
        string: Text such as "1.0,2.5,3".

    Returns:
        A numpy array of floats, one element per comma-separated token.

    Raises:
        ValueError: If any token cannot be converted by float().
    """
    # Split the text into its individual numeric tokens.
    tokens = string.split(',')
    # Convert each token to float while filling the numpy array.
    return np.fromiter(map(float, tokens), dtype=float)

In [78]:
# Build & Train model

def _build_combined_model(n_steps, n_features, image_shape):
    """Build and compile the two-branch (rain history + image) regression model."""
    # Branch 1 - rain history: per-time-step Conv1D features fed into an LSTM.
    lstm_input = Input(shape=(None, n_steps, n_features))
    conv1 = TimeDistributed(Conv1D(filters=64, kernel_size=1, activation='relu'))(lstm_input)
    maxpool1 = TimeDistributed(MaxPooling1D(pool_size=1))(conv1)
    flatten1 = TimeDistributed(Flatten())(maxpool1)
    lstm_out = LSTM(50, activation='relu')(flatten1)

    # Branch 2 - image: small CNN reduced to a 64-unit dense embedding.
    image_input = Input(shape=image_shape)
    conv2 = Conv2D(32, kernel_size=(3, 3), activation="relu")(image_input)
    conv3 = Conv2D(64, (3, 3), activation="relu")(conv2)
    maxpool2 = MaxPooling2D(pool_size=(2, 2))(conv3)
    dropout = Dropout(0.25)(maxpool2)
    flatten2 = Flatten()(dropout)
    dense = Dense(64, activation="relu")(flatten2)

    # Merge both branches and regress a single value.
    concatenated = Concatenate()([lstm_out, dense])
    output = Dense(1)(concatenated)

    combined_model = Model(inputs=[lstm_input, image_input], outputs=output)
    combined_model.compile(optimizer='adam', loss='mse')
    return combined_model


def train_model(df, epochs=1, batch_size=16, validation_split=0.2, model=None):
    """Fit the combined sequence + image model on one prepared DataFrame.

    Args:
        df: DataFrame with the columns "Sequences" (comma-separated numbers as a
            string), "IMG_Path" (path of an image file) and "Rain (mm)" (the
            regression target). The frame is not modified.
        epochs: Number of training epochs.
        batch_size: Mini-batch size passed to fit().
        validation_split: Fraction of the rows held out for validation.
        model: Optional already-compiled model to keep training. When None, a new
            model is built.

    Returns:
        The fitted Keras model, so it can be evaluated, reused or saved by the caller.

    Raises:
        ValueError: If a new model is built and the loaded images do not have the
            shape the image branch expects.
    """
    n_features = 1
    n_steps = 1
    image_shape = (224, 224, 4)

    # Sequence input: one row of parsed floats per sample, reshaped to
    # (samples, sequence length, n_steps, n_features). The sequence length is taken
    # from the data instead of being hard-coded.
    X_2 = np.vstack(df['Sequences'].apply(string_to_array).tolist())
    X_2 = X_2.reshape((X_2.shape[0], -1, n_steps, n_features))

    # Image input and regression target.
    # NOTE(review): pixel values are fed unscaled; consider normalising them.
    X_1 = np.array(list(df['IMG_Path'].apply(process_image)), dtype=np.float32)
    Y = df["Rain (mm)"].to_numpy()

    if model is None:
        if X_1.shape[1:] != image_shape:
            raise ValueError(
                f"Expected images of shape {image_shape}, got {X_1.shape[1:]}"
            )
        model = _build_combined_model(n_steps, n_features, image_shape)

    model.fit(
        [X_2, X_1],
        Y,
        epochs=epochs,
        verbose=True,
        validation_split=validation_split,
        batch_size=batch_size,
    )
    return model

In [71]:
# Function:

def get_csv_file_name(file_path):
    """Return the file name of ``file_path`` without directory and extension.

    Args:
        file_path: Path of a file, e.g. ".../train_csv/2023-01-01.csv".

    Returns:
        The base name with its final extension removed, e.g. "2023-01-01".
    """
    # splitext strips whatever the final extension is, instead of blindly cutting
    # the last four characters (which mangles names not ending in a 3-letter suffix).
    stem, _extension = os.path.splitext(os.path.basename(file_path))
    return stem

def find_img_folder_matched(csv_name, search_root=None):
    """Find the image sub-folder whose name equals ``csv_name``.

    Args:
        csv_name: Folder name to look for (a CSV file name without extension).
        search_root: Directory to search. Defaults to the module-level
            ``img_folder_path``.

    Returns:
        The path of the matching sub-folder, joined onto the search directory.

    Raises:
        FileNotFoundError: If the search directory has no sub-folder with that name.
    """
    root = img_folder_path if search_root is None else search_root
    for img_folder in os.listdir(root):
        candidate = os.path.join(root, img_folder)
        if img_folder == csv_name and os.path.isdir(candidate):
            return candidate
    # Previously a miss fell through to an unbound local variable
    # (UnboundLocalError); fail with an error that names what was searched for.
    raise FileNotFoundError(
        f"No image folder named {csv_name!r} found in {root!r}"
    )

def preprocess_image(image_path, target_size):
    """Load an image file, resize it, and return it as a numpy array.

    Args:
        image_path: Path of the image file to read.
        target_size: Size passed to PIL's ``Image.resize``.

    Returns:
        The resized image converted to a numpy array. No normalisation is applied.
    """
    # The context manager releases the file handle even if resizing fails.
    with Image.open(image_path) as image:
        resized = image.resize(target_size)
    return np.array(resized)

def preprocess_sequence(sequence):
    """Convert ``sequence`` to a new numpy array.

    No scaling or other preprocessing is applied; this is the hook where such
    steps (e.g. normalisation) would go.
    """
    sequence_array = np.array(sequence)
    return sequence_array

def prepare_data_to_train(file, path, file_count, csv_file_paths):
    """Build the training table for one label CSV file.

    The label file supplies the "Rain (mm)" target. The three files preceding it in
    ``csv_file_paths`` supply the rain history, and the image folder is the one
    matching the immediately preceding file.

    Args:
        file: Name of the label CSV file inside ``path``.
        path: Directory containing ``file``.
        file_count: Index of the label file; the files at ``file_count - 3`` to
            ``file_count - 1`` in ``csv_file_paths`` are used as history.
        csv_file_paths: Paths of the CSV files, indexable by ``file_count``.

    Returns:
        A DataFrame with the columns "IMG_Path" (image path built from the city
        name), "Sequences" (the three history values joined by commas) and
        "Rain (mm)" (the target).

    Raises:
        ValueError: If ``file_count`` is smaller than 3, i.e. there is not enough
            history before the label file.

    Side effects:
        Sets the module-level ``img_path`` to the matched image folder and sets the
        pandas display option "display.max_colwidth" to None.
    """
    n_history = 3
    if file_count < n_history:
        # Negative indices would silently wrap around to the wrong files.
        raise ValueError(
            f"file_count must be >= {n_history} to have {n_history} history files, "
            f"got {file_count}"
        )

    # The file selected by the caller is the label (target) file.
    label_file_path = os.path.join(path, file)
    # The previous file's name (a time stamp) identifies the image folder to use.
    target_file_path = csv_file_paths[file_count - 1]
    target_file_name = get_csv_file_name(target_file_path)
    img_folder_matched = find_img_folder_matched(target_file_name)

    # Read only the columns that are needed from the label file.
    df = pd.read_csv(label_file_path, usecols=["City", "Rain (mm)"])

    # One column of rain values per history file, oldest first.
    # NOTE(review): rows are matched by position, which assumes every CSV lists the
    # same cities in the same order - confirm.
    sequences = np.empty((df.shape[0], n_history))
    for i in range(n_history):
        history_df = pd.read_csv(
            csv_file_paths[file_count - n_history + i], usecols=["Rain (mm)"]
        )
        sequences[:, i] = history_df["Rain (mm)"]

    # train_model() parses this column back with string_to_array().
    sequence_strings = [','.join(map(str, row)) for row in sequences.tolist()]

    # Turn each city name into the path of its image inside the matched folder.
    df = df.rename(columns={"City": "IMG_Path"})
    df["IMG_Path"] = df["IMG_Path"].apply(
        lambda city_name: os.path.join(img_folder_matched, f"{city_name}.png")
    )

    # Place the sequences between the two existing columns.
    df.insert(1, "Sequences", sequence_strings)
    pd.set_option("display.max_colwidth", None)

    global img_path
    img_path = img_folder_matched

    return df


In [79]:
# Train:

# Collect the CSV files in a deterministic order. os.listdir() returns entries in
# arbitrary order, but prepare_data_to_train() uses the three files preceding the
# current one as its history window, so the order must be stable.
# NOTE(review): sorting by name assumes the file names sort chronologically - confirm.
csv_file_names = sorted(
    name for name in os.listdir(csv_folder_path) if name.endswith(".csv")
)
csv_file_paths = [os.path.join(csv_folder_path, name) for name in csv_file_names]
df = pd.DataFrame()

for file_count, csv_file in enumerate(csv_file_names):
    # The first three files only serve as history for later ones.
    if file_count < 3:
        continue
    df = prepare_data_to_train(csv_file, csv_folder_path, file_count, csv_file_paths)
    # NOTE(review): train_model(df) builds and fits a new model on each call, so
    # nothing learned from one file is carried over to the next.
    train_model(df)

# Total number of CSV files processed.
file_count = len(csv_file_names)
        



KeyboardInterrupt: 