In [2]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.impute import SimpleImputer
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, LSTM

# Function to load and preprocess data
def load_and_preprocess_data(directory):
    data = []
    for filename in os.listdir(directory):
        if filename.endswith(".csv"):
            filepath = os.path.join(directory, filename)
            df = pd.read_csv(filepath)
            # Drop unnecessary columns if any
            df.drop(columns=['longitude', 'latitude'], inplace=True)
            # Combine the columns related to each timestamp
            df = df.groupby(np.arange(len(df))//3).mean()
            data.append(df)
    data = pd.concat(data, ignore_index=True)
    return data

# Function to perform PCA with handling missing values
def perform_pca(data):
    # Impute missing values
    imputer = SimpleImputer(strategy='mean')  # You can change the strategy as per your requirement
    data_imputed = imputer.fit_transform(data)
    
    # Perform PCA
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(data_imputed)
    pca = PCA(n_components=10)  # You can change the number of components as per your requirement
    pca_data = pca.fit_transform(scaled_data)
    return pca_data

# Function to build CNN model
def build_cnn_model(input_shape):
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(50, activation='relu'))
    model.add(Dense(1, activation='linear'))
    model.compile(loss='mse', optimizer='adam', metrics=['mse'])
    return model

# Function to build RNN model
def build_rnn_model(input_shape):
    model = Sequential()
    model.add(LSTM(50, return_sequences=True, input_shape=input_shape))
    model.add(LSTM(50, return_sequences=False))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model

# Function to build Random Forest Regressor model
#def build_rf_model():
    #return RandomForestRegressor()

# Main function
def main():
    directory = "/Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated_CSV"
    
    # Load and preprocess data
    data = load_and_preprocess_data(directory)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values
    
    # Perform PCA
    pca_data = perform_pca(X)
    
    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(pca_data, y, test_size=0.3, random_state=42)
    
    # Reshape data for CNN and RNN
    X_train_cnn = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test_cnn = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
    input_shape = (X_train_cnn.shape[1], 1)
    
    # Build and train CNN model
    cnn_model = build_cnn_model(input_shape)
    cnn_model.fit(X_train_cnn, y_train, epochs=10, batch_size=32, verbose=1)
    
    # Build and train RNN model
    rnn_model = build_rnn_model(input_shape)
    rnn_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)
    
    # Build and train Random Forest Regressor model
    #rf_model = build_rf_model()
    #rf_model.fit(X_train, y_train)
    
    # Evaluate models
    cnn_pred = cnn_model.predict(X_test_cnn)
    rnn_pred = rnn_model.predict(X_test)
    #rf_pred = rf_model.predict(X_test)
    
    # Calculate RMSE and MAE
    cnn_rmse = np.sqrt(mean_squared_error(y_test, cnn_pred))
    rnn_rmse = np.sqrt(mean_squared_error(y_test, rnn_pred))
    #rf_rmse = np.sqrt(mean_squared_error(y_test, rf_pred))
    
    cnn_mae = mean_absolute_error(y_test, cnn_pred)
    rnn_mae = mean_absolute_error(y_test, rnn_pred)
    #rf_mae = mean_absolute_error(y_test, rf_pred)
    
    print("CNN RMSE:", cnn_rmse)
    print("CNN MAE:", cnn_mae)
    print("RNN RMSE:", rnn_rmse)
    print("RNN MAE:", rnn_mae)
    #print("Random Forest RMSE:", rf_rmse)
    #print("Random Forest MAE:", rf_mae)

if __name__ == "__main__":
    main()


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


ValueError: Input contains NaN.

In [9]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, LSTM

# Function to load and preprocess data
def load_and_preprocess_data(directory):
    dfs = []
    for filename in os.listdir(directory):
        if filename.endswith(".csv"):
            filepath = os.path.join(directory, filename)
            df = pd.read_csv(filepath)
            dfs.append(df)
    data = pd.concat(dfs, ignore_index=True)
    return data

# Function to perform PCA
def perform_pca(data):
    scaler = StandardScaler()
    imputer = SimpleImputer(strategy='mean')
    data_imputed = imputer.fit_transform(data)
    scaled_data = scaler.fit_transform(data_imputed)
    pca = PCA(n_components=10)  # You can change the number of components as per your requirement
    pca_data = pca.fit_transform(scaled_data)
    return pca_data

# Function to build CNN model
def build_cnn_model(input_shape):
    model = Sequential([
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(50, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return model

# Function to build RNN model
def build_rnn_model(input_shape):
    model = Sequential([
        LSTM(50, activation='relu', input_shape=input_shape),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return model

# Function to build Random Forest Regressor model
def build_rf_model():
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    return model

# Main function
def main():
    directory = "/Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated_CSV"
    
    # Load and preprocess data
    data = load_and_preprocess_data(directory)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, :-1].values
    
    # Perform PCA
    pca_data = perform_pca(X)
    
    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(pca_data, y, test_size=0.3, random_state=42)

    # Build and train CNN model
    input_shape = (X_train.shape[1], 1)
    cnn_model = build_cnn_model(input_shape)
    cnn_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)
    
    # Build and train RNN model
    rnn_model = build_rnn_model(input_shape)
    rnn_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)
    
    # Build and train Random Forest Regressor model
    rf_model = build_rf_model()
    rf_model.fit(X_train, y_train)
    
    # Evaluate models
    cnn_pred = cnn_model.predict(X_test)
    rnn_pred = rnn_model.predict(X_test)
    rf_pred = rf_model.predict(X_test)
    
    # Calculate RMSE and MAE
    cnn_rmse = np.sqrt(mean_squared_error(y_test, cnn_pred))
    rnn_rmse = np.sqrt(mean_squared_error(y_test, rnn_pred))
    rf_rmse = np.sqrt(mean_squared_error(y_test, rf_pred))
    
    cnn_mae = mean_absolute_error(y_test, cnn_pred)
    rnn_mae = mean_absolute_error(y_test, rnn_pred)
    rf_mae = mean_absolute_error(y_test, rf_pred)
    
    print("CNN RMSE:", cnn_rmse)
    print("CNN MAE:", cnn_mae)
    print("RNN RMSE:", rnn_rmse)
    print("RNN MAE:", rnn_mae)
    print("Random Forest RMSE:", rf_rmse)
    print("Random Forest MAE:", rf_mae)

if __name__ == "__main__":
    main()


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


ValueError: Input y contains NaN.

In [14]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, LSTM

# Function to load and preprocess data
def load_and_preprocess_data(directory):
    dfs = []
    for filename in os.listdir(directory):
        if filename.endswith(".csv"):
            filepath = os.path.join(directory, filename)
            print(f"Loading data from file: {filename}")
            df = pd.read_csv(filepath)
            print("Columns in the loaded DataFrame:", df.columns)
            dfs.append(df)
    data = pd.concat(dfs, ignore_index=True)
    print("Concatenated DataFrame shape:", data.shape)
    print("Checking for NaN values in the concatenated data:")
    print(data[data.isnull().any(axis=1)])  # Check for rows with NaN values in any column
    return data

# Function to perform PCA
def perform_pca(data):
    scaler = StandardScaler()
    imputer = SimpleImputer(strategy='mean')
    data_imputed = imputer.fit_transform(data)
    scaled_data = scaler.fit_transform(data_imputed)
    pca = PCA(n_components=10)  # You can change the number of components as per your requirement
    pca_data = pca.fit_transform(scaled_data)
    return pca_data

# Function to build CNN model
def build_cnn_model(input_shape):
    model = Sequential([
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(50, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return model

# Function to build RNN model
def build_rnn_model(input_shape):
    model = Sequential([
        LSTM(50, activation='relu', input_shape=input_shape),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return model

# Function to build Random Forest Regressor model
def build_rf_model():
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    return model

# Main function
def main():
    directory = "/Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated_CSV"
    
    # Load and preprocess data
    data = load_and_preprocess_data(directory)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, :-1].values
    
    # Print unique values in y to understand which values are considered as NaN
    print("Unique values in the target variable (y):", np.unique(y))
    
    # Perform PCA
    pca_data = perform_pca(X)
    
    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(pca_data, y, test_size=0.3, random_state=42)

    # Build and train CNN model
    input_shape = (X_train.shape[1], 1)
    cnn_model = build_cnn_model(input_shape)
    cnn_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)
    
    # Build and train RNN model
    rnn_model = build_rnn_model(input_shape)
    rnn_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)
    
    # Build and train Random Forest Regressor model
    rf_model = build_rf_model()
    rf_model.fit(X_train, y_train)
    
    # Evaluate models
    cnn_pred = cnn_model.predict(X_test)
    rnn_pred = rnn_model.predict(X_test)
    rf_pred = rf_model.predict(X_test)
    
    # Calculate RMSE and MAE
    cnn_rmse = np.sqrt(mean_squared_error(y_test, cnn_pred))
    rnn_rmse = np.sqrt(mean_squared_error(y_test, rnn_pred))
    rf_rmse = np.sqrt(mean_squared_error(y_test, rf_pred))
    
    cnn_mae = mean_absolute_error(y_test, cnn_pred)
    rnn_mae = mean_absolute_error(y_test, rnn_pred)
    rf_mae = mean_absolute_error(y_test, rf_pred)
    
    print("CNN RMSE:", cnn_rmse)
    print("CNN MAE:", cnn_mae)
    print("RNN RMSE:", rnn_rmse)
    print("RNN MAE:", rnn_mae)
    print("Random Forest RMSE:", rf_rmse)
    print("Random Forest MAE:", rf_mae)

if __name__ == "__main__":
    main()


Loading data from file: interpolated_insat_on_imerg_20180101.csv
Columns in the loaded DataFrame: Index(['longitude', 'latitude', '20180101 0015 IMG_TIR1',
       '20180101 0015 IMG_TIR2', '20180101 0015 IMG_WV',
       '20180101 0045 IMG_TIR1', '20180101 0045 IMG_TIR2',
       '20180101 0045 IMG_WV', '20180101 0115 IMG_TIR1',
       '20180101 0115 IMG_TIR2', '20180101 0115 IMG_WV',
       '20180101 0215 IMG_TIR1', '20180101 0215 IMG_TIR2',
       '20180101 0215 IMG_WV', 'precipitationCal'],
      dtype='object')
Loading data from file: interpolated_insat_on_imerg_20181222.csv
Columns in the loaded DataFrame: Index(['longitude', 'latitude', '20181222 1615 IMG_TIR1',
       '20181222 1615 IMG_TIR2', '20181222 1615 IMG_WV',
       '20181222 1645 IMG_TIR1', '20181222 1645 IMG_TIR2',
       '20181222 1645 IMG_WV', 'precipitationCal'],
      dtype='object')
Loading data from file: interpolated_insat_on_imerg_20180408.csv
Columns in the loaded DataFrame: Index(['longitude', 'latitude', '2018

ValueError: Input y contains NaN.

In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.impute import SimpleImputer
from keras.models import Sequential
from keras.layers import Dense

def load_data(file_paths):
    dfs = []
    for file_path in file_paths:
        print(f"Loading data from file: {file_path}")
        df = pd.read_csv(file_path)
        print(f"Columns in the loaded DataFrame: {df.columns}")
        dfs.append(df)
    return pd.concat(dfs, ignore_index=True)

def preprocess_data(df):
    # Drop rows with NaN values in target variable
    df.dropna(subset=['precipitationCal'], inplace=True)
    # Fill NaN values in predictor variables with mean
    df.fillna(df.mean(), inplace=True)
    return df

def split_data(df):
    X = df.drop(columns=['precipitationCal'])
    y = df['precipitationCal']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    return X_train, X_test, y_train, y_test

def build_rf_model():
    return RandomForestRegressor(random_state=42)

def build_cnn_model(input_shape):
    model = Sequential([
        Dense(64, activation='relu', input_shape=(input_shape,)),
        Dense(64, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return model

def main():
    # Load data
    file_paths = [
        "interpolated_insat_on_imerg_20180101.csv",
        "interpolated_insat_on_imerg_20181222.csv",
        "interpolated_insat_on_imerg_20180408.csv"
    ]
    df = load_data(file_paths)
    # Preprocess data
    df = preprocess_data(df)
    # Split data
    X_train, X_test, y_train, y_test = split_data(df)

    # Build and train Random Forest Regressor model
    rf_model = build_rf_model()
    rf_model.fit(X_train, y_train)

    # Build and train CNN model
    cnn_model = build_cnn_model(X_train.shape[1])
    cnn_model.fit(X_train, y_train, epochs=10, batch_size=32)

    # Evaluate models
    rf_pred = rf_model.predict(X_test)
    cnn_pred = cnn_model.predict(X_test)
    rf_mae = mean_absolute_error(y_test, rf_pred)
    cnn_mae = mean_absolute_error(y_test, cnn_pred)

    print("Random Forest MAE:", rf_mae)
    print("CNN MAE:", cnn_mae)

if __name__ == "__main__":
    main()


Loading data from file: interpolated_insat_on_imerg_20180101.csv


FileNotFoundError: [Errno 2] No such file or directory: 'interpolated_insat_on_imerg_20180101.csv'

In [17]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

def load_data_from_directory(directory_path):
    """
    Load and concatenate data from CSV files in a directory.

    Args:
    - directory_path (str): Path to the directory containing CSV files.

    Returns:
    - df (DataFrame): Concatenated DataFrame containing data from all CSV files.
    """
    dfs = []
    for filename in os.listdir(directory_path):
        if filename.endswith(".csv"):
            file_path = os.path.join(directory_path, filename)
            print("Loading data from file:", filename)
            df = pd.read_csv(file_path)
            dfs.append(df)
    concatenated_df = pd.concat(dfs, ignore_index=True)
    return concatenated_df

def preprocess_data(df):
    """
    Preprocess the concatenated DataFrame.

    Args:
    - df (DataFrame): Concatenated DataFrame containing data from all CSV files.

    Returns:
    - X (ndarray): Features.
    - y (ndarray): Target variable.
    """
    # Drop rows with NaN values
    df = df.dropna()

    # Extract features (X) and target variable (y)
    X = df.drop(columns=['precipitationCal'])  # Assuming 'precipitationCal' is the target variable
    y = df['precipitationCal'].values

    return X, y

def build_rf_model():
    """
    Build Random Forest Regressor model.

    Returns:
    - rf_model: Random Forest Regressor model.
    """
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    return rf_model

def build_cnn_model(input_shape):
    """
    Build Convolutional Neural Network (CNN) model.

    Args:
    - input_shape (tuple): Shape of the input data.

    Returns:
    - cnn_model: CNN model.
    """
    cnn_model = Sequential([
        Dense(64, activation='relu', input_shape=input_shape),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1)
    ])
    cnn_model.compile(optimizer=Adam(), loss='mse', metrics=['mae'])
    return cnn_model

def main():
    directory_path = "/Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated_CSV"  # Change this to your directory path
    df = load_data_from_directory(directory_path)

    X, y = preprocess_data(df)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Build and train Random Forest Regressor model
    rf_model = build_rf_model()
    rf_model.fit(X_train, y_train)

    # Build and train CNN model
    input_shape = (X_train.shape[1],)
    cnn_model = build_cnn_model(input_shape)
    cnn_model.fit(X_train, y_train, epochs=10, batch_size=32)

    # Evaluate models
    rf_pred = rf_model.predict(X_test)
    rf_mae = mean_absolute_error(y_test, rf_pred)
    print("Random Forest MAE:", rf_mae)

    cnn_pred = cnn_model.predict(X_test)
    cnn_mae = mean_absolute_error(y_test, cnn_pred)
    print("CNN MAE:", cnn_mae)

if __name__ == "__main__":
    main()


Loading data from file: interpolated_insat_on_imerg_20180101.csv
Loading data from file: interpolated_insat_on_imerg_20181222.csv
Loading data from file: interpolated_insat_on_imerg_20180408.csv


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, LSTM

# Function to load and preprocess data
def load_and_preprocess_data(directory):
    dfs = []
    for filename in os.listdir(directory):
        if filename.endswith(".csv"):
            filepath = os.path.join(directory, filename)
            df = pd.read_csv(filepath)
            dfs.append(df)
    data = pd.concat(dfs, ignore_index=True)
    return data

# Function to perform PCA
def perform_pca(data):
    scaler = StandardScaler()
    imputer = SimpleImputer(strategy='mean')
    data_imputed = imputer.fit_transform(data)
    scaled_data = scaler.fit_transform(data_imputed)
    pca = PCA(n_components=10)  # You can change the number of components as per your requirement
    pca_data = pca.fit_transform(scaled_data)
    return pca_data

# Function to build CNN model
def build_cnn_model(input_shape):
    model = Sequential([
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(50, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return model

# Function to build RNN model
def build_rnn_model(input_shape):
    model = Sequential([
        LSTM(50, activation='relu', input_shape=input_shape),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return model

# Function to build Random Forest Regressor model
def build_rf_model():
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    return model

# Main function
def main():
    directory = "/Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated_CSV"
    
    # Load and preprocess data
    data = load_and_preprocess_data(directory)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values
    
    # Remove NaN values from y
    not_nan_indices = ~np.isnan(y)
    X = X[not_nan_indices]
    y = y[not_nan_indices]
    
    # Perform PCA
    pca_data = perform_pca(X)
    
    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(pca_data, y, test_size=0.3, random_state=42)

    # Build and train CNN model
    input_shape = (X_train.shape[1], 1)
    cnn_model = build_cnn_model(input_shape)
    cnn_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)
    
    # Build and train RNN model
    rnn_model = build_rnn_model(input_shape)
    rnn_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)
    
    # Build and train Random Forest Regressor model
    rf_model = build_rf_model()
    rf_model.fit(X_train, y_train)
    
    # Evaluate models
    cnn_pred = cnn_model.predict(X_test)
    rnn_pred = rnn_model.predict(X_test)
    rf_pred = rf_model.predict(X_test)
    
    # Calculate RMSE and MAE
    cnn_rmse = np.sqrt(mean_squared_error(y_test, cnn_pred))
    rnn_rmse = np.sqrt(mean_squared_error(y_test, rnn_pred))
    rf_rmse = np.sqrt(mean_squared_error(y_test, rf_pred))
    
    cnn_mae = mean_absolute_error(y_test, cnn_pred)
    rnn_mae = mean_absolute_error(y_test, rnn_pred)
    rf_mae = mean_absolute_error(y_test, rf_pred)
    
    print("CNN RMSE:", cnn_rmse)
    print("CNN MAE:", cnn_mae)
    print("RNN RMSE:", rnn_rmse)
    print("RNN MAE:", rnn_mae)
    print("Random Forest RMSE:", rf_rmse)
    print("Random Forest MAE:", rf_mae)

if __name__ == "__main__":
    main()


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CNN RMSE: 16.80658827404362
CNN MAE: 12.745022051357113
RNN RMSE: 10.592317546901056
RNN MAE: 7.942599743213869
Random Forest RMSE: 5.42882076631188
Random Forest MAE: 3.172783383050234


In [2]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, LSTM

# Function to load and preprocess data
def load_and_preprocess_data(directory):
    dfs = []
    for filename in os.listdir(directory):
        if filename.endswith(".csv"):
            filepath = os.path.join(directory, filename)
            print(f"Loading data from: {filepath}")
            df = pd.read_csv(filepath)
            print(f"Loaded data shape: {df.shape}")
            dfs.append(df)
    data = pd.concat(dfs, ignore_index=True)
    print(f"Concatenated data shape: {data.shape}")
    return data

# Function to perform PCA
def perform_pca(data):
    print("Performing PCA...")
    scaler = StandardScaler()
    imputer = SimpleImputer(strategy='mean')
    data_imputed = imputer.fit_transform(data)
    scaled_data = scaler.fit_transform(data_imputed)
    pca = PCA(n_components=10)  # You can change the number of components as per your requirement
    pca_data = pca.fit_transform(scaled_data)
    print(f"Data shape after PCA: {pca_data.shape}")
    print("PCA completed.")
    return pca_data

# Function to build CNN model
def build_cnn_model(input_shape):
    print("Building CNN model...")
    model = Sequential([
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(50, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    print("CNN model built.")
    return model

# Function to build RNN model
def build_rnn_model(input_shape):
    print("Building RNN model...")
    model = Sequential([
        LSTM(50, activation='relu', input_shape=input_shape),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    print("RNN model built.")
    return model

# Function to build Random Forest Regressor model
def build_rf_model():
    print("Building Random Forest Regressor model...")
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    print("Random Forest Regressor model built.")
    return model

# Main function
def main():
    directory = "/Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated_CSV"
    
    # Load and preprocess data
    print("Loading and preprocessing data...")
    data = load_and_preprocess_data(directory)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values
    
    # Remove NaN values from y
    print("Removing NaN values from target variable (y)...")
    not_nan_indices = ~np.isnan(y)
    X = X[not_nan_indices]
    y = y[not_nan_indices]
    
    # Print data shape after removing NaN values
    print(f"Data shape after removing NaN values: {X.shape}, {y.shape}")
    
    # Perform PCA
    pca_data = perform_pca(X)
    
    # Split data into training and testing sets
    print("Splitting data into training and testing sets...")
    X_train, X_test, y_train, y_test = train_test_split(pca_data, y, test_size=0.3, random_state=42)

    # Build and train CNN model
    print("Building and training CNN model...")
    input_shape = (X_train.shape[1], 1)
    cnn_model = build_cnn_model(input_shape)
    cnn_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)
    
    # Build and train RNN model
    print("Building and training RNN model...")
    rnn_model = build_rnn_model(input_shape)
    rnn_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)
    
    # Build and train Random Forest Regressor model
    print("Building and training Random Forest Regressor model...")
    rf_model = build_rf_model()
    rf_model.fit(X_train, y_train)
    
    # Evaluate models
    print("Evaluating models...")
    cnn_pred = cnn_model.predict(X_test)
    rnn_pred = rnn_model.predict(X_test)
    rf_pred = rf_model.predict(X_test)
    
    # Calculate RMSE and MAE
    cnn_rmse = np.sqrt(mean_squared_error(y_test, cnn_pred))
    rnn_rmse = np.sqrt(mean_squared_error(y_test, rnn_pred))
    rf_rmse = np.sqrt(mean_squared_error(y_test, rf_pred))
    
    cnn_mae = mean_absolute_error(y_test, cnn_pred)
    rnn_mae = mean_absolute_error(y_test, rnn_pred)
    rf_mae = mean_absolute_error(y_test, rf_pred)
    
    # Print evaluation results
    print("Evaluation results:")
    print("CNN RMSE:", cnn_rmse)
    print("CNN MAE:", cnn_mae)
    print("RNN RMSE:", rnn_rmse)
    print("RNN MAE:", rnn_mae)
    print("Random Forest RMSE:", rf_rmse)
    print("Random Forest MAE:", rf_mae)

if __name__ == "__main__":
    main()


Loading and preprocessing data...
Loading data from: /Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated_CSV/interpolated_insat_on_imerg_20180105.csv
Loaded data shape: (25000, 12)
Loading data from: /Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated_CSV/interpolated_insat_on_imerg_20180101.csv
Loaded data shape: (25000, 15)
Loading data from: /Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated_CSV/interpolated_insat_on_imerg_20181222.csv
Loaded data shape: (25000, 9)
Loading data from: /Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated_CSV/interpolated_insat_on_imerg_20180408.csv
Loaded data shape: (25000, 12)
Concatenated data shape: (100000, 39)
Removing NaN values from target variable (y)...
Data shape after removing NaN values: (25000, 38), (25000,)
Performing PCA...
Data shape after PCA: (25000, 10)
PCA completed.
Splitting data into training and testing sets...
Building and training CNN model...
Building CNN model...
CNN model built.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Ep

In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, LSTM

# Function to load and preprocess data
def load_and_preprocess_data(directory):
    dfs = []
    for filename in os.listdir(directory):
        if filename.endswith(".csv"):
            filepath = os.path.join(directory, filename)
            print(f"Loading data from: {filepath}")
            df = pd.read_csv(filepath)
            print(f"Loaded data shape: {df.shape}")
            dfs.append(df)
    data = pd.concat(dfs, ignore_index=True)
    print(f"Concatenated data shape: {data.shape}")
    return data

# Function to perform PCA
def perform_pca(data):
    print("Performing PCA...")
    scaler = StandardScaler()
    imputer = SimpleImputer(strategy='mean')
    data_imputed = imputer.fit_transform(data)
    scaled_data = scaler.fit_transform(data_imputed)
    pca = PCA(n_components=10)  # You can change the number of components as per your requirement
    pca_data = pca.fit_transform(scaled_data)
    print(f"Data shape after PCA: {pca_data.shape}")
    print("PCA completed.")
    return pca_data

# Function to build CNN model
def build_cnn_model(input_shape):
    print("Building CNN model...")
    model = Sequential([
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(50, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    print("CNN model built.")
    return model

# Function to build RNN model
def build_rnn_model(input_shape):
    print("Building RNN model...")
    model = Sequential([
        LSTM(50, activation='relu', input_shape=input_shape),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    print("RNN model built.")
    return model

# Function to build Random Forest Regressor model
def build_rf_model():
    print("Building Random Forest Regressor model...")
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    print("Random Forest Regressor model built.")
    return model

# Function to classify rainfall intensity based on threshold
def classify_rainfall_intensity(predictions, no_rainfall_thresh, moderate_rainfall_thresh):
    intensities = []
    for prediction in predictions:
        if prediction < no_rainfall_thresh:
            intensities.append("No Rainfall")
        elif prediction < moderate_rainfall_thresh:
            intensities.append("Moderate Rainfall")
        else:
            intensities.append("Heavy Rainfall")
    return intensities
cnn_model = None
rnn_model = None

# Main function
def main():
    directory = "/Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated CSV/2050"
    
    # Load and preprocess data
    print("Loading and preprocessing data...")
    data = load_and_preprocess_data(directory)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values
    
    # Remove NaN values from y
    print("Removing NaN values from target variable (y)...")
    not_nan_indices = ~np.isnan(y)
    X = X[not_nan_indices]
    y = y[not_nan_indices]
    
    # Print data shape after removing NaN values
    print(f"Data shape after removing NaN values: {X.shape}, {y.shape}")
    
    # Perform PCA
    pca_data = perform_pca(X)
    
    # Split data into training and testing sets
    print("Splitting data into training and testing sets...")
    X_train, X_test, y_train, y_test = train_test_split(pca_data, y, test_size=0.3, random_state=42)

    # Build and train CNN model
    print("Building and training CNN model...")
    input_shape = (X_train.shape[1], 1)
    cnn_model = build_cnn_model(input_shape)
    cnn_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)
    
    # Build and train RNN model
    print("Building and training RNN model...")
    rnn_model = build_rnn_model(input_shape)
    rnn_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)
    
    # Build and train Random Forest Regressor model
    print("Building and training Random Forest Regressor model...")
    rf_model = build_rf_model()
    rf_model.fit(X_train, y_train)
    
    # Evaluate models
    print("Evaluating models...")
    cnn_pred = cnn_model.predict(X_test)
    rnn_pred = rnn_model.predict(X_test)
    rf_pred = rf_model.predict(X_test)
    
    # Print predictions
    print("CNN Predictions:", cnn_pred)
    print("RNN Predictions:", rnn_pred)
    print("Random Forest Predictions:", rf_pred)
    
    # Calculate RMSE and MAE
    cnn_rmse = np.sqrt(mean_squared_error(y_test, cnn_pred))
    rnn_rmse = np.sqrt(mean_squared_error(y_test, rnn_pred))
    rf_rmse = np.sqrt(mean_squared_error(y_test, rf_pred))
    
    cnn_mae = mean_absolute_error(y_test, cnn_pred)
    rnn_mae = mean_absolute_error(y_test, rnn_pred)
    rf_mae = mean_absolute_error(y_test, rf_pred)
       
    # Calculate MSE
    cnn_mse = mean_squared_error(y_test, cnn_pred)
    rnn_mse = mean_squared_error(y_test, rnn_pred)
    rf_mse = mean_squared_error(y_test, rf_pred)

    # Print evaluation results
    print("Evaluation results:")
    print("CNN RMSE:", cnn_rmse)
    print("CNN MAE:", cnn_mae)
    print("CNN MSE:", cnn_mse)
    
    print("RNN RMSE:", rnn_rmse)
    print("RNN MAE:", rnn_mae)
    print("RNN MSE:", rnn_mse)
    
    print("Random Forest RMSE:", rf_rmse)
    print("Random Forest MAE:", rf_mae)
    print("Random Forest MSE:", rf_mse)
    
    # Classify rainfall intensity for each model's predictions
    #no_rainfall_threshold = 1  # Example threshold for no rainfall
    #moderate_rainfall_threshold = 30  # Example threshold for moderate rainfall
    
    #cnn_intensity = classify_rainfall_intensity(cnn_pred.flatten(), no_rainfall_threshold, moderate_rainfall_threshold)
    #rnn_intensity = classify_rainfall_intensity(rnn_pred.flatten(), no_rainfall_threshold, moderate_rainfall_threshold)
    #rf_intensity = classify_rainfall_intensity(rf_pred, no_rainfall_threshold, moderate_rainfall_threshold)
    
    # Print the classified intensity
    #print("\nIntensity Classification:")
    #print("CNN Intensity:", cnn_intensity)
    #print("RNN Intensity:", rnn_intensity)
    #print("Random Forest Intensity:", rf_intensity)
    
    model_filenames = ['cnn_model.h5', 'rnn_model.h5', 'rf_model.pkl']
    models = [cnn_model, rnn_model, rf_model]
    for model, filename in zip(models, model_filenames):
        model.save(filename)
        absolute_path = os.path.abspath(filename)
        print(f"Model saved at: {absolute_path}")


if __name__ == "__main__":
    main()


Loading and preprocessing data...
Loading data from: /Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated CSV/2050/interpolated_insat_on_imerg_20170109.csv
Loaded data shape: (25000, 145)
Loading data from: /Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated CSV/2050/interpolated_insat_on_imerg_20170108.csv
Loaded data shape: (25000, 148)
Loading data from: /Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated CSV/2050/interpolated_insat_on_imerg_20190101.csv
Loaded data shape: (25000, 142)
Loading data from: /Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated CSV/2050/interpolated_insat_on_imerg_20180104.csv
Loaded data shape: (25000, 145)
Loading data from: /Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated CSV/2050/interpolated_insat_on_imerg_20180110.csv
Loaded data shape: (25000, 148)
Loading data from: /Users/kunalpathak9826/Desktop/ISRO/Data/Interpolated CSV/2050/interpolated_insat_on_imerg_20180105.csv
Loaded data shape: (25000, 142)
Loading data from: /Users/kunalpatha

  saving_api.save_model(


AttributeError: 'RandomForestRegressor' object has no attribute 'save'