In [16]:
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ParameterGrid
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from lightgbm import LGBMRegressor

In [6]:
# load your dataset using pandas
df = pd.read_csv('data/Image_UHI_pred.csv')

#drop columns

df2 = df.select_dtypes(['number'])

df4 = df2[df2['road'].notna()]

df4 = df4.drop(['mvt_id', 'ref_length'], axis = 1)


# split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df4.drop('UHI1', axis=1),
                                                    df4['UHI1'], test_size=0.2)


df4.head()


Unnamed: 0,distance,angle,id,road,Trees,sky,buildings,people,bikes,cars,pavement,river,water,UHI1
0,0.0,60.302484,1,8.3734,4.4206,49.9804,7.1245,1.0,0.0,4.0,0.0,0.0,0.0,0.740203
1,50.0,60.302484,2,7.87575,9.33575,43.16455,6.3117,0.0,0.0,4.0,9.3524,0.0,0.0,0.629178
10,100.0,332.221145,11,2.39065,4.31275,46.58645,11.9832,0.0,0.0,2.0,6.18275,0.0,0.0,0.725191
11,150.0,332.221145,12,2.39065,4.31275,46.58645,11.9832,0.0,0.0,2.0,6.18275,0.0,0.0,0.717797
12,200.0,332.221145,13,2.19015,3.92915,49.9564,7.2529,0.0,0.0,7.0,0.0,0.0,0.0,0.664396


In [3]:
# create an instance of the MLP classifier
mlp = MLPRegressor(hidden_layer_sizes=(100, 50), max_iter=500)

# fit the MLP classifier to the training data
mlp.fit(X_train, y_train)

# make predictions on the test data
predictions = mlp.predict(X_test)

# calculate the accuracy of the predictions
mse = mean_squared_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

# print the accuracy
print('MSE:', mse, 'R2:', r2)

MSE: 14520.200504577624 R2: -52337.48403203651


In [4]:
def cross_validate_mlp(X, y):
    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Set up the MLP regressor
    mlp = MLPRegressor(max_iter=1000)

    # Define the parameter grid to search over
    param_grid = {
        'hidden_layer_sizes': [(10,), (50,), (100,), (10, 10), (50, 50), (100, 100)],
        'activation': ['logistic', 'tanh', 'relu'],
        #'alpha': [0.0001, 0.001, 0.01, 0.1, 1.0]
    }

    # Set up the GridSearchCV object
    grid_search = GridSearchCV(mlp, param_grid=param_grid, cv=5)

    # Fit the GridSearchCV object to the training data
    grid_search.fit(X_train, y_train)

    # Print the best parameters found by GridSearchCV
    print('Best parameters:', grid_search.best_params_)

    # Evaluate the performance of the best MLP model on the test data
    best_mlp = grid_search.best_estimator_
    y_pred = best_mlp.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print('Test MSE:', mse, 'r2:', r2)

# split the dataset into input (X) and output (y) variables
X = df4.drop('UHI1', axis=1)
y = df4['UHI1']

# Call the cross-validation function
cross_validate_mlp(X, y)


Best parameters: {'activation': 'logistic', 'alpha': 0.0001, 'hidden_layer_sizes': (50, 50)}
Test MSE: 0.2707172028926467 r2: 0.006439179385503646


In [9]:
X = df4.drop('UHI1', axis=1)
y = df4['UHI1']

def svr_tune(X, y):
    # svr_params = {'C':[1, 10]}
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    svr_params = {'C':[1, 10, 100, 1000],
                  'gamma':[0.5, 0.1, 0.01, 0.001],
                  'epsilon':[0.1, 0.2, 0.3, 0.5],
                  'kernel':['rbf','poly','sigmoid']}
    
    grid_search = GridSearchCV(SVR(), svr_params, refit=True, scoring='neg_root_mean_squared_error').fit(X_train, y_train)

    # Print the best parameters found by GridSearchCV
    print('Best parameters:', grid_search.best_params_)

       # Evaluate the performance of the best MLP model on the test data
    best_svr = grid_search.best_estimator_
    y_pred = best_svr.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print('Test MSE:', mse, 'r2:', r2)

    return best_svr


def rf_tune(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    # rf_params = {'n_estimators':[100, 1000]}
    rf_params = {'n_estimators':[200, 500, 1000],
                  'max_features':['sqrt','log2'],
                  'min_samples_split':[2, 4, 8],
                  'min_samples_leaf':[0.001, 0.01, 0.1]}
    grid_search = GridSearchCV(RandomForestRegressor(), rf_params, refit=False, scoring='neg_root_mean_squared_error').fit(X_train, y_train)

    # Print the best parameters found by GridSearchCV
    print('Best parameters:', grid_search.best_params_)

    # Evaluate the performance of the best MLP model on the test data
    best_rf = grid_search.best_estimator_
    y_pred = best_rf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print('Test MSE:', mse, 'r2:', r2)


    return best_rf


def gbt_tune(X, y):
    
    #Train test split data 

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # gbt_params = {'booster': ['gbtree'],
    #                'objective': ['reg:squarederror'],
    #                'eta': [0.001, 0.003]}
    gbt_params = {'learning_rate': [0.0001, 0.001, 0.01, 0.05, 0.1],
                   'num_leaves': [10, 20, 30, 50, 100],
                   'max_depth':[3, 5, 7, 9, 15],
                   'max_bin':[10, 20, 40, 60, 80],
                   'min_data_in_leaf':[50, 100, 200, 300, 500],
                   'reg_alpha':[0.1, 0.3, 0.7],
                   'reg_lambda':[0.1, 0.3, 0.7]}   
    grid_search = GridSearchCV(LGBMRegressor(), gbt_params, refit=False, scoring='mean_squared_error').fit(X_train, y_train)

    # Print the best parameters found by GridSearchCV
    print('Best parameters:', grid_search.best_params_)

    # Evaluate the performance of the best MLP model on the test data
    best_gbt = grid_search.best_estimator_
    y_pred = best_gbt.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print('Test MSE:', mse, 'r2:', r2)

    return best_gbt


svr_tune(X, y)

KeyboardInterrupt: 

In [17]:
import tensorflow as tf
import itertools

X = df4.drop('UHI1', axis=1)
y = df4['UHI1']

def create_mlp_model(hidden_units, activation):
    model = tf.keras.Sequential()
    for units in hidden_units:
        model.add(tf.keras.layers.Dense(units, activation=activation))
    model.add(tf.keras.layers.Dense(1))  # Output layer with 1 unit for regression
    return model

def cross_validate_mlp(X, y):
    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define the parameter grid to search over
    param_grid = {
        'hidden_units': [(10,), (50,), (100,), (10, 10), (50, 50), (100, 100)],
        'activation': ['sigmoid', 'tanh', 'relu'],
        # 'alpha': [0.0001, 0.001, 0.01, 0.1, 1.0]
    }

    best_mse = float('inf')
    best_r2 = None
    best_params = None

    # Perform grid search
    for params in ParameterGrid(param_grid):
        hidden_units = params['hidden_units']
        activation = params['activation']

        # Create and compile the MLP model
        model = create_mlp_model(hidden_units, activation)
        model.compile(optimizer='adam', loss='mean_squared_error')

        # Train the model
        model.fit(X_train, y_train, epochs=100, verbose=0)

        # Evaluate the model
        y_pred = model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        # Update the best parameters
        if mse < best_mse:
            best_mse = mse
            best_r2 = r2
            best_params = params

    # Print the best parameters and evaluation metrics
    print('Best parameters:', best_params)
    print('Test MSE:', best_mse)
    print('Test R2:', best_r2)

cross_validate_mlp(X,y)



2023-05-23 14:03:51.663584: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-05-23 14:03:51.664631: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...




KeyboardInterrupt: 

In [19]:
import tensorflow as tf

print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))


Num GPUs Available:  0
