###### Data Modeling

In [15]:
# Importing dataset and libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Load the dataset and prepare it in a single, non-mutating chain so the cell
# gives the same result every time it is re-run:
#   * 'suburb' is dropped (the notebook treats it as redundant with 'Postcode'),
#   * raw column names are replaced with the display names used below.
data = (
    pd.read_excel('final_vic_data.xlsx')
    .drop(columns='suburb')
    .rename(columns={
        'area_ha': 'Area of the land per hectares',
        'Solar_exposure': 'Solar Exposure',
        'carbon_gross_emissions': 'Carbon Emissions',
        'tc_loss_ha': 'Tree Loss per hectares',
    })
)

# Model inputs and the four quantities predicted jointly (multi-output regression).
features = ['Postcode', 'threshold', 'Area of the land per hectares', 'Year', 'Tree Loss per hectares']
target_variables = ['Rain', 'Solar Exposure', 'Temperature', 'Carbon Emissions']

# Split the data into features (X) and target variables (y)
X = data[features]
y = data[target_variables]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a 100-tree random forest on the training rows.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Example input, given as a one-row DataFrame carrying the training column names.
# The model was fitted on a DataFrame, so passing a bare list would make
# scikit-learn warn that the input has no valid feature names.
example_input = pd.DataFrame([[3672, 75, 821143, 2023, 1]], columns=features)
predictions = rf_model.predict(example_input)

# Print the predicted values
print("Predicted values:")

# Display unit appended to each predicted value; a target without an entry
# is shown with no unit.
UNITS = {
    'Rain': 'mm',
    'Solar Exposure': 'kWh/m^2',
    'Temperature': '°C',
    'Carbon Emissions': 'units',
}

# Risk bands per target as (low_bands, medium_bands). Every band is a
# half-open interval (lower, upper]: lower bound excluded, upper bound included.
# A value that falls in none of the listed bands is High Risk.
RISK_BANDS = {
    'Rain': (((40, 50),), ((35, 40), (50, 55))),
    'Solar Exposure': (((14, 16),), ((10, 14), (16, 20))),
    'Temperature': (((18, 24),), ((10, 18), (24, 30))),
    'Carbon Emissions': (((0, 90000),), ((90000, 100000),)),
}


def classify_risk(var, value):
    """Return the risk label for one predicted value.

    Parameters
    ----------
    var : str
        Target name; must be a key of RISK_BANDS to receive a label.
    value : float
        Predicted numeric value for that target.

    Returns
    -------
    str or None
        'Low Risk', 'Medium Risk' or 'High Risk'; None when `var` has no
        bands defined (such targets contribute no risk entry).
    """
    if var not in RISK_BANDS:
        return None
    low_bands, medium_bands = RISK_BANDS[var]
    if any(lower < value <= upper for lower, upper in low_bands):
        return 'Low Risk'
    if any(lower < value <= upper for lower, upper in medium_bands):
        return 'Medium Risk'
    return 'High Risk'


result = {}
risk = []
for i, var in enumerate(target_variables):
    value = predictions[0][i]
    unit = UNITS.get(var, '')
    result[var] = f"{value} {unit}"

    # Classify on the numeric prediction itself. Comparing the formatted
    # display string against a number raises TypeError, so the string in
    # `result` is used for display only.
    level = classify_risk(var, value)
    if level is not None:
        risk.append(level)

# Print the results
print(result)
print('\nRisk:\n',risk)

# Tally how many targets fell into each risk band.
high_count = risk.count('High Risk')
med_count = risk.count('Medium Risk')
# Every label that is neither High nor Medium is counted as Low.
low_count = len(risk) - high_count - med_count

print('\nFinal Result:')

# The overall verdict is decided by the number of High Risk targets alone.
# NOTE(review): med_count and low_count do not influence the verdict, so
# several Medium targets still yield 'Low Risk!!!' — confirm this is intended.
if high_count >= 2:
    verdict = 'High Risk!!!'
elif high_count == 1:
    verdict = 'Medium Risk!!!'
else:
    verdict = 'Low Risk!!!'
print(verdict)


Predicted values:
{'Rain': '49.03229367272723 mm', 'Solar Exposure': '16.674000000000003 kWh/m^2', 'Temperature': '16.012500000000006 °C', 'Carbon Emissions': '1295.91 units'}

Risk:
 ['Low Risk', 'Medium Risk', 'Medium Risk', 'Low Risk']

Final Result:
Low Risk!!!


In [3]:
                    ##### WORST SCORES....... IGNORE!!!!!!!! #####

# Score the fitted forest on the held-out test rows.
# Each metric is called with scikit-learn's defaults, which average the score
# uniformly over the four target columns.
import numpy as np
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, r2_score

y_pred = rf_model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# RMSE is reported alongside MSE because it is in the same units as the targets.
rmse = np.sqrt(mse)
print("Root Mean Squared Error:", rmse)

r2 = r2_score(y_test, y_pred)
print("R-squared (R2) Score:", r2)

# NOTE(review): MAPE divides by the true values, so targets at or near zero
# presumably explain an extremely large score — verify against the data.
mape = mean_absolute_percentage_error(y_test, y_pred)
print("MAPE Score:", mape)



Mean Squared Error: 89885258340.52019
Root Mean Squared Error: 299808.7029099059
R-squared (R2) Score: -0.0922296705728145
MAPE Score: 3.011515381337735e+19


In [6]:
# Importing necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor

# Load the dataset and prepare it in a single, non-mutating chain so the cell
# gives the same result every time it is re-run:
#   * 'suburb' is dropped (the notebook treats it as redundant with 'Postcode'),
#   * raw column names are replaced with the display names used below.
data = (
    pd.read_excel('final_vic_data.xlsx')
    .drop(columns='suburb')
    .rename(columns={
        'area_ha': 'Area of the land per hectares',
        'Solar_exposure': 'Solar Exposure',
        'carbon_gross_emissions': 'Carbon Emissions',
        'tc_loss_ha': 'Tree Loss per hectares',
    })
)

# Model inputs and the four quantities predicted jointly (multi-output regression).
features = ['Postcode', 'threshold', 'Area of the land per hectares', 'Year', 'Tree Loss per hectares']
target_variables = ['Rain', 'Solar Exposure', 'Temperature', 'Carbon Emissions']

# Split the data into features (X) and target variables (y)
X = data[features]
y = data[target_variables]

# Split BEFORE scaling (80% train / 20% test) so the scaler's mean and
# variance are learned from training rows only and no test information leaks
# into preprocessing. The row assignment is identical to splitting the scaled
# matrix, because it depends only on the row count and the seed.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features using statistics from the training rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full scaled matrix, kept under its original name for any cell that expects it.
X_scaled = scaler.transform(X)

# Define the parameter grid for Grid Search
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10]
}

# Base estimator; GridSearchCV clones it for every candidate, so this object
# itself is left unfitted.
rf_model = RandomForestRegressor(random_state=42)

# 5-fold cross-validated search over the grid.
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

# With the default refit=True, best_estimator_ has already been retrained on
# all of X_train with the winning hyperparameters, so no second fit is needed.
best_rf_model = grid_search.best_estimator_

# Example input as a one-row DataFrame carrying the training column names, so
# the scaler (fitted on a DataFrame) receives matching feature names.
example_input = pd.DataFrame([[3672, 75, 821143, 2023, 1]], columns=features)
example_input_scaled = scaler.transform(example_input)
predictions = best_rf_model.predict(example_input_scaled)

# Report each predicted target with its unit and assign it a risk band.
print("Predicted values:")

# Display unit per target; anything not listed is shown without a unit.
unit_for = {
    'Rain': 'mm',
    'Solar Exposure': 'kWh/m^2',
    'Temperature': '°C',
    'Carbon Emissions': 'units',
}

result = {}
risk = []
for idx, var in enumerate(target_variables):
    value = predictions[0][idx]
    unit = unit_for.get(var, '')
    result[var] = f"{value} {unit}"

    # The thresholds are applied to the number read back from the display string.
    reading = float(result[var].split()[0])

    # Each target defines a Low band and a Medium band; every bound is
    # exclusive below and inclusive above.
    if var == 'Rain':
        is_low = 40 < reading <= 50
        is_medium = 35 < reading <= 40 or 50 < reading <= 55
    elif var == 'Solar Exposure':
        is_low = 14 < reading <= 16
        is_medium = 10 < reading <= 14 or 16 < reading <= 20
    elif var == 'Temperature':
        is_low = 18 < reading <= 24
        is_medium = 10 < reading <= 18 or 24 < reading <= 30
    elif var == 'Carbon Emissions':
        is_low = 0 < reading <= 90000
        is_medium = 90000 < reading <= 100000
    else:
        # A target with no thresholds contributes no risk entry.
        continue

    # Whatever falls outside both bands is High Risk.
    if is_low:
        risk.append('Low Risk')
    elif is_medium:
        risk.append('Medium Risk')
    else:
        risk.append('High Risk')

# Print the results and risk
print(result)
print('\nRisk:\n', risk)

# Count the targets in each risk band.
high_count = sum(1 for label in risk if label == 'High Risk')
med_count = sum(1 for label in risk if label == 'Medium Risk')
# Labels that are neither High nor Medium are counted as Low.
low_count = len(risk) - high_count - med_count

print('\nFinal Result:')

# Verdict indexed by the number of High Risk targets, capped at two:
# 0 -> Low, 1 -> Medium, 2 or more -> High.
# NOTE(review): Medium and Low counts play no part in the verdict — confirm
# that this is the intended rule.
final_labels = ('Low Risk!!!', 'Medium Risk!!!', 'High Risk!!!')
print(final_labels[min(high_count, 2)])


Predicted values:
{'Rain': '50.429498178787846 mm', 'Solar Exposure': '16.669 kWh/m^2', 'Temperature': '16.043500000000005 °C', 'Carbon Emissions': '1302.43 units'}

Risk:
 ['Medium Risk', 'Medium Risk', 'Medium Risk', 'Low Risk']

Final Result:
Low Risk!!!


In [7]:
                    ##### WORST SCORES....... IGNORE!!!!!!!! #####

# Score the tuned forest on the held-out test rows.
# Each metric is called with scikit-learn's defaults, which average the score
# uniformly over the four target columns.
import numpy as np
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, r2_score

y_pred = best_rf_model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# RMSE is reported alongside MSE because it is in the same units as the targets.
rmse = np.sqrt(mse)
print("Root Mean Squared Error:", rmse)

r2 = r2_score(y_test, y_pred)
print("R-squared (R2) Score:", r2)

# NOTE(review): MAPE divides by the true values, so targets at or near zero
# presumably explain an extremely large score — verify against the data.
mape = mean_absolute_percentage_error(y_test, y_pred)
print("MAPE Score:", mape)

Mean Squared Error: 55584349860.94543
Root Mean Squared Error: 235763.33442871354
R-squared (R2) Score: 0.22355255252220507
MAPE Score: 2.4979474693158322e+19


In [None]:
###### The cells below are earlier experiments that were tested and did not work; they are kept commented out ######

In [11]:
# # Importing necessary libraries
# import pandas as pd
# from sklearn.preprocessing import StandardScaler
# from sklearn.model_selection import train_test_split
# from sklearn.svm import SVR

# # Load the dataset
# data = pd.read_excel('final_vic_data.xlsx')

# # Dropping suburb as Postcode and suburb means the same
# data.drop(columns='suburb', inplace=True)
# data.rename(columns={'area_ha':'Area of the land per hectares', 'Solar_exposure':'Solar Exposure', 
#                      'carbon_gross_emissions':'Carbon Emissions', 'tc_loss_ha':'Tree Loss per hectares'}, inplace=True)

# # Features and target variable
# features = ['Postcode', 'threshold', 'Area of the land per hectares', 'Year', 'Tree Loss per hectares']
# target_variables = ['Rain', 'Solar Exposure', 'Temperature', 'Carbon Emissions']

# # Split the data into features (X) and target variables (y)
# X = data[features]
# y = data[target_variables]

# # Standardize features
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X)

# # Splitting the data into 80% training and 20% testing
# X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# # Initialize the Support Vector Regression (SVR) model
# svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.1)

# # Train the model
# svr_model.fit(X_train, y_train)

# # Make predictions for an example input
# example_input = [[3672, 75, 821143, 2023, 1]]
# example_input_scaled = scaler.transform(example_input)
# predictions = svr_model.predict(example_input_scaled)

# # Print the predicted values
# print("Predicted values:")
# result = {}
# risk = []
# for i, var in enumerate(target_variables):
#     value = predictions[0][i]
#     unit = ''
#     if var == 'Rain':
#         unit = 'mm'
#     elif var == 'Solar Exposure':
#         unit = 'kWh/m^2'
#     elif var == 'Temperature':
#         unit = '°C'
#     elif var == 'Carbon Emissions':
#         unit = 'units'
#     result[var] = f"{value} {unit}"

#     # Calculate risks based on thresholds
#         # Rain
#     if var == 'Rain':
#         if (40 < float(result[var].split()[0]) <= 50):
#             risk.append('Low Risk')
#         elif (35 < float(result[var].split()[0]) <= 40 or 50 < float(result[var].split()[0]) <= 55):
#             risk.append('Medium Risk')
#         else:
#             risk.append('High Risk')

#     # Solar Exposure
#     elif var == 'Solar Exposure':
#         if (14 < float(result[var].split()[0]) <= 16):
#             risk.append('Low Risk')
#         elif (10 < float(result[var].split()[0]) <= 14 or 16 < float(result[var].split()[0]) <= 20):
#             risk.append('Medium Risk')
#         else:
#             risk.append('High Risk')

#     # Temperature
#     elif var == 'Temperature':
#         if (18 < float(result[var].split()[0]) <= 24):
#             risk.append('Low Risk')
#         elif (10 < float(result[var].split()[0]) <= 18 or 24 < float(result[var].split()[0]) <= 30):
#             risk.append('Medium Risk')
#         else:
#             risk.append('High Risk')

#     # Carbon Emissions
#     elif var == 'Carbon Emissions':
#         if (0 < float(result[var].split()[0]) <= 90000):
#             risk.append('Low Risk')
#         elif (90000 < float(result[var].split()[0]) <= 100000):
#             risk.append('Medium Risk')
#         else:
#             risk.append('High Risk')

# # Print the results and risk
# print(result)
# print('\nRisk:\n', risk)

# # Return the risk factor based on thresholds
# high_count=0
# low_count=0
# med_count=0

# for i in risk:
#     if i == 'High Risk':
#         high_count+=1
#     elif i == 'Medium Risk':
#         med_count+=1
#     else:
#         low_count+=1

# print('\nFinal Result:')

# if high_count >= 2:
#     print('High Risk!!!')
# elif high_count == 1:
#     print('Medium Risk!!!')
# else:
#     print('Low Risk!!!')


#                     ##### WORST SCORES....... IGNORE!!!!!!!! #####

# y_pred = svr_model.predict(X_test)

# from sklearn.metrics import mean_squared_error
# mse = mean_squared_error(y_test, y_pred)
# print("Mean Squared Error:", mse)

# import numpy as np
# rmse = np.sqrt(mse)
# print("Root Mean Squared Error:", rmse)

# from sklearn.metrics import r2_score
# r2 = r2_score(y_test, y_pred)
# print("R-squared (R2) Score:", r2)

# from sklearn.metrics import mean_absolute_percentage_error
# mape = mean_absolute_percentage_error(y_test, y_pred)
# print("MAPE Score:", mape)

ValueError: y should be a 1d array, got an array of shape (2886, 4) instead.

In [None]:
# # Carbon emission
# # Choose a target variable (e.g., 'carbon_gross_emissions')
# target_column = 'carbon_gross_emissions'

# # Separate features and target variable
# col_todrop = ['Rain', 'Temperature', 'Solar_exposure', 'carbon_gross_emissions']
# X = df.drop(columns=col_todrop)
# y = df[target_column]

# # Encoding and standardizing

# # One-hot encode the 'suburb' column
# X_encoded = pd.get_dummies(X, columns=['suburb'], drop_first=True)

# # Standardize the numeric features
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X_encoded)

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.5, random_state=42)
# print("Training set shape:")
# print("X_train shape:", X_train.shape)
# print("y_train shape:", y_train.shape)

# print("\nTesting set shape:")
# print("X_test shape:", X_test.shape)
# print("y_test shape:", y_test.shape)

# # Fitting and predicting the model
# clf = DecisionTreeRegressor()
# fit = clf.fit(X_train, y_train)
# y_pred_carbon = fit.predict(X_test)

# print("\nModel fitting completion success for Carbon Emission!!!")

In [None]:
# import pandas as pd
# from sklearn.preprocessing import StandardScaler
# from sklearn.model_selection import train_test_split
# from sklearn.tree import DecisionTreeRegressor

# # Load your dataset here (replace 'df' with your actual DataFrame)

# # Carbon emission
# # Choose a target variable (e.g., 'carbon_gross_emissions')
# target_column = 'carbon_gross_emissions'

# # Separate features and target variable
# col_todrop = ['Rain', 'Temperature', 'Solar_exposure', 'carbon_gross_emissions']
# X = df.drop(columns=col_todrop)
# # X = df.drop(columns=[target_column])
# y = df[target_column]

# # Encoding and standardizing

# # One-hot encode the 'suburb' and 'Postcode' columns
# X_encoded = pd.get_dummies(X, columns=['suburb', 'Postcode'], drop_first=True)

# # Standardize the numeric features
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X_encoded)

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.5, random_state=42)

# # Fitting and predicting the model
# clf = DecisionTreeRegressor()
# fit = clf.fit(X_train, y_train)

# # User input for Postcode
# user_postcode = 3377  # Replace with the user's input

# # Create a feature vector for prediction
# # Include the user's Postcode as a one-hot encoded feature
# user_feature_vector = pd.DataFrame([[0] * len(X_encoded.columns)], columns=X_encoded.columns)
# user_feature_vector[f'Postcode_{user_postcode}'] = 1

# # Standardize the user feature vector using the same scaler
# user_feature_vector_scaled = scaler.transform(user_feature_vector)

# # Make a prediction for the user's feature vector
# predicted_emissions = fit.predict(user_feature_vector_scaled)


# print(f"Predicted 'carbon_gross_emissions' for Postcode {user_postcode}: {predicted_emissions[0]}")


In [None]:
# import pandas as pd
# from sklearn.preprocessing import StandardScaler
# from sklearn.model_selection import train_test_split
# from sklearn.tree import DecisionTreeRegressor

# # Load your dataset here (replace 'df' with your actual DataFrame)

# # Carbon emission
# # Choose a target variable (e.g., 'carbon_gross_emissions')
# target_column = 'carbon_gross_emissions'

# # Separate features and target variable
# col_todrop = ['Rain', 'Temperature', 'Solar_exposure', 'carbon_gross_emissions']
# X = df.drop(columns=col_todrop)
# # X = df.drop(columns=[target_column])
# y = df[target_column]

# # Encoding and standardizing

# # One-hot encode the 'suburb' column
# X_encoded = pd.get_dummies(X, columns=['suburb'], drop_first=True)

# # Standardize the numeric features
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X_encoded)

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.5, random_state=42)

# # Fitting and predicting the model
# clf = DecisionTreeRegressor()
# fit = clf.fit(X_train, y_train)

# # User input for Postcode and tree_loss
# user_postcode = 3377  # Replace with the user's input for Postcode
# user_tree_loss = 50.0  # Replace with the user's input for tree_loss

# # Create a feature vector for prediction
# # Include the user's Postcode as a one-hot encoded feature
# user_feature_vector = pd.DataFrame([[0] * len(X_encoded.columns)], columns=X_encoded.columns)

# # Set the values for the user's Postcode and tree_loss
# user_feature_vector[f'Postcode_{user_postcode}'] = 1
# user_feature_vector['tree_loss'] = user_tree_loss

# # Make sure the number of features in user_feature_vector matches the number in X_encoded
# # If X_encoded has additional columns not in user_feature_vector, add them as zeros
# missing_columns = set(X_encoded.columns) - set(user_feature_vector.columns)
# for col in missing_columns:
#     user_feature_vector[col] = 0

# # Standardize the user feature vector using the same scaler
# user_feature_vector_scaled = scaler.transform(user_feature_vector)

# # Make a prediction for the user's feature vector
# predicted_emissions = fit.predict(user_feature_vector_scaled)

# print(f"Predicted 'carbon_gross_emissions' for Postcode {user_postcode} and tree_loss {user_tree_loss}: {predicted_emissions[0]}")


In [None]:
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.ensemble import RandomForestRegressor
# from sklearn.metrics import mean_squared_error, r2_score

# # Load your data into a DataFrame (assuming you have a CSV file)
# data = pd.read_excel('final_vic_data.xlsx')

# # Perform one-hot encoding for categorical variables
# data = pd.get_dummies(data, columns=['suburb', 'Postcode'])

# # Define your features (X) and target variable (y)
# X = data[['threshold', 'area_ha', 'Year', 'tc_loss_ha']]
# y_rain = data['Rain']
# y_solar_exposure = data['Solar_exposure']
# y_temperature = data['Temperature']
# y_carbon_gross_emissions = data['carbon_gross_emissions']

# # Split the data into training and testing sets
# X_train, X_test, y_rain_train, y_rain_test = train_test_split(X, y_rain, test_size=0.2, random_state=42)
# X_train, X_test, y_solar_exposure_train, y_solar_exposure_test = train_test_split(X, y_solar_exposure, test_size=0.2, random_state=42)
# X_train, X_test, y_temperature_train, y_temperature_test = train_test_split(X, y_temperature, test_size=0.2, random_state=42)
# X_train, X_test, y_carbon_emissions_train, y_carbon_emissions_test = train_test_split(X, y_carbon_gross_emissions, test_size=0.2, random_state=42)

# # Create separate Random Forest models for each target variable
# rf_rain = RandomForestRegressor(n_estimators=100, random_state=42)
# rf_solar_exposure = RandomForestRegressor(n_estimators=100, random_state=42)
# rf_temperature = RandomForestRegressor(n_estimators=100, random_state=42)
# rf_carbon_emissions = RandomForestRegressor(n_estimators=100, random_state=42)

# # Fit the models to the training data
# rf_rain.fit(X_train, y_rain_train)
# rf_solar_exposure.fit(X_train, y_solar_exposure_train)
# rf_temperature.fit(X_train, y_temperature_train)
# rf_carbon_emissions.fit(X_train, y_carbon_emissions_train)

# # Make predictions on the test data
# y_rain_pred = rf_rain.predict(X_test)
# y_solar_exposure_pred = rf_solar_exposure.predict(X_test)
# y_temperature_pred = rf_temperature.predict(X_test)
# y_carbon_emissions_pred = rf_carbon_emissions.predict(X_test)

# # Evaluate the models
# print("Rain - Mean Squared Error:", mean_squared_error(y_rain_test, y_rain_pred))
# print("Solar Exposure - Mean Squared Error:", mean_squared_error(y_solar_exposure_test, y_solar_exposure_pred))
# print("Temperature - Mean Squared Error:", mean_squared_error(y_temperature_test, y_temperature_pred))
# print("Carbon Gross Emissions - Mean Squared Error:", mean_squared_error(y_carbon_emissions_test, y_carbon_emissions_pred))

# print("Rain - R-squared:", r2_score(y_rain_test, y_rain_pred))
# print("Solar Exposure - R-squared:", r2_score(y_solar_exposure_test, y_solar_exposure_pred))
# print("Temperature - R-squared:", r2_score(y_temperature_test, y_temperature_pred))
# print("Carbon Gross Emissions - R-squared:", r2_score(y_carbon_emissions_test, y_carbon_emissions_pred))


In [None]:
# from flask import Flask, render_template, request
# import pandas as pd
# from sklearn.tree import DecisionTreeRegressor
# import joblib

# # Load your dataset
# df = pd.read_excel('final_vic_data.xlsx')

# # Choose a target variable
# # target_column = 'carbon_emission'
# #target_column = 'carbon_gross_emissions'
# # Define the target variables
# target_columns = ['Rain', 'Temperature', 'Solar_exposure', 'carbon_gross_emissions']

# # # Separate features and target variables
# # X = df.drop[['suburb', 'postcode', 'tree_loss']]
# # y = df[target_columns]

# # # Create a one-hot encoding for 'suburb' (optional, depending on your dataset)
# # X_encoded = pd.get_dummies(X, columns=['suburb'], drop_first=True)


# # # Separate features and target variable
# # col_todrop = ['Rain', 'Temperature', 'Solar_exposure', 'carbon_gross_emissions']

# # Separate features and target variable
# X = df.drop(columns=target_columns)
# y = df[target_columns]

# # Create a one-hot encoding for 'suburb' (optional, depending on your dataset)

# X_encoded = pd.get_dummies(X, columns=['suburb'], drop_first=True)

# # Train your Decision Tree Regressor model
# clf = DecisionTreeRegressor()
# clf.fit(X_train, y_train)

# # Save the trained model to a file
# joblib.dump(clf, 'model.pkl')


# # Initialize the Flask app
# app = Flask(__name__)

# # Load your trained Decision Tree Regressor model
# # Replace 'model.pkl' with the actual file path to your trained model
# model = joblib.load('model.pkl')

# @app.route('/', methods=['GET', 'POST'])
# def predict():
#     if request.method == 'POST':
#         # Get user input from the form
#         user_postcode = int(request.form['Postcode'])
#         user_tree_loss = float(request.form['tc_loss_ha'])

#         # Create a feature vector for prediction
#         user_feature_vector = pd.DataFrame({'Postcode': [user_postcode], 'tc_loss_ha': [user_tree_loss]})

#         # Use one-hot encoding if 'suburb' is categorical
#         user_feature_vector_encoded = pd.get_dummies(user_feature_vector, columns=['suburb'], drop_first=True)

#         # Make predictions using the model
#         predicted_features = model.predict(user_feature_vector_encoded)

#         # Display the predicted features
#         return render_template('result.html', predicted_features=predicted_features)

#     return render_template('index.html')

# if __name__ == '__main__':
#     app.run(debug=True)


In [None]:
# # Load your dataset here (replace 'df' with your actual DataFrame)
# df = pd.read_csv('your_dataset.csv')  # Replace 'your_dataset.csv' with the actual file path

# # Define the target variables
# target_columns = ['Rain', 'Temperature', 'Solar_exposure', 'carbon_gross_emissions']

# # Separate features and target variables
# X = df[['suburb', 'postcode', 'tree_loss']]
# y = df[target_columns]

# # Create a one-hot encoding for 'suburb' (optional, depending on your dataset)
# X_encoded = pd.get_dummies(X, columns=['suburb'], drop_first=True)


In [None]:
# # Rain
# # Choose a target variable (e.g., 'Rain')
# target_column = 'Rain'

# # Separate features and target variable
# X = df.drop(columns=[target_column])
# y = df[target_column]

# # Encoding and standardizing

# # One-hot encode the 'suburb' column
# X_encoded = pd.get_dummies(X, columns=['suburb'], drop_first=True)

# # Standardize the numeric features
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X_encoded)

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.5, random_state=42)
# print("Training set shape:")
# print("X_train shape:", X_train.shape)
# print("y_train shape:", y_train.shape)

# print("\nTesting set shape:")
# print("X_test shape:", X_test.shape)
# print("y_test shape:", y_test.shape)

# # Fitting and predicting the model
# regressor = DecisionTreeRegressor() 
# fit = regressor.fit(X_train, y_train)
# y_pred_rain = fit.predict(X_test)

# print("\nModel fitting completion success for Rain!!!")

In [None]:
# # Solar Exposure
# # Choose a target variable (e.g., 'Solar_exposure')
# target_column = 'Solar_exposure'

# # Separate features and target variable
# X = df.drop(columns=[target_column])
# y = df[target_column]

# # Encoding and standardizing

# # One-hot encode the 'suburb' column
# X_encoded = pd.get_dummies(X, columns=['suburb'], drop_first=True)

# # Standardize the numeric features
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X_encoded)

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.5, random_state=42)
# print("Training set shape:")
# print("X_train shape:", X_train.shape)
# print("y_train shape:", y_train.shape)

# print("\nTesting set shape:")
# print("X_test shape:", X_test.shape)
# print("y_test shape:", y_test.shape)

# # Fitting and predicting the model
# regressor = DecisionTreeRegressor()
# fit = regressor.fit(X_train, y_train)
# y_pred_solar = fit.predict(X_test)

# print("\nModel fitting completion success for Solar_exposure!!!")

In [None]:
# # Temperature
# # Choose a target variable (e.g., 'Temperature')
# target_column = 'Temperature'

# # Separate features and target variable
# X = df.drop(columns=[target_column])
# y = df[target_column]

# # Encoding and standardizing

# # One-hot encode the 'suburb' column
# X_encoded = pd.get_dummies(X, columns=['suburb'], drop_first=True)

# # Standardize the numeric features
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X_encoded)

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.5, random_state=42)
# print("Training set shape:")
# print("X_train shape:", X_train.shape)
# print("y_train shape:", y_train.shape)

# print("\nTesting set shape:")
# print("X_test shape:", X_test.shape)
# print("y_test shape:", y_test.shape)

# # Fitting and predicting the model
# regressor = DecisionTreeRegressor()
# fit = regressor.fit(X_train, y_train)
# y_pred_temp = fit.predict(X_test)

# print("\nModel fitting completion success for Temperature!!!")

In [None]:
# # Printing the predictions
# print('Predictions of Carbon Emission: ', y_pred_carbon)
# print('\nPredictions of Rain: ', y_pred_rain)
# print('\nPredictions of Solar Exposure: ', y_pred_solar)
# print('\nPredictions of Temperature: ', y_pred_temp)

In [None]:
# import tensorflow as tf
# from tensorflow import keras
# from tensorflow.keras import layers

# # Define your neural network model
# model = keras.Sequential([
#     #layers.Input(shape=(5,)),  # Input layer with 10 features
#    #layers.Dense(128, activation='relu'),
#     layers.Dense(64, activation='relu'),  # Hidden layer with 64 neurons and ReLU activation
#     #layers.Dropout(0.3),  # Dropout layer
#     layers.Dense(32, activation='relu'),  # Hidden layer with 32 neurons and ReLU activation
#     layers.Dense(1, activation='linear')  # Output layer for regression with linear activation
# ])

# # Compile the model with mean squared error as the loss and Adam optimizer
# optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
# model.compile(loss='mean_squared_error', optimizer=optimizer)

# # Define the number of epochs and batch size
# epochs = 10
# batch_size = 32

# # Train the model with your data (X_train, y_train)
# history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_val, y_val))

# # Evaluate the model on the test set
# test_loss = model.evaluate(X_test, y_test)
# print(f'Test loss: {test_loss}')


In [None]:
# import pandas as pd
# from sklearn.ensemble import RandomForestRegressor

# # Load your data into a DataFrame (assuming you have a CSV file)
# data = pd.read_excel('final_vic_data.xlsx')

# # Perform one-hot encoding for categorical variables
# data = pd.get_dummies(data, columns=['suburb', 'Postcode'])

# # Define your features (X) and target variable (y)
# X = data[['threshold', 'area_ha', 'Year', 'tc_loss_ha']]
# y_rain = data['Rain']
# y_solar_exposure = data['Solar_exposure']
# y_temperature = data['Temperature']
# y_carbon_gross_emissions = data['carbon_gross_emissions']

# # Create separate Random Forest models for each target variable
# rf_rain = RandomForestRegressor(n_estimators=100, random_state=42)
# rf_solar_exposure = RandomForestRegressor(n_estimators=100, random_state=42)
# rf_temperature = RandomForestRegressor(n_estimators=100, random_state=42)
# rf_carbon_emissions = RandomForestRegressor(n_estimators=100, random_state=42)

# # Fit the models to the entire dataset
# rf_rain.fit(X, y_rain)
# rf_solar_exposure.fit(X, y_solar_exposure)
# rf_temperature.fit(X, y_temperature)
# rf_carbon_emissions.fit(X, y_carbon_gross_emissions)

# # Collect user input for 'Postcode' and 'tc_loss_ha'
# user_postcode = 3377  # Replace with the user's input for Postcode
# user_tc_loss_ha = 100  # Replace with the user's input for tc_loss_ha

# # Prepare the user input data
# user_input = pd.DataFrame({
#     'threshold': [50],  # Example threshold value, replace with the user's input
#     'area_ha': [421143],  # Example area_ha value, replace with the user's input
#     'Year': [2003],  # Example Year value, replace with the user's input
#     'tc_loss_ha': [user_tc_loss_ha],  # User's input for tc_loss_ha
#     f'Postcode_{user_postcode}': [user_postcode]  # One-hot encoding for user's Postcode
# })

# # Make predictions using the models
# rain_prediction = rf_rain.predict(user_input)[0]
# solar_exposure_prediction = rf_solar_exposure.predict(user_input)[0]
# temperature_prediction = rf_temperature.predict(user_input)[0]
# carbon_emissions_prediction = rf_carbon_emissions.predict(user_input)[0]

# print("Predicted Rain:", rain_prediction)
# print("Predicted Solar Exposure:", solar_exposure_prediction)
# print("Predicted Temperature:", temperature_prediction)
# print("Predicted Carbon Gross Emissions:", carbon_emissions_prediction)
