In [None]:
# import packages
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import pandas as pd 
import numpy as np

# PanelOLS

In [None]:
# import packages
from linearmodels import PanelOLS
# StandardScaler is used below; import it in this cell so the notebook
# survives Restart Kernel -> Run All (it was previously only imported later).
from sklearn.preprocessing import StandardScaler

# Reading data
data = pd.read_excel('filename')
# Select the desired columns
data = data[['Region', 'Year', 'Prec', 'Tmean', 'Tmin', 'Tmax', 'Srad', 'Lrad', 'Ssd', 'EAT', 'AAT', 'Yield', 'Yield_i']]
# Index the panel by (Region, Year) before handing it to PanelOLS
data = data.set_index(['Region', 'Year'])
# Standardize the features (zero mean, unit variance per column)
# NOTE(review): 'Lrad' is standardized here but does not appear in the formula below - confirm intent.
feature_cols = ['Prec', 'Tmean', 'Tmin', 'Tmax', 'Srad', 'Lrad', 'Ssd', 'EAT', 'AAT']
scaler = StandardScaler()
data[feature_cols] = scaler.fit_transform(data[feature_cols])
# Fit the PanelOLS model
model = PanelOLS.from_formula('Yield ~ 1 + Yield_i + Prec + Tmean + Srad + Ssd + EAT + Tmax + Tmin + AAT', data=data).fit()
print(model)
# Model prediction (no new data is passed, so this is evaluated on the fitting sample)
data['Yield_predicted'] = model.predict()
# calculate RMSE
rmse = np.sqrt(mean_squared_error(data['Yield'], data['Yield_predicted']))
# calculate MAE
mae = mean_absolute_error(data['Yield'], data['Yield_predicted'])
print("RMSE:", rmse)
print("MAE:", mae)

# RFR model

In [None]:
# Import packages
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler

# Reading data and process features
data = pd.read_excel('filename')
data = data[['Region', 'Year', 'Yield_i', 'Yield_change', 'Prec', 'Tmean', 'Tmax', 'Tmin', 'Srad', 'Ssd', 'EAT', 'Lrad', 'AAT', 'Pres','Yield']]
data = data.set_index(['Region', 'Year'])
y = data['Yield']

# Split the data set into a training set (70%) and a test set (30%)
X = data[['Prec', 'Tmean', 'Tmax', 'Tmin', 'Srad', 'Ssd', 'EAT', 'Lrad', 'AAT', 'Pres', 'Yield_i']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize features: fit the scaler on the training set only, then apply
# the same transformation to the test set
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Create a random forest regression model
# (oob_score=True makes the out-of-bag score available as rf_model.oob_score_ after fitting)
rf_model = RandomForestRegressor(n_estimators=500, random_state=42, oob_score=True)
# Fit the model on the training set
rf_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_model.predict(X_test)
# Evaluate model performance
r2 = r2_score(y_test, y_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error was
# deprecated in scikit-learn 1.4 and removed in 1.6, so avoid relying on it.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

# Print evaluation results
print(f'R-squared (R^2): {r2}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'Mean Absolute Error (MAE): {mae}')

# GBDT model

In [None]:
# import packages
from sklearn.ensemble import  GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
import time

# Read data and process features
data = pd.read_excel('filename')
data = data[['Region', 'Year', 'Yield_i', 'Yield_change', 'Prec', 'Tmean', 'Tmax', 'Tmin', 'Srad', 'Ssd', 'EAT', 'Lrad', 'AAT', 'Pres','Yield']]
data = data.set_index(['Region', 'Year'])
y = data['Yield']
# Split the data set into a training set (70%) and a test set (30%)
X = data[['Prec', 'Tmean', 'Pres','Tmax', 'Tmin', 'Srad', 'Ssd', 'EAT', 'Lrad', 'AAT', 'Yield_i']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Standardize features: fit on the training set only, reuse the fitted scaler on the test set
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Create a gradient boosting (GBDT) regression model
# NOTE(review): the variable is still called `rf_model` although it holds a
# GradientBoostingRegressor; the name is kept so the notebook namespace is unchanged.
rf_model = GradientBoostingRegressor(n_estimators=500, random_state=42, learning_rate=0.1)
# Fit the model on the training set
rf_model.fit(X_train, y_train)
# Make predictions on the test set
y_pred = rf_model.predict(X_test)
# Evaluate model performance
r2 = r2_score(y_test, y_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error was
# deprecated in scikit-learn 1.4 and removed in 1.6, so avoid relying on it.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
# Print evaluation results
print(f'R-squared (R^2): {r2}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'Mean Absolute Error (MAE): {mae}')

# SVR model

In [None]:
# import packages
from sklearn.svm import SVR

# Read data and process features
data = pd.read_excel('filename')
data = data[['Region', 'Year', 'Prec', 'Yield_i', 'Tmean', 'Tmax', 'Tmin', 'Srad', 'Ssd', 'EAT', 'Lrad', 'AAT', 'Pres', 'Yield']]
data = data.set_index(['Region', 'Year'])
y = data['Yield']

# Split the data set into a training set (70%) and a test set (30%)
X = data[['Prec', 'Tmean', 'Tmax', 'Tmin', 'Srad', 'Ssd', 'EAT', 'Lrad', 'AAT', 'Pres', 'Yield_i']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
# Only transform the test set: refitting the scaler on test data would scale
# train and test with different statistics and leak test-set information.
X_test = scaler.transform(X_test)
# Create a support vector regression model with a linear kernel
svr_model = SVR(kernel='linear', C=1)

# Fit the model on the training set
svr_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = svr_model.predict(X_test)
# Evaluate model performance
r2 = r2_score(y_test, y_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error was
# deprecated in scikit-learn 1.4 and removed in 1.6, so avoid relying on it.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

# Print evaluation results
print(f'R-squared (R^2): {r2}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'Mean Absolute Error (MAE): {mae}')

# NN model

In [None]:
# import packages
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler

# Seed Python, NumPy and the Keras backend so weight initialisation and batch
# shuffling are reproducible across runs (same seed value as the data split).
keras.utils.set_random_seed(42)

# Read data and process features
data = pd.read_excel('filename')
data = data[['Region', 'Year', 'Prec', 'Yield_i', 'Tmean', 'Tmax', 'Tmin', 'Srad', 'Ssd', 'EAT', 'Lrad', 'AAT', 'Pres', 'Yield']]
data = data.set_index(['Region', 'Year'])
y = data['Yield']

# Split the data set into a training set (70%) and a test set (30%)
X = data[['Prec', 'Tmean', 'Tmax', 'Tmin', 'Srad', 'Ssd', 'EAT', 'Lrad', 'AAT', 'Pres', 'Yield_i']]
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=42)

# create MinMaxScaler
scaler = MinMaxScaler()
# Normalization: fit on the training set only, then apply the same scaling to the test set
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Create a neural network regression model: two hidden ReLU layers and a
# single linear output unit
model = keras.Sequential([
    keras.layers.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(1)
])
# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')
# Train the model
# NOTE(review): the test set doubles as validation data here; it is only used
# for per-epoch monitoring, but a separate validation split would be cleaner.
history = model.fit(X_train, y_train, epochs=100, batch_size=32,
                    validation_data=(X_test, y_test), verbose=2)

# The features were already normalized above; they must not be passed through
# the scaler a second time before prediction.
# Make predictions on the test set
y_pred = model.predict(X_test)
# Evaluate model performance
r2 = r2_score(y_test, y_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error was
# deprecated in scikit-learn 1.4 and removed in 1.6, so avoid relying on it.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
# Print evaluation results
print(f'R-squared (R^2): {r2}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'Mean Absolute Error (MAE): {mae}')

# Ensemble learning

In [None]:
# Import packages
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import Ridge
from sklearn.ensemble import VotingRegressor

# Read data and process features
data = pd.read_excel('filename')
data = data[['Region', 'Year', 'Yield_i', 'Yield_change', 'Prec', 'Tmean', 'Tmax', 'Tmin', 'Srad', 'Ssd', 'EAT', 'Lrad', 'AAT', 'Pres','Yield']]
data = data.set_index(['Region', 'Year'])
y = data['Yield']

# Split the data set into a training set (70%) and a test set (30%).
# This single split is used for both fitting and evaluation: re-splitting
# after training would put rows the model was fitted on into the test set
# and inflate the reported scores.
X = data[['Prec', 'Tmean', 'Pres','Tmax', 'Tmin', 'Srad', 'Ssd', 'EAT', 'Lrad', 'AAT', 'Yield_i']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize features: fit on the training set only, reuse the fitted scaler on the test set
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Initialize each regression model
rf_model = RandomForestRegressor(n_estimators=500, random_state=42)
gb_model = GradientBoostingRegressor(n_estimators=500, random_state=42, learning_rate=0.1)
# NOTE(review): this estimator is a Ridge regression although the variable and
# the 'SVR' label below suggest a support vector model - confirm which was intended.
svm_model = Ridge()

# Define multi-model fusion (predictions of the three estimators are averaged)
voting_regressor = VotingRegressor([('rf', rf_model), ('gb', gb_model), ('SVR', svm_model)])

# Train the ensemble on the standardized training features
voting_regressor.fit(X_train_scaled, y_train)

# Predict on the held-out test set, scaled with the training-set statistics
y_pred = voting_regressor.predict(X_test_scaled)

# Evaluate model performance
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print(f"R Mean Squared Error: {rmse}")
print(f"R-squared: {r2}")
print(f"Mean Absolute Error (MAE): {mae}")