MULTILINEAR PERCEPTRON

In [3]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.cross_decomposition import PLSRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.multioutput import MultiOutputRegressor


In [9]:
import pandas as pd

def preprocess_data(df):
    # Check for missing values
    if df.isnull().values.any():
        # Calculate mean for numeric columns only
        numeric_cols = df.select_dtypes(include='number').columns
        df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())
    
    # Change non-numeric values to numeric values
    df = df.apply(pd.to_numeric, errors='coerce')
    
    return df

# Example usage:
# Load your dataset into a pandas DataFrame
df = pd.read_csv('values.csv')

# Preprocess the data
preprocessed_df = preprocess_data(df)
print (preprocessed_df)


         A(410)       B(435)       C(460)       D(485)       E(510)   F(535)  \
0   2429.580000  1108.990000  2091.930000   737.330000   835.140000  1376.82   
1   1707.900000   725.380000  1974.490000   670.810000   835.930000  1685.28   
2   1750.410000  1102.020000  2481.050000   816.800000   960.650000  1741.43   
3   2546.680000  1548.400000  2791.560000  1007.160000  1128.000000  1573.73   
4   3090.540000  1266.420000  2933.880000   940.610000  1109.840000  2047.64   
..          ...          ...          ...          ...          ...      ...   
95  2247.430000   833.990000  2195.430000   523.900000   786.990000  1074.36   
96  2374.070000   989.420000  2382.530000   567.000000   918.030000  1189.65   
97  2318.550000   821.030000  2096.900000   507.260000   781.470000  1070.61   
98  3289.170000  1234.540000  3234.430000   873.160000  1112.210000  1742.18   
99  2163.039798  1015.779354  2471.640101   744.820303   963.049697      NaN   

        G(560)      H(585)       R(610)

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.neural_network import MLPRegressor
from sklearn.multioutput import MultiOutputRegressor


data = pd.read_csv("preprocessed_data.csv")  

# Print the column names to check for mismatches
print(data.columns)

feature_columns = ['A(410)', 'B(435)', 'C(460)', 'D(485)', 'E(510)', 'F(535)', 'G(560)',
                   'H(585)', 'R(610)', 'I(645)', 'S(680)', 'J(705)', 'U(760)',
                   'V(810)', 'W(860)', 'K(900)', 'L(940)', 'T(730)']

target_columns = ['K (kg/ha)', 'Ca (meq/100g)', 'Mg (meq/100g)',
                  'S (ppm)', 'Fe (ppm)', 'Mn (ppm)', 'Cu (ppm)', 'Zn (ppm)', 'B (ppm)']


missing_features = [col for col in feature_columns if col not in data.columns]
missing_targets = [col for col in target_columns if col not in data.columns]

if missing_features:
    print(f"Feature columns missing from dataset: {missing_features}")

if missing_targets:
    print(f"Target columns missing from dataset: {missing_targets}")

# Separate features (X) and target variables (y)
X = data[feature_columns]
y = data[target_columns]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Handle missing values
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Train Multilayer Perceptron (MLP) model for multiple target variables
mlp = MLPRegressor(hidden_layer_sizes=(100, 100), max_iter=500, random_state=42)

# Wrap MLP in MultiOutputRegressor to handle multiple outputs
multioutput_mlp = MultiOutputRegressor(mlp)

# Fit the MLP model
multioutput_mlp.fit(X_train_imputed, y_train)

# Predict for the test set
y_pred_mlp = multioutput_mlp.predict(X_test_imputed)

# Calculate Mean Squared Error
mse_mlp = mean_squared_error(y_test, y_pred_mlp)
print("MLP Mean Squared Error:", mse_mlp)

# Function to predict nutrient values based on user input wavelengths
def predict_nutrients(wavelengths):
    # Convert the input list to a DataFrame
    user_data = pd.DataFrame([wavelengths], columns=feature_columns)
    
    # Handle missing values
    user_data_imputed = imputer.transform(user_data)
    
    # Predict using the trained MLP model
    mlp_prediction = multioutput_mlp.predict(user_data_imputed)
    
    return mlp_prediction

# Function to get user input for wavelengths
def get_user_wavelengths():
    wavelengths = []
    for i in range(18):
        while True:
            try:
                value = float(input(f"Enter value for wavelength {i+1}: "))
                wavelengths.append(value)
                break
            except ValueError:
                print("Invalid input. Please enter a numerical value.")
    return wavelengths

# Get user input
user_wavelengths = get_user_wavelengths()

# Predict nutrient values
mlp_pred = predict_nutrients(user_wavelengths)

# Print the predictions with labels
nutrients = ['K (kg/ha)', 'Ca (meq/100g)', 'Mg (meq/100g)', 'S (ppm)', 'Fe (ppm)', 'Mn (ppm)', 'Cu (ppm)', 'Zn (ppm)', 'B (ppm)']

print("\nMLP Prediction:")
for nutrient, value in zip(nutrients, mlp_pred[0]):
    print(f"{nutrient}: {value:.2f}")


Index(['A(410)', 'B(435)', 'C(460)', 'D(485)', 'E(510)', 'F(535)', 'G(560)',
       'H(585)', 'R(610)', 'I(645)', 'S(680)', 'J(705)', 'U(760)', 'V(810)',
       'W(860)', 'K(900)', 'L(940)', 'T(730)', 'pH', 'EC  (dS/m)', 'OC (%)',
       'P   (kg/ha)', 'K (kg/ha)', 'Ca (meq/100g)', 'Mg (meq/100g)', 'S (ppm)',
       'Fe (ppm)', 'Mn (ppm)', 'Cu (ppm)', 'Zn (ppm)', 'B (ppm)'],
      dtype='object')




MLP Mean Squared Error: 6619.638477271707

MLP Prediction:
K (kg/ha): 40044.03
Ca (meq/100g): -1786.34
Mg (meq/100g): -2618.49
S (ppm): 27.92
Fe (ppm): -16060.46
Mn (ppm): -3317.71
Cu (ppm): -3019.62
Zn (ppm): -2503.70
B (ppm): -3394.31
