In [16]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
from joblib import dump

# Load the dataset
file_name = 'expanded_black_pepper_dataset.csv'
df = pd.read_csv(file_name)

# Display the first few rows
print(df.head())

# Encoding Leaf Color if it is a categorical feature
label_encoder = LabelEncoder()
df['Leaf Color'] = label_encoder.fit_transform(df['Leaf Color'])  # Make sure column name matches in your dataset

# Define features (X) and targets (y)
X = df[['Plant Height (m)', 'Stem Width (cm)', 'Leaf Color', 'Leaf Length (cm)']]  # Update column names
y_age = df['Age (years)']  # Update if needed
y_yield = df['Yield (kg)']  # Update if needed

# Feature scaling (standardization)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)  # Scale the features

# Split data for Age prediction
X_train_age, X_test_age, y_train_age, y_test_age = train_test_split(X_scaled, y_age, test_size=0.2, random_state=42)

# Split data for Yield prediction
X_train_yield, X_test_yield, y_train_yield, y_test_yield = train_test_split(X_scaled, y_yield, test_size=0.2, random_state=42)

# Initialize and train SVR models
svr_age = SVR(kernel='rbf', C=1, gamma=0.1)
svr_age.fit(X_train_age, y_train_age)

svr_yield = SVR(kernel='rbf', C=1, gamma=0.1)
svr_yield.fit(X_train_yield, y_train_yield)

# Predict Age and Yield
y_pred_age = svr_age.predict(X_test_age)
y_pred_yield = svr_yield.predict(X_test_yield)

# Function to calculate accuracy
def calculate_accuracy(y_actual, y_predicted):
    mape = np.mean(np.abs((y_actual - y_predicted) / y_actual)) * 100
    accuracy = 100 - mape
    return accuracy

accuracy_age = calculate_accuracy(y_test_age, y_pred_age)
accuracy_yield = calculate_accuracy(y_test_yield, y_pred_yield)

print(f"Age Prediction Accuracy: {accuracy_age:.2f}%")
print(f"Yield Prediction Accuracy: {accuracy_yield:.2f}%")

# Save models and encoders for future use
dump(svr_age, "svr_age_model.pkl")
dump(svr_yield, "svr_yield_model.pkl")
dump(label_encoder, "label_encoder.pkl")
dump(scaler, "scaler.pkl")

print("Models and encoders saved successfully!")

# Function for predicting Age and Yield based on user input
def predict_age_and_yield():
    print("\nEnter the following values:")
    plant_height = float(input("Plant Height (e.g., 2.5): "))
    stem_width = float(input("Stem Width (e.g., 2.0): "))
    leaf_length = float(input("Leaf Length (e.g., 15.0): "))
    leaf_color = input("Leaf Color (e.g., Green): ")

    # Check if the entered Leaf Color is known
    if leaf_color not in label_encoder.classes_:
        print(f"Error: Leaf Color '{leaf_color}' is not recognized. Valid options are: {list(label_encoder.classes_)}")
        return

    # Encode Leaf Color
    leaf_color_encoded = label_encoder.transform([leaf_color])[0]

    # Prepare the input for prediction (make sure column order matches training data)
    user_input = pd.DataFrame([{
        'Plant Height (m)': plant_height,
        'Stem Width (cm)': stem_width,
        'Leaf Length (cm)': leaf_length,
        'Leaf Color': leaf_color_encoded
    }])

    # Reorder the columns of user_input to match the original column order used for training
    user_input = user_input[['Plant Height (m)', 'Stem Width (cm)', 'Leaf Color', 'Leaf Length (cm)']]

    # Scale the user input (use the same scaler as used for training)
    user_input_scaled = scaler.transform(user_input)

    # Predict Age
    predicted_age = svr_age.predict(user_input_scaled)
    print(f"Predicted Age: {predicted_age[0]:.2f} years")

    # Predict Yield
    predicted_yield = svr_yield.predict(user_input_scaled)
    print(f"Predicted Yield: {predicted_yield[0]:.2f} kg")

# Test the function for prediction
predict_age_and_yield()

   Plant Height (m)  Stem Width (cm)  Leaf Length (cm)   Leaf Color  \
0              0.90             0.52              7.35  Light Green   
1              3.00             2.80             12.02    Yellowish   
2              1.40             0.90             13.31  Light Green   
3              2.31             1.87             12.50   Dark Green   
4              0.68             0.58              5.83  Light Green   

   Age (years)  Yield (kg)  
0            1        1.00  
1           18        2.10  
2            4        2.71  
3            9        6.94  
4            1        1.00  
Age Prediction Accuracy: 95.58%
Yield Prediction Accuracy: 84.99%
Models and encoders saved successfully!

Enter the following values:
Predicted Age: 15.56 years
Predicted Yield: 4.53 kg


In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression

# Generate sample data for training
X, y = make_regression(n_samples=100, n_features=1, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a regression model
age_model = LinearRegression()
age_model.fit(X_train, y_train)
import joblib
import os

# Create the directory if it doesn't exist
os.makedirs('models', exist_ok=True)

# Save the trained model
joblib.dump(age_model, 'models/age_prediction_model.pkl')
print("Model saved successfully!")
loaded_model = joblib.load('models/age_prediction_model.pkl')
print("Model loaded successfully:", loaded_model)

Model saved successfully!
Model loaded successfully: LinearRegression()


In [13]:
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import joblib

# Load the dataset
data = pd.read_csv(r'C:\Users\HP\OneDrive\Desktop\final_pepper\expanded_black_pepper_dataset.csv')

# One-Hot Encoding for 'Leaf Color'
encoder = OneHotEncoder(sparse_output=False, drop='first')  # Use 'sparse_output' instead of 'sparse'
leaf_color_encoded = encoder.fit_transform(data[['Leaf Color']])

# Convert the encoded data back into a DataFrame
leaf_color_encoded_df = pd.DataFrame(
    leaf_color_encoded,
    columns=encoder.get_feature_names_out(['Leaf Color'])
)

# Concatenate the encoded columns back to the original dataset
data = pd.concat([data, leaf_color_encoded_df], axis=1)

# Drop the original 'Leaf Color' column
data.drop('Leaf Color', axis=1, inplace=True)

# Features and target variable
X = data[['Plant Height (m)', 'Stem Width (cm)', 'Leaf Length (cm)'] + list(leaf_color_encoded_df.columns)]
y = data['Yield (kg)']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save the model
joblib.dump(model, r'C:\Users\HP\OneDrive\Desktop\final_pepper\models\yield_prediction_model.pkl')
print("Yield prediction model saved successfully!")

Yield prediction model saved successfully!


In [14]:
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression

# Generate some sample data for yield prediction (you can use your actual dataset here)
X, y = make_regression(n_samples=100, n_features=7, noise=0.1)

# Train the yield model
yield_model = RandomForestRegressor()
yield_model.fit(X, y)

# Save the yield model to disk
joblib.dump(yield_model, 'models/yield_prediction_model.pkl')

['models/yield_prediction_model.pkl']

In [15]:
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression

# Generate some sample data
X, y = make_regression(n_samples=100, n_features=7, noise=0.1)

# Train a model
model = RandomForestRegressor()
model.fit(X, y)

# Save the model to disk
joblib.dump(model, 'models/age_prediction_model.pkl')

['models/age_prediction_model.pkl']

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from joblib import dump
import os

# Load dataset
file_name = r'C:\Users\HP\OneDrive\Desktop\final_pepper\expanded_black_pepper_dataset.csv'
df = pd.read_csv(file_name)

# Encode 'Leaf Color' (categorical variable)
label_encoder = LabelEncoder()
df['Leaf Color'] = label_encoder.fit_transform(df['Leaf Color'])

# Features and targets
X = df[['Plant Height (m)', 'Stem Width (cm)', 'Leaf Length (cm)', 'Leaf Color']]
y_age = df['Age (years)']
y_yield = df['Yield (kg)']

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train_age, X_test_age, y_train_age, y_test_age = train_test_split(X_scaled, y_age, test_size=0.2, random_state=42)
X_train_yield, X_test_yield, y_train_yield, y_test_yield = train_test_split(X_scaled, y_yield, test_size=0.2, random_state=42)

# Train models
age_model = SVR(kernel='rbf', C=1, gamma=0.1)
age_model.fit(X_train_age, y_train_age)

yield_model = RandomForestRegressor(n_estimators=100, random_state=42)
yield_model.fit(X_train_yield, y_train_yield)

# Save models and encoders
os.makedirs(r'C:\Users\HP\OneDrive\Desktop\final_pepper\models', exist_ok=True)
dump(age_model, r'C:\Users\HP\OneDrive\Desktop\final_pepper\models\age_prediction_model.pkl')
dump(yield_model, r'C:\Users\HP\OneDrive\Desktop\final_pepper\models\yield_prediction_model.pkl')
dump(label_encoder, r'C:\Users\HP\OneDrive\Desktop\final_pepper\models\label_encoder.pkl')
dump(scaler, r'C:\Users\HP\OneDrive\Desktop\final_pepper\models\scaler.pkl')

print("Models and related objects saved successfully!")

Models and related objects saved successfully!
