In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pickle

In [2]:
data = pd.read_csv("Cleaned_data.csv")

In [3]:
X = data.drop(columns=['Date','Variety','Yield'])
Y = data['Yield']

In [4]:
X.head()

Unnamed: 0,Noofpots,TempInside,HumidInside,CO2Inside,TempOutside,HumidOutside,CO2Outside
0,6364,24.1,99,661,24.6,91,553
1,6364,25.4,84,608,26.2,83,584
2,6364,24.6,89,675,24.6,90,562
3,6364,24.6,87,705,24.5,90,600
4,6364,24.4,87,726,24.3,87,541


In [5]:
Y.head()

0    23160.0
1    10625.0
2    13625.0
3    13265.0
4    13625.0
Name: Yield, dtype: float64

In [6]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


In [7]:
rf_model = RandomForestRegressor(n_estimators=75, random_state=42)
rf_model.fit(X_train, Y_train)

In [8]:
Y_pred = rf_model.predict(X_test)
mae = mean_absolute_error(Y_test, Y_pred)
mse = mean_squared_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)

In [9]:
print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)
print("R-squared:", r2)

Mean Absolute Error: 4625.552919520222
Mean Squared Error: 114663769.78709783
R-squared: 0.5208454213375163


In [10]:
with open('trained_model_random_forest.pkl', 'wb') as file:
    pickle.dump(rf_model, file)

In [11]:
with open('trained_model_linear.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

In [12]:
example_input = X_test.iloc[0] 
predicted_yield = loaded_model.predict([example_input])
print("Predicted Yield:", predicted_yield)

Predicted Yield: [9153.25484547]


