<a href="https://colab.research.google.com/github/EbtesamAlahmari/Erwaa/blob/main/code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Read the tomato crop spreadsheet into a DataFrame
file_path = 'tomato_crop_data.xlsx'
data = pd.read_excel(file_path)

# Build the feature matrix and target vector: the target column and 'City'
# are both left out of the predictors
X = data.drop(columns=['Daily Water Requirement (liters/plant)', 'City'])
y = data['Daily Water Requirement (liters/plant)']

# One-hot encode the categorical predictors; drop_first=True removes the
# first level of each so k categories become k-1 indicator columns
X_encoded = pd.get_dummies(X, drop_first=True)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# the same from run to run
X_train_encoded, X_test_encoded, y_train_encoded, y_test_encoded = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)

# Create the linear regression model and fit it on the training portion
# (fit() returns the fitted estimator itself)
model = LinearRegression().fit(X_train_encoded, y_train_encoded)

# Generate predictions for the held-out rows
y_pred_encoded = model.predict(X_test_encoded)

# Evaluate the model on the held-out rows using Mean Squared Error
mse_encoded = mean_squared_error(y_test_encoded, y_pred_encoded)

# Print the evaluation results
print(f"Mean Squared Error: {mse_encoded}")
print(f"Model Coefficients: {model.coef_}")
print(f"Model Intercept: {model.intercept_}")

# Show up to the first 5 predictions next to their true values.
# Slicing with [:5] (instead of indexing positions 0..4) simply yields fewer
# pairs when the test split holds fewer than five rows, where the indexed
# loop would raise IndexError.
for predicted, actual in zip(y_pred_encoded[:5], y_test_encoded.iloc[:5]):
    print(f"Predicted: {predicted:.2f}, Actual: {actual:.2f}")


Mean Squared Error: 1.5587729287460215
Model Coefficients: [ 0.63732176 -0.0099912  -0.00553505 ... -0.36523352 -1.2574001
  2.87696715]
Model Intercept: 4.614151364811445
Predicted: 1.62, Actual: 3.00
Predicted: 3.28, Actual: 3.20
Predicted: 2.58, Actual: 3.40
Predicted: 2.86, Actual: 2.20
Predicted: 3.38, Actual: 2.20


# Random Forest model with merged weather data

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

# Load the crop spreadsheet and the weather spreadsheet
crop_data = pd.read_excel('tomato_crop_data.xlsx')
weather_data = pd.read_excel('weather_data.xlsx')

# Left join on 'City': every crop row is kept, and rows whose city has no
# match in weather_data get NaN in the weather columns (the imputers below
# fill those in).
# NOTE(review): if weather_data has more than one row per City, this join
# repeats each crop row once per match, and the repeats can land on both
# sides of the train/test split — confirm the merged row count is expected.
data = crop_data.merge(weather_data, on='City', how='left')

# Predictors are everything except the target and the join key
X = data.drop(columns=['Daily Water Requirement (liters/plant)', 'City'])
y = data['Daily Water Requirement (liters/plant)']

# Route columns to a preprocessing branch by dtype.
# NOTE(review): columns whose dtype is not int64, float64 or object match
# neither selection, so they are not passed to either transformer — verify
# that no such columns exist in the merged frame.
numerical_features = X.select_dtypes(include=['int64', 'float64']).columns
categorical_features = X.select_dtypes(include=['object']).columns

# Numeric branch: fill gaps with the column mean, then standardise
numerical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill gaps with the most frequent value, then one-hot
# encode; categories not seen during fit are ignored at predict time
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Apply each branch to its own set of columns
preprocessor = ColumnTransformer([
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])

# Preprocessing and the random forest live in one pipeline, so the imputers,
# scaler and encoder are fitted on whatever data fit() receives
model_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', RandomForestRegressor(n_estimators=100, random_state=42)),
])

# Hold out 20% of the rows for evaluation, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit on the training rows only, then predict the held-out rows
model_pipeline.fit(X_train, y_train)
y_pred = model_pipeline.predict(X_test)

# Mean Squared Error of the pipeline's predictions on the held-out rows
mse = mean_squared_error(y_test, y_pred)

print(f"Mean Squared Error: {mse}")

# Show up to the first 5 predictions next to their true values.
# Slicing with [:5] (instead of indexing positions 0..4) simply yields fewer
# pairs when the test split holds fewer than five rows, where the indexed
# loop would raise IndexError.
for predicted, actual in zip(y_pred[:5], y_test.iloc[:5]):
    print(f"Predicted: {predicted:.2f}, Actual: {actual:.2f}")


Mean Squared Error: 0.001063552827172827
Predicted: 1.40, Actual: 1.40
Predicted: 2.80, Actual: 2.80
Predicted: 3.40, Actual: 3.40
Predicted: 1.70, Actual: 1.70
Predicted: 1.60, Actual: 1.60
