In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the dataset from avocado.csv; the "Unnamed: 0" column is exposed as "Id".
df = pd.read_csv("avocado.csv").rename(columns={"Unnamed: 0": "Id"})

# Parse the dates, then derive calendar features from them and encode the
# two categorical columns: "type" becomes 0/1, "region" becomes one-hot
# indicator columns (first level dropped).
df["Date"] = pd.to_datetime(df["Date"])
date_parts = df["Date"].dt
df = df.assign(
    month=date_parts.month,
    week=date_parts.isocalendar().week,
    year=date_parts.year,
    type=df["type"].map({"conventional": 0, "organic": 1}),
)
df = pd.get_dummies(df, columns=["region"], drop_first=True)

# Target is the average price; predictors are everything else except the
# raw date and the Id column.
target_column = "AveragePrice"
y = df[target_column]
X = df.drop(columns=["Date", target_column, "Id"])

# Hold out 20% of the rows for evaluation; the seed is fixed so the split
# is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Candidate regressors, keyed by the name used when reporting results.
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(
        n_estimators=100, learning_rate=0.1, random_state=42
    ),
}


def _regression_metrics(actual, predicted):
    """Return the R2, MAE and RMSE of ``predicted`` against ``actual``."""
    return {
        "R2 Score": r2_score(actual, predicted),
        "MAE": mean_absolute_error(actual, predicted),
        "RMSE": np.sqrt(mean_squared_error(actual, predicted)),
    }


# Fit each candidate on the training split and score it on the held-out split.
results = {}
for label, estimator in models.items():
    estimator.fit(X_train, y_train)
    results[label] = _regression_metrics(y_test, estimator.predict(X_test))

# Print each model's metrics to four decimal places, one paragraph per model.
for label in results:
    print(f"{label} Performance:")
    for metric_name, score in results[label].items():
        print(f"  {metric_name}: {score:.4f}")
    print()  # blank line separating models

# Forecasting future avocado prices
# Six consecutive month-end dates, the first being the end of March 2025.
# An explicit MonthEnd offset yields the same dates as the "M" alias, which
# pandas 2.2 deprecated in favour of "ME" (an alias that older pandas
# versions reject), so this form works across versions without warnings.
future_dates = pd.date_range(
    start="2025-03-01", periods=6, freq=pd.offsets.MonthEnd()
)
future_df = pd.DataFrame({"Date": future_dates})
future_df["month"] = future_df["Date"].dt.month
future_df["week"] = future_df["Date"].dt.isocalendar().week
future_df["year"] = future_df["Date"].dt.year

# Every other training feature is unknown for future dates, so it is held
# constant at its training-set mean.
for col in X_train.columns:
    if col not in future_df.columns:
        future_df[col] = X_train[col].mean()

# Forecast with the model that actually scored the highest R2 on the
# held-out split, rather than assuming a particular model is the best.
best_model_name = max(results, key=lambda name: results[name]["R2 Score"])
best_model = models[best_model_name]

# Select the columns in training order so the features line up with what
# the model was fitted on.
future_df["PredictedPrice"] = best_model.predict(future_df[X_train.columns])
print("Future Avocado Price Predictions:")
print(future_df[["Date", "PredictedPrice"]])
