In [1]:
# Function for cleaning and wrangling the data

import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm

def preprocess_plant_production_data(csv_path=r'C:\Users\Tracy\Downloads\Canna_project\meau-plav.csv'):
    """
    Load and clean the plant production CSV.

    Steps performed:
    1. Read the CSV at ``csv_path``.
    2. Lower-case every column name.
    3. Drop the 'ccclastupdated' column.
    4. Parse 'activitysummarydate' as datetime and sort the rows by it.
    5. Use 'activitysummarydate' as a DatetimeIndex (the column itself is kept).

    Parameters:
    - csv_path: Path (or any object accepted by ``pd.read_csv``) of the plant
      production CSV. Defaults to the location originally hard-coded in this
      notebook, so existing calls without arguments behave as before.

    Returns:
    - Canna_Plant_prod: Preprocessed DataFrame, sorted by date, with
      'activitysummarydate' both as a column and as the datetime index.

    Raises:
    - KeyError: if the file has no 'ccclastupdated' or 'activitysummarydate'
      column (after lower-casing).
    """
    raw = pd.read_csv(csv_path)

    Canna_Plant_prod = (
        raw
        .rename(columns=str.lower)
        .drop(columns=['ccclastupdated'])
        .assign(activitysummarydate=lambda df: pd.to_datetime(df['activitysummarydate']))
        .sort_values(by='activitysummarydate')
    )

    # Assigning the index is positional, so no reset_index() is needed after
    # sorting. The date column is deliberately kept alongside the index.
    Canna_Plant_prod.index = pd.DatetimeIndex(Canna_Plant_prod['activitysummarydate'])

    return Canna_Plant_prod

# Load and clean the plant production CSV; the resulting DataFrame is kept in
# `preprocessed_plant_prod_data` at notebook scope.
preprocessed_plant_prod_data = preprocess_plant_production_data()



In [None]:
# Function for forecasting and prediction

import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm

def perform_seasonal_decomposition_and_forecasting(
    Canna_Plant_prod,
    period=365,
    order=(0, 1, 12),
    forecast_start='2023-11-13',
    forecast_end='2024-12-31',
    output_path='Prediction_Cultivation_Weed.xlsx',
):
    """
    Decompose the harvest series, fit an ARIMA model and forecast future values.

    The function shows three figures (seasonal decomposition, lag-1 scatter,
    observed vs. predicted for the last 10 observations), forecasts one value
    per day between ``forecast_start`` and ``forecast_end``, and writes the
    original rows plus the forecast rows to an Excel file.

    Parameters:
    - Canna_Plant_prod: DataFrame with an 'activitysummarydate' column
      (datetime or parseable as datetime) and a 'plantharvestedcount' column
      used as the target variable. The frame is not modified.
    - period: Seasonal period passed to ``seasonal_decompose``.
    - order: ARIMA ``(p, d, q)`` order.
    - forecast_start, forecast_end: First and last forecast dates (daily
      frequency). NOTE: the model forecasts the steps that follow the last
      observation; these dates are only labels, so ``forecast_start`` should
      be the day after the last row of the data — confirm for your data set.
    - output_path: Excel file the combined frame is written to.

    Returns:
    - df_2: The original data followed by the forecast rows, indexed by
      'activitysummarydate'. Forecast rows carry their date in
      'activitysummarydate' and their value in 'predicted_value'; the other
      columns are NaN for those rows.

    Raises:
    - ValueError: if the forecast date range is empty.
    """
    # The notebook only imports `statsmodels.api as sm`; the bare name `ARIMA`
    # used originally was never defined, so import the class explicitly.
    from statsmodels.tsa.arima.model import ARIMA

    # Work on a copy so the caller's DataFrame is left untouched.
    data = Canna_Plant_prod.copy()
    data['activitysummarydate'] = pd.to_datetime(data['activitysummarydate'])
    data.index = pd.DatetimeIndex(data['activitysummarydate'])

    harvested = data['plantharvestedcount']
    n_obs = len(harvested)

    # --- Seasonal decomposition (computed once, plotted as four panels) ---
    decomp = sm.tsa.seasonal_decompose(harvested, period=period)
    panels = [
        (harvested, 'Original'),
        (decomp.trend, 'Trend'),
        (decomp.seasonal, 'Seasonal'),
        (decomp.resid, 'Residual'),
    ]
    fig, axes = plt.subplots(len(panels), 1, figsize=(12, 8))
    for ax, (series, label) in zip(axes, panels):
        ax.plot(series, label=label)
        ax.legend(loc='upper left')
    fig.tight_layout()
    plt.show()

    # --- Lag-1 scatter: relation between consecutive data points ---
    # Plain arrays make it explicit that the pairing is positional.
    values = harvested.to_numpy()
    plt.scatter(values[:-1], values[1:], s=0.1)
    plt.show()

    # --- Fit the ARIMA model ---
    model_fit = ARIMA(harvested, order=order).fit()

    # --- In-sample check on the last 10 observations ---
    observed_tail = harvested.iloc[-10:]
    in_sample = model_fit.predict(start=n_obs - 10, end=n_obs - 1)
    backtest_df = pd.DataFrame(
        {'observed': observed_tail.to_numpy(), 'predicted': in_sample.to_numpy()},
        index=observed_tail.index,
    )
    plt.plot(backtest_df['observed'], label='Observed')
    plt.plot(backtest_df['predicted'], label='Predicted')
    plt.legend()
    plt.show()

    # --- Out-of-sample forecast ---
    future_dates = pd.date_range(start=forecast_start, end=forecast_end, freq='D')
    if len(future_dates) == 0:
        raise ValueError('forecast_start must not be after forecast_end')

    forecast = model_fit.predict(start=n_obs, end=n_obs + len(future_dates) - 1)

    # Use the raw forecast values: depending on whether the date index has a
    # frequency, statsmodels may label them with dates or with integers, and
    # aligning on those labels could silently produce NaN.
    # Storing the dates in 'activitysummarydate' keeps them in the combined
    # frame; previously forecast rows ended up with NaT and the Excel export
    # (written without the index) contained no dates for them.
    forecast_df = pd.DataFrame(
        {'activitysummarydate': future_dates, 'predicted_value': forecast.to_numpy()},
        index=future_dates,
    )

    # Combine original data and forecast rows
    df_2 = pd.concat([data, forecast_df], axis=0)
    df_2.index = df_2['activitysummarydate']

    # Save the results to an Excel file
    df_2.to_excel(output_path, index=False)

    return df_2

# Run the decomposition and forecast on the preprocessed frame.
# `Canna_Plant_prod` only exists as a local name inside the functions, so the
# notebook-level variable `preprocessed_plant_prod_data` must be passed here.
result_df = perform_seasonal_decomposition_and_forecasting(preprocessed_plant_prod_data)
