In [33]:
# Cleaning the Data

import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm

def clean_and_wrangle_average_retail_price_data(file_path):
    """
    Load the Average Retail Price CSV and return it as a tidy, date-indexed DataFrame.

    Processing order: column labels are lowercased and their spaces replaced
    with underscores, the 'ccclastupdated' column is removed, 'solddate' is
    parsed to datetime, rows are sorted by 'solddate', and a DatetimeIndex
    built from 'solddate' becomes the index.

    Parameters:
    - file_path: CSV source handed straight to pandas.read_csv.

    Returns:
    - DataFrame sorted by 'solddate' and indexed by a DatetimeIndex built from
      that column. The 'solddate' column itself is kept in the frame.

    Raises:
    - KeyError: if the normalized column labels do not include
      'ccclastupdated' or 'solddate'.
    """
    tidy = (
        pd.read_csv(file_path)
        # Normalize headers: lowercase, spaces -> underscores
        .rename(columns=lambda label: label.lower().replace(' ', '_'))
        # Discard the 'ccclastupdated' column
        .drop(columns=['ccclastupdated'])
        # Parse the sale date so it can be sorted and used as an index
        .assign(solddate=lambda frame: pd.to_datetime(frame['solddate']))
        # Chronological order, with the positional index rebuilt afterwards
        .sort_values(by='solddate')
        .reset_index(drop=True)
    )

    # Index by date while leaving 'solddate' available as a regular column
    tidy.index = pd.DatetimeIndex(tidy['solddate'])
    return tidy

# Load the Average Retail Price CSV and run it through the cleaning function
average_retail_price_csv_path = r'C:\Users\Tracy\Downloads\Canna_project\rqtv-uenj.csv'
cleaned_average_retail_price_data = clean_and_wrangle_average_retail_price_data(average_retail_price_csv_path)



In [None]:



import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA

def time_series_analysis_and_forecasting(data, column_name, order=(0, 1, 12), forecast_steps=10, period=365):
    """
    Decompose a time series, plot it, fit an ARIMA model, and compare the
    model's in-sample predictions with the most recent observations.

    The caller's DataFrame is left untouched: the target column is copied and
    re-indexed locally before any analysis is run.

    Parameters:
    - data: DataFrame holding the series. Dates are taken from its 'solddate'
      column when present, otherwise from its existing index.
    - column_name: Name of the target column in the DataFrame.
    - order: Order of the ARIMA model (p, d, q). Default is (0, 1, 12).
    - forecast_steps: Number of trailing observations to predict and plot
      against the observed values. These are in-sample predictions for the
      last `forecast_steps` rows, not forecasts beyond the end of the data.
      Default is 10.
    - period: Number of observations per seasonal cycle, passed to
      seasonal_decompose. Default is 365 (presumably daily data with a yearly
      cycle -- confirm against the sampling frequency of the data).

    Returns:
    - None (Plots the decomposition, the individual components, a lag plot,
      and observed vs predicted values).

    Raises:
    - ValueError: if forecast_steps is not between 1 and the series length.
    """
    # Work on a private copy so the caller's columns and index are not rewritten.
    series = data[column_name].copy()
    if 'solddate' in data.columns:
        series.index = pd.DatetimeIndex(pd.to_datetime(data['solddate']))
    else:
        series.index = pd.DatetimeIndex(data.index)

    # forecast_steps == 0 would make the [-0:] slice select the whole series,
    # and a value above the series length yields a negative prediction start.
    n_obs = len(series)
    if not 1 <= forecast_steps <= n_obs:
        raise ValueError(
            f"forecast_steps must be between 1 and {n_obs}, got {forecast_steps}"
        )

    # Decompose after the date index is in place so every component shares it.
    decomp = sm.tsa.seasonal_decompose(series, period=period)
    decomp.plot()
    plt.show()

    # Plot the original series and each decomposed component in its own panel
    panels = [
        (series, 'Original'),
        (decomp.trend, 'Trend'),
        (decomp.seasonal, 'Seasonal'),
        (decomp.resid, 'Residual'),
    ]
    plt.figure(figsize=(12, 8))
    for position, (component, label) in enumerate(panels, start=1):
        plt.subplot(len(panels), 1, position)
        plt.plot(component, label=label)
        plt.legend(loc='upper left')
    plt.tight_layout()
    plt.show()

    # Lag plot: pair each value with its successor by position to check
    # whether consecutive datapoints are related.
    values = series.to_numpy()
    plt.scatter(values[:-1], values[1:], s=0.1)
    plt.xlabel('Value at t')
    plt.ylabel('Value at t+1')
    plt.show()

    # Fit the ARIMA model on the full series
    model_fit = ARIMA(series, order=order).fit()

    # In-sample predictions for the last `forecast_steps` observations
    predictions = model_fit.predict(start=n_obs - forecast_steps, end=n_obs - 1)

    # Plot observed vs predicted over that same trailing window
    plt.plot(series.iloc[-forecast_steps:], label='Observed')
    plt.plot(predictions, label='Predicted', color='red')
    plt.legend()
    plt.show()

# Example usage with Average Retail Price data.
# Pass the frame returned by the cleaning step: `Average_retail_price` is only a local
# name inside clean_and_wrangle_average_retail_price_data, so using it here raises
# NameError on a fresh kernel (no visible cell defines it at notebook scope).
time_series_analysis_and_forecasting(cleaned_average_retail_price_data, 'averageretailpricepergm', order=(0, 1, 12), forecast_steps=10)
