Name: Pratik Sushil Tirpude

Project Title: AI for Comparative price analysis in Interior design

Track: Time series model

In [None]:
# Dataset

import pandas as pd

# Load the price history; "Date" is parsed to datetime on read so later cells
# can sort and plot by it.
df = pd.read_csv("interior_design_prices.csv", parse_dates=["Date"])

# Fail early with a clear message if a column the later cells rely on is absent,
# instead of a KeyError deep inside the analysis.
missing_cols = {"Date", "Category", "Price"} - set(df.columns)
if missing_cols:
    raise ValueError(
        f"interior_design_prices.csv is missing columns: {sorted(missing_cols)}"
    )

print(df.head())
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# print() emitted a stray "None" line after the report.
df.info()
print(df['Category'].unique())


Here, we have ensured the data is loaded correctly.

In [None]:
#Exploratory data analysis

import matplotlib.pyplot as plt
import seaborn as sns

# One price-over-time line per category, drawn on a single wide axes.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(x="Date", y="Price", hue="Category", marker="o", data=df, ax=ax)
ax.set_title("Comparative Price Trends in Interior Design")
ax.set_xlabel("Date")
ax.set_ylabel("Price (INR)")
ax.grid(True)
plt.show()


In the line chart above, we can see the price trends for Furniture, Lighting, Flooring and Decor.
From the chart, we can clearly see a consistent price rise in the Furniture and Decor categories.

In [None]:
#Growth and volatility analysis

# Work on a sorted copy so that, inside each category, consecutive rows are
# consecutive observations in time; the original frame is left untouched.
df_sorted = df.sort_values(by=["Category", "Date"]).copy()

# Period-over-period percentage change of Price, computed within each category
# (the first row of every category has no predecessor and becomes NaN).
price_by_category = df_sorted.groupby("Category")["Price"]
df_sorted["Monthly_Return"] = price_by_category.pct_change()

# Output column -> (source column, aggregation) for the per-category summary.
category_stats = {
    "Start_Price": pd.NamedAgg(column="Price", aggfunc=lambda prices: prices.iloc[0]),
    "End_Price": pd.NamedAgg(column="Price", aggfunc=lambda prices: prices.iloc[-1]),
    "Months": pd.NamedAgg(column="Price", aggfunc="count"),
    "Avg_Monthly_Return": pd.NamedAgg(column="Monthly_Return", aggfunc="mean"),
    "Volatility": pd.NamedAgg(column="Monthly_Return", aggfunc="std"),
}
summary = df_sorted.groupby("Category").agg(**category_stats).reset_index()

# CAGR calculation
def calc_cagr(start, end, months):
    """Return the compound annual growth rate between two prices.

    Computes ``(end / start) ** (1 / years) - 1`` with ``years = months / 12``.

    Parameters
    ----------
    start : float
        Price at the beginning of the period.
    end : float
        Price at the end of the period.
    months : float
        Length of the period in months.

    Returns
    -------
    float
        The annualised growth rate as a fraction (0.10 means 10% per year), or
        NaN when the rate is undefined: a non-positive period length, a
        non-positive start price, or a negative end price.
    """
    # Guard the inputs that would otherwise divide by zero or raise a negative
    # ratio to a fractional power (which yields a complex number or nan).
    if months <= 0 or start <= 0 or end < 0:
        return float("nan")
    years = months / 12
    return (end / start) ** (1 / years) - 1

def _category_cagr(row):
    """Annualised growth for one summary row, based on elapsed time.

    ``Months`` is the number of price observations. N observations span only
    N - 1 intervals, so passing the raw count overstated the period and
    understated the growth rate.
    NOTE(review): assumes one observation per month with no gaps - confirm
    against the dataset.
    """
    elapsed_months = row["Months"] - 1
    # A single observation has no elapsed time, so no growth rate is defined.
    if elapsed_months <= 0:
        return float("nan")
    return calc_cagr(row["Start_Price"], row["End_Price"], elapsed_months)

summary["CAGR"] = summary.apply(_category_cagr, axis=1)
print(summary)


Here, the CAGR shows that Decor has the fastest growth rate, while the volatility figures indicate that Furniture has the most stable growth.

In [None]:
# Forecasting

!pip install prophet


In [None]:
#Forecasting each category

# Prophet is installed by the previous cell but was never imported, so
# `Prophet()` raised NameError on a fresh kernel.
from prophet import Prophet

categories = df['Category'].unique()
all_forecasts = {}  # category name -> full Prophet forecast frame (history + future)
for cat in categories:
    print(f'\n==== Forecasting: {cat} ====')

    # Build the per-category series with the `ds` / `y` column names that
    # Prophet's fit() requires.
    sub = df[df['Category'] == cat].sort_values('Date')
    ts = sub[['Date','Price']].rename(columns={'Date':'ds','Price':'y'})

    m = Prophet()
    m.fit(ts)

    # Extend the history by 12 month-start dates and predict over both the
    # historical and the new dates.
    future = m.make_future_dataframe(periods=12, freq='MS')
    fcst = m.predict(future)
    all_forecasts[cat] = fcst

    # Label the forecast figure through its own axes rather than pyplot's
    # implicit "current axes" state.
    fig1 = m.plot(fcst)
    forecast_ax = fig1.axes[0]
    forecast_ax.set_title(f'Price Forecast: {cat}')
    forecast_ax.set_xlabel('Date')
    forecast_ax.set_ylabel('Price (INR)')
    plt.show()

    fig2 = m.plot_components(fcst)
    plt.show()

    # Release both figures so they do not accumulate across categories.
    plt.close(fig1)
    plt.close(fig2)

    # Persist the point forecast and its uncertainty bounds; the evaluation
    # cell reads these files back by the same name.
    out_path = f'forecast_{cat.lower()}.csv'
    fcst[['ds','yhat','yhat_lower','yhat_upper']].to_csv(out_path, index=False)
    print('Saved:', out_path)

# For every category, average `yhat` over the last 12 rows of its forecast
# frame and collect the results into one table.
future_summary = [
    {
        'Category': category_name,
        'Avg_Predicted_Price_Next12M': forecast_frame.tail(12)['yhat'].mean(),
    }
    for category_name, forecast_frame in all_forecasts.items()
]
future_summary_df = pd.DataFrame(future_summary)
display(future_summary_df)

# Keep a copy on disk next to the per-category forecast files.
future_summary_df.to_csv('future_summary_next12m.csv', index=False)
print('Saved: future_summary_next12m.csv')

In [None]:
#Evaluation metrics

from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Prophet is needed here to refit each model on the training window only.
from prophet import Prophet

# Last date that belongs to the training window; later rows are held out.
TRAIN_END = pd.Timestamp("2023-12-01")

evaluation_results = []

for cat in df['Category'].unique():
    # Actual prices for this category, in Prophet's ds / y layout.
    actual = (
        df[df["Category"] == cat][["Date", "Price"]]
        .rename(columns={"Date": "ds", "Price": "y"})
        .sort_values("ds")
    )

    # Train/test split: train through TRAIN_END, test on everything after it.
    train = actual[actual["ds"] <= TRAIN_END]
    # Rows without an actual price cannot be scored.
    test = actual[actual["ds"] > TRAIN_END].dropna(subset=["y"])

    if test.empty:
        print(f"No forecast data matched for {cat}, skipping.")
        continue

    # Prophet refuses to fit on fewer than two non-NaN observations.
    if train["y"].notna().sum() < 2:
        print(f"Not enough training data up to {TRAIN_END.date()} for {cat}, skipping.")
        continue

    # The saved forecast_<category>.csv files come from models fitted on the
    # FULL history (test period included), so scoring them against the test rows
    # would measure in-sample fit. Refit on the training window only, so the
    # metrics below are out-of-sample.
    holdout_model = Prophet()
    holdout_model.fit(train)
    pred = holdout_model.predict(test[["ds"]])

    # Align by merging on 'ds' (to avoid index mismatch)
    merged = test.merge(pred[["ds","yhat"]], on="ds", how="inner")

    if merged.empty:
        print(f"Could not align actual vs forecast for {cat}, skipping.")
        continue

    # Compute metrics
    mae = mean_absolute_error(merged['y'], merged['yhat'])
    mse = mean_squared_error(merged['y'], merged['yhat'])
    rmse = np.sqrt(mse)   # RMSE derived from MSE

    print(f"{cat} → MAE={mae:.2f}, RMSE={rmse:.2f}")

    evaluation_results.append({
        "Category": cat,
        "MAE": mae,
        "RMSE": rmse
    })

# Save evaluation results to CSV
if evaluation_results:
    eval_df = pd.DataFrame(evaluation_results)
    display(eval_df)
    eval_df.to_csv("evaluation_summary.csv", index=False)
    print("Saved: evaluation_summary.csv")
else:
    print("No evaluation results generated.")

