In [None]:
# Install the Kaggle command-line client; a later cell uses it to download the dataset.
!pip install kaggle

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read from this runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# List the top level of the mounted Drive.
! ls /content/drive


In [None]:
# Search the mounted Drive for the Kaggle API credentials file.
!find /content/drive -name kaggle.json

In [None]:
# List the contents of MyDrive.
!ls /content/drive/MyDrive

In [None]:
# List the contents of /content.
!ls /content

In [None]:
# Install the Kaggle API credentials: create ~/.kaggle, copy kaggle.json into it
# from Drive, and set the file mode to 600 (read/write for the owner only).
!mkdir -p ~/.kaggle
!cp "/content/drive/MyDrive/MyDrive kaggle/kaggle.json" ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the pratyushakar/rossmann-store-sales dataset with the Kaggle CLI.
# The next cell expects the archive at /content/rossmann-store-sales.zip.
!kaggle datasets download -d pratyushakar/rossmann-store-sales

In [None]:
# Extract the downloaded archive into the current working directory.
from zipfile import ZipFile
dataset="/content/rossmann-store-sales.zip"
# The handle is named `archive`, not `zip`: names bound in a cell are kernel
# globals, so binding `zip` here would replace the built-in zip() in every later cell.
with ZipFile(dataset,"r") as archive:
  archive.extractall()
  print("successfull")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Load train.csv into sales_dataset.
sales_dataset=pd.read_csv("/content/train.csv")

In [None]:
# Preview the first rows.
sales_dataset.head()

In [None]:
# (number of rows, number of columns)
sales_dataset.shape

In [None]:
# Count of missing values per column.
sales_dataset.isnull().sum()

In [None]:
# Load store.csv into store_dataset.
store_dataset=pd.read_csv("/content/store.csv")

In [None]:
# Preview the first rows.
store_dataset.head()

In [None]:
# (number of rows, number of columns)
store_dataset.shape

In [None]:
# Count of missing values per column; the cells below fill these column by column.
store_dataset.isnull().sum()

In [None]:
# Distribution of CompetitionDistance, inspected before choosing a fill value.
sns.displot(data=store_dataset, x="CompetitionDistance")

In [None]:
# Fill missing CompetitionDistance values with the most frequent distance.
# The dict key must match the column name exactly (keys that match no column are
# skipped without error), and mode() returns a Series, so [0] picks a scalar from it.
store_dataset = store_dataset.fillna(
    {"CompetitionDistance": store_dataset["CompetitionDistance"].mode()[0]}
)

In [None]:
# Distribution of CompetitionOpenSinceMonth, inspected before choosing a fill value.
sns.displot(data=store_dataset, x="CompetitionOpenSinceMonth")

In [None]:
# Fill missing CompetitionOpenSinceMonth values with the column mean.
# mean() must be called: passing the bare method would hand fillna a function
# object instead of a number.
store_dataset = store_dataset.fillna(
    {"CompetitionOpenSinceMonth": store_dataset["CompetitionOpenSinceMonth"].mean()}
)

In [None]:
# Distribution of CompetitionOpenSinceYear, inspected before choosing a fill value.
sns.displot(data=store_dataset, x="CompetitionOpenSinceYear")

In [None]:
# Fill missing CompetitionOpenSinceYear values with the column median.
store_dataset = store_dataset.fillna(
    {"CompetitionOpenSinceYear": store_dataset["CompetitionOpenSinceYear"].median()}
)

In [None]:
# Distribution of Promo2SinceWeek, inspected before choosing a fill value.
sns.displot(data=store_dataset, x="Promo2SinceWeek")

In [None]:
# Fill missing Promo2SinceWeek values with the column mean.
store_dataset = store_dataset.fillna(
    {"Promo2SinceWeek": store_dataset["Promo2SinceWeek"].mean()}
)

In [None]:
# Distribution of Promo2SinceYear, inspected before choosing a fill value.
sns.displot(data=store_dataset, x="Promo2SinceYear")

In [None]:
# Fill missing Promo2SinceYear values with the column mean.
store_dataset = store_dataset.fillna(
    {"Promo2SinceYear": store_dataset["Promo2SinceYear"].mean()}
)

In [None]:
# Distribution of PromoInterval, inspected before choosing a fill value.
sns.displot(data=store_dataset, x="PromoInterval")

In [None]:
# Mark missing PromoInterval entries with the placeholder label "unknown".
store_dataset = store_dataset.fillna(value={"PromoInterval": "unknown"})

In [None]:
# Load test.csv into test_dataset.
test_dataset=pd.read_csv("/content/test.csv")

In [None]:
# Preview the first rows.
test_dataset.head()

In [None]:
# Count of missing values per column.
test_dataset.isnull().sum()

In [None]:
# Distribution of the Open column, inspected before choosing a fill value.
sns.displot(test_dataset["Open"])

In [None]:
# Same preview as the earlier test_dataset.head() cell.
test_dataset.head()

In [None]:
# Fill missing Open values with the most frequent value of the column.
most_common_open = test_dataset["Open"].mode()[0]
test_dataset = test_dataset.fillna({"Open": most_common_open})

In [None]:
# Re-check missing values per column after the fill above.
test_dataset.isnull().sum()

In [None]:
# Parse Date into datetimes and order the rows chronologically.
sales_dataset = (
    sales_dataset
    .assign(Date=pd.to_datetime(sales_dataset["Date"]))
    .sort_values("Date")
)

In [None]:
# Month number taken from the Date column.
sales_dataset["month"]=sales_dataset["Date"].dt.month

In [None]:
# Year taken from the Date column.
sales_dataset["Year"]=sales_dataset["Date"].dt.year

In [None]:
# Make Date the index; the resample() calls below operate on this index.
# Re-running this cell on the same kernel fails, because Date is then no longer a column.
sales_dataset=sales_dataset.set_index("Date")

In [None]:
# Sum of Sales per month ("ME" is the month-end frequency alias).
sales_monthly=sales_dataset["Sales"].resample("ME").sum()

In [None]:
# Line chart of the monthly sales totals.
plt.figure(figsize=(10,10))
plt.plot(sales_monthly.index,sales_monthly.values)
plt.title("Monthly Sales")
plt.xlabel("Date")
plt.ylabel("Sum of Sales")
# plt.show() renders the figure; an argument-less plt.plot() would draw nothing
# and only echo an empty list as the cell output.
plt.show()

In [None]:
# Sum of Sales per week ("W" is the weekly frequency alias), computed on the Date index.
sales_week=sales_dataset["Sales"].resample("W").sum()

In [None]:
# Line chart of the weekly sales totals.
plt.figure(figsize=(10,10))
plt.plot(sales_week.index,sales_week.values)
plt.title("WEEKLY SALES")
plt.xlabel("Date")
plt.ylabel("Sum of Sales")
# plt.show() renders the figure; an argument-less plt.plot() would draw nothing
# and only echo an empty list as the cell output.
plt.show()

In [None]:
# Text label derived from SchoolHoliday: "yes" where the flag equals 1, "working" otherwise.
sales_dataset["Isholiday"] = (
    sales_dataset["SchoolHoliday"].eq(1).map({True: "yes", False: "working"})
)

In [None]:
# Lag and rolling-mean features, computed separately for each store so that a
# row's "previous" values come from the same store rather than from whichever
# store happens to sit on the neighbouring row.
# NOTE(review): assumes one row per (Store, Date) with rows already sorted by Date
# (the sort happens in the earlier date-parsing cell) - confirm.
sales_by_store = sales_dataset.groupby("Store")["Sales"]
# Previous observation of the same store.
sales_dataset["sales_lag"] = sales_by_store.shift(1)
# Mean over the store's last 30 observations; NaN until 30 are available.
sales_dataset["rolling_30"] = sales_by_store.transform(lambda s: s.rolling(30).mean())

In [None]:
# Positional 80/20 split: the first 80% of rows form train, the remainder test.
train_size = int(len(sales_dataset) * 0.8)
train, test = sales_dataset.iloc[:train_size], sales_dataset.iloc[train_size:]

In [None]:
# NOTE(review): `name` from os is not used as a variable in this cell (`name=` below
# is a keyword argument); this import looks like an editor auto-import.
from os import name
# Dates flagged as state holidays, in the ds/holiday layout used for Prophet's holidays frame.
# The flag is compared as text: if the column holds string codes (for example "0"
# alongside integer 0), a numeric `!= 0` test would treat "0" as a holiday.
# NOTE(review): confirm the actual codes with sales_dataset["StateHoliday"].unique().
is_state_holiday = sales_dataset["StateHoliday"].astype(str) != "0"
state_holidays = sales_dataset[is_state_holiday].index.to_frame(name="ds")
state_holidays["holiday"] = "stateholiday"

In [None]:
# Displays state_holidays with a fresh 0..n-1 index. The result is not assigned,
# so state_holidays itself keeps its existing index.
state_holidays.reset_index(drop=True)

In [None]:
from os import name
# Dates flagged as school holidays (SchoolHoliday == 1), in the same ds/holiday layout.
on_school_holiday = sales_dataset['SchoolHoliday'].eq(1)
school_holidays = sales_dataset.loc[on_school_holiday].index.to_frame(name="ds")
school_holidays["holiday"] = "schoolholidays"

In [None]:
# Displays school_holidays with a fresh 0..n-1 index. The result is not assigned,
# so school_holidays itself keeps its existing index.
school_holidays.reset_index(drop=True)

In [None]:
# Combine both holiday tables, attach the effect window (0 days before, 1 day
# after) and drop repeated rows.
holiday_df = (
    pd.concat([state_holidays, school_holidays])
    .assign(lower_window=0, upper_window=1)
    .drop_duplicates()
)

In [None]:
from prophet import Prophet

# Prophet model with the custom holiday table and two extra regressors.
model_1 = Prophet(holidays=holiday_df)
for extra_regressor in ("Customers", "Promo"):
    model_1.add_regressor(extra_regressor)

In [None]:
# Frame used to fit the model: ds (copied from the index), y (renamed from
# Sales) and the two regressor columns.
df_prophet = (
    sales_dataset
    .assign(ds=sales_dataset.index)
    .loc[:, ["ds", "Sales", "Customers", "Promo"]]
    .rename(columns={"Sales": "y"})
)

In [None]:
# Show the column names of the frame passed to fit() below.
print(df_prophet.columns)

In [None]:
# Fit the model on df_prophet.
model_1.fit(df_prophet)

In [None]:
# Forecast frame extended by 30 periods. The regressors are filled with
# constants on every row: the mean of Customers, and Promo = 0.
future = model_1.make_future_dataframe(periods=30).assign(
    Customers=df_prophet["Customers"].mean(),
    Promo=0,
)
forecast = model_1.predict(future)

In [None]:
# Print the forecast frame as plain text.
print(forecast)

In [None]:
# Preview the first rows of the held-out slice.
test.head()

In [None]:
# Same preview as the previous cell.
test.head()

In [None]:
from sklearn.metrics import mean_absolute_error

# NOTE(review): model_1 is fit on df_prophet, which is built from all of
# sales_dataset, and `test` is a slice of that same frame, so this MAE is an
# in-sample figure rather than a hold-out score.
ytrue = test["Sales"].to_numpy()
# One forecast value per test row, looked up by the row's date.
yhat_by_date = forecast.set_index("ds")["yhat"]
ypred = yhat_by_date.loc[test.index].to_numpy()
mae = mean_absolute_error(ytrue, ypred)
print("MAE:", mae)


In [None]:
# Preview the last rows of sales_dataset.
sales_dataset.tail()

In [None]:
# Report table: actual vs. forecasted sales for each test row, plus descriptive columns.
report_columns = {
    "Date": test.index,
    "Actual_sales": ytrue,
    "forecasted_sales": ypred,
}
# Columns copied from `test` unchanged, as output name -> source column.
# NOTE(review): the "Week" output column carries DayOfWeek values.
passthrough = {
    "Store": "Store",
    "Week": "DayOfWeek",
    "Month": "month",
    "Year": "Year",
    "stateholiday": "StateHoliday",
    "schoolholiday": "SchoolHoliday",
    "holiday": "Isholiday",
}
for out_name, source_name in passthrough.items():
    report_columns[out_name] = test[source_name]
# Signed error: actual minus forecast.
report_columns["forecasted_error"] = ytrue - ypred
req_df = pd.DataFrame(report_columns)


In [None]:
# Write the report to forecasted_sales.csv without the index column.
req_df.to_csv("forecasted_sales.csv",index=False)

In [None]:
# Download the CSV written above through the Colab files helper.
from google.colab import files
files.download("forecasted_sales.csv")