<a href="https://colab.research.google.com/github/MuratCKoc/salesinsider/blob/main/ProphetsPrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# First setup the dependencies

# Our most important library is FB Prophet, which fits an additive time-series forecasting model
from fbprophet import Prophet
import pandas as pd
import numpy as np
import matplotlib

KeyboardInterrupt: 

In [None]:
# Download the groceries transaction log straight from the GitHub repository.
df = pd.read_csv(
    'https://raw.githubusercontent.com/MuratCKoc/salesinsider/main/salesinsider/data/Groceries_dataset.csv'
)
# Only the purchase date and the purchased item are used from here on.
data_df = df.loc[:, ['Date', 'itemDescription']]
data_df.head()

In [None]:
# One-hot encode the purchased item: get_dummies creates one indicator column
# per distinct itemDescription value, set on the rows where that item was bought.
cat_df = pd.get_dummies(data_df["itemDescription"])
# Put the indicator columns next to the original Date / itemDescription columns.
final_df = pd.concat([data_df, cat_df], axis=1)
# Sum the indicators per date to get how many times each item was bought that day.
# The text column is dropped first: pandas >= 2.0 no longer discards non-numeric
# columns in groupby().sum(), so keeping it would leave a column of concatenated
# item names in grouped_df, and every column after "Date" is later treated as an
# item to forecast.
grouped_df = (
    final_df.drop(columns="itemDescription")
    .groupby("Date", as_index=False)
    .sum()
)
grouped_df.head()

In [None]:
# Normalise the column names: drop "(", ")" and ".", turn "/" into " or ",
# and replace spaces with underscores.
# regex= is passed explicitly because the default of str.replace changed to
# regex=False in pandas 2.0, which would make a character-class pattern such as
# "[(.)]" be searched for literally and silently leave the names unchanged.
grouped_df.columns = (
    grouped_df.columns
    .str.replace(r"[(.)]", "", regex=True)
    .str.replace("/", " or ", regex=False)
    .str.replace(" ", "_", regex=False)
)

In [None]:
import os

import matplotlib.pyplot as plt

# Build master_df: for every item column of grouped_df, fit a Prophet model on
# the daily counts, forecast 213 further periods, save the forecast plot, and
# collect "<item>_Predicted" next to the observed "<item>" column, keyed by Date.
#
# Changes from the earlier version of this cell:
#   * the outer `for j in range(727, len(grouped_df.index))` loop is gone; `j`
#     was never used, so it only repeated the whole fit (or skipped it entirely
#     when grouped_df had fewer than 728 rows);
#   * observed values are matched to forecast rows by date instead of by row
#     position (see the note inside the loop);
#   * the unreachable `if i == len(grouped_df.columns): break` is removed;
#   * figures are closed after saving and the plot directory is created if needed.

plot_dir = "static/images/plots/"
os.makedirs(plot_dir, exist_ok=True)

# Parse the "DD-MM-YYYY" strings once instead of once per item.
history_dates = pd.to_datetime(grouped_df["Date"], format='%d-%m-%Y')

item_frames = []
# Column 0 is "Date"; every remaining column is one item to forecast.
for colName in grouped_df.columns[1:]:
    # Prophet expects exactly two columns named "ds" (date) and "y" (value).
    # Building a fresh frame avoids assigning into a slice of grouped_df.
    prophet_df = pd.DataFrame({"ds": history_dates, "y": grouped_df[colName]})

    model = Prophet()
    model.fit(prophet_df)
    future = model.make_future_dataframe(periods=213)
    forecast = model.predict(future)

    predicted_ColName = colName + "_Predicted"

    fig = model.plot(forecast, xlabel='Date', ylabel='Frequency')
    ax = fig.gca()
    ax.set_title(predicted_ColName, size=24)
    ax.set_xlabel("Date", size=18)
    ax.set_ylabel("Frequency", size=18)
    ax.tick_params(axis="x", labelsize=15)
    ax.tick_params(axis="y", labelsize=15)
    fig.savefig(plot_dir + predicted_ColName + ".png")
    # One figure is created per item; close it so they do not pile up in memory.
    plt.close(fig)

    # rename() returns a new frame, so rounding below does not write into `forecast`.
    predicted_df = forecast[["ds", "yhat"]].rename(
        columns={"ds": "Date", "yhat": predicted_ColName}
    )
    predicted_df[predicted_ColName] = predicted_df[predicted_ColName].round()

    # Attach the observed counts by date. grouped_df is ordered by the *string*
    # sort of "DD-MM-YYYY" produced by groupby, while the forecast is expected
    # to be in chronological order, so a positional assignment would pair
    # values with the wrong dates. Forecast-only dates get NaN.
    actual_by_date = pd.Series(grouped_df[colName].to_numpy(), index=history_dates)
    predicted_df[colName] = predicted_df["Date"].map(actual_by_date)

    # Keep the Date column only once, from the first item.
    if item_frames:
        predicted_df = predicted_df.drop(columns="Date")
    item_frames.append(predicted_df)

# A single concat avoids growing master_df one column at a time.
master_df = pd.concat(item_frames, axis=1) if item_frames else pd.DataFrame()

In [None]:
# Preview the consolidated table: each item appears twice, once as the
# observed column and once as its "<item>_Predicted" forecast column.
master_df.head(n=5)

In [None]:
# Build the cleaned table: every NaN in master_df is replaced by an empty
# string, which makes the table easier to use for visualization.
clean_df = master_df.replace(np.nan, "")
clean_df

In [None]:
# Loading the table into SQL failed because some column names contain
# parentheses, so those names are rewritten without them. Keys that are not
# present among the columns are ignored by rename().
parenthesis_free_names = {
    'flower (seeds)': 'flower seeds',
    'liquor (appetizer)': 'liquor appetizer',
    'flower (seeds)_Predicted': 'flower seeds_Predicted',
    'liquor (appetizer)_Predicted': 'liquor appetizer_Predicted',
}
clean_df.rename(columns=parenthesis_free_names, inplace=True)

In [None]:
# Show the column names to confirm the renames took effect.
clean_df.columns.tolist()

In [None]:
# We then proceed to move the table to SQL
# Import SQLAlchemy Dependencies 
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from config import database_url

In [None]:
# To update database with dataframe
def Update_Database(df_name, table_name):
    """Append a DataFrame to a table in the database at ``database_url``.

    Args:
        df_name: pandas DataFrame to write. Its index is written as well
            (``index=True``).
        table_name: Name of the destination table. Rows are appended when the
            table already exists (``if_exists='append'``).

    Raises:
        Whatever SQLAlchemy or pandas raises when the connection or the
        insert fails; nothing is swallowed here.
    """
    # The previous version also opened a Session, reflected the schema with
    # automap_base and opened an extra connection; none of them were used or
    # closed, so they are dropped here.
    engine = create_engine(database_url, echo=False)
    try:
        df_name.to_sql(name=table_name, con=engine, if_exists='append', index=True)
    finally:
        # Release the pooled connections held by this short-lived engine.
        engine.dispose()
    print(table_name + ' added')
    # TODO: add a primary key on `Date` for `predicted_table` once the table exists.

In [None]:
# Write the cleaned forecast table to the "predicted_table" SQL table.
Update_Database(df_name=clean_df, table_name="predicted_table")