In [None]:
!pip install fbprophet

In [None]:
!pip uninstall pystan

In [None]:
import boto3
import base64
from botocore.exceptions import ClientError
import json
import os
import pandas as pd
import sys
from io import StringIO 
import matplotlib.pyplot as plt
%matplotlib inline
from fbprophet import Prophet

In [None]:
conda env list

In [None]:
def get_secret(secret_name='arn:aws:secretsmanager:us-east-2:363247502029:secret:AWSAccessKeys-6eo7IN',
               region_name="us-east-2"):
    """Fetch a secret from AWS Secrets Manager and return it parsed as JSON.

    Args:
        secret_name: Name or ARN of the secret to read. Defaults to the
            access-key secret used by this notebook.
        region_name: AWS region of the Secrets Manager endpoint.

    Returns:
        The secret payload decoded with ``json.loads``.

    Raises:
        Whatever ``get_secret_value`` raises is propagated unchanged (no
        exception is caught here). ``json.loads`` raises ``ValueError`` if the
        payload is not valid JSON.
    """
    # Create a Secrets Manager client from a fresh default session.
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )

    # See https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
    response = client.get_secret_value(SecretId=secret_name)

    # Depending on whether the secret is a string or binary, one of these
    # fields will be populated.
    if 'SecretString' in response:
        secret = response['SecretString']
    else:
        # NOTE(review): kept from the original; confirm whether boto3 already
        # returns SecretBinary base64-decoded before relying on this branch.
        secret = base64.b64decode(response['SecretBinary'])
    return json.loads(secret)

In [None]:
# Read the AWS key pair out of the Secrets Manager payload (see get_secret).
secrets = get_secret()
aws_id, aws_secret = secrets['AWS_ACCESS_KEY'], secrets['AWS_SECRET_KEY']

# S3 client authenticated with that key pair.
client = boto3.client(
    's3',
    aws_access_key_id=aws_id,
    aws_secret_access_key=aws_secret,
)

# Bucket and object key of the raw dataset.
bucket_name, object_key = 'airline-test', 'dataset/FlyDubai.csv'

# Download the object and decode it to text; Model() parses csv_string later.
csv_obj = client.get_object(Bucket=bucket_name, Key=object_key)
body = csv_obj['Body']
csv_string = body.read().decode('utf-8')

In [None]:
class Model():
    """Per-key booking and revenue forecasts built with Prophet.

    The five dimension columns (Airline, Source, Destination, Product,
    Customer) are packed into one ``Key`` string. One Prophet model is fitted
    per key and per measure, and the forecasts are written to CSV in long
    format under ``Output/``.

    Expected call order:
        1. ``data_preparation``   - build ``Key`` and the booking/revenue frames.
        2. ``data_preprocessing`` - fit and forecast bookings (despite the name).
        3. ``Model_training``     - export bookings, then forecast and export
           revenue.
    """

    # Separator used to pack / unpack the dimension columns in 'Key'.
    KEY_SEPARATOR = "<<>>"
    # Dimension columns, in the order they are packed into 'Key'.
    KEY_COLUMNS = ['Airline', 'Source', 'Destination', 'Product', 'Customer']
    # Prefix of the per-key forecast columns in the wide frame.
    FORECAST_PREFIX = 'yhat_'
    # Destination of the exported forecasts.
    OUTPUT_DIR = "Output"
    BOOKING_CSV = "Output/final_forecast_Booking.csv"
    REVENUE_CSV = "Output/final_forecast_revenue.csv"

    def __init__(self, csv_text=None, forecast_days=365):
        """Parse the input CSV.

        Args:
            csv_text: CSV content as a string. When omitted, the module-level
                ``csv_string`` is used, as before.
            forecast_days: Number of future periods passed to Prophet's
                ``make_future_dataframe``.
        """
        if csv_text is None:
            csv_text = csv_string
        self.forecast_days = forecast_days
        self.vAR_df1 = pd.read_csv(StringIO(csv_text))

    def data_preparation(self):
        """Add the ``Key`` column and derive the booking and revenue frames."""
        # Concatenate the dimension columns into one column.
        key = self.vAR_df1[self.KEY_COLUMNS[0]]
        for column in self.KEY_COLUMNS[1:]:
            key = key + self.KEY_SEPARATOR + self.vAR_df1[column]
        self.vAR_df1['Key'] = key

        # Booking and revenue datasets.
        self.vAR_df2_book = self.vAR_df1[["Key", "Date", "Booking"]]
        self.vAR_df2_rev = self.vAR_df1[["Key", "Date", "Revenue"]]

        # Prophet expects the columns to be called 'ds' and 'y'.
        self.vAR_booking = self.vAR_df2_book.rename(columns={'Date': 'ds', 'Booking': 'y'})

    def data_preprocessing(self):
        """Forecast bookings per key and store the wide result in ``vAR_Outcome``."""
        self.vAR_Outcome = self._forecast_by_key(self.vAR_booking)

    def Model_training(self):
        """Export the booking forecast, then forecast and export revenue.

        Afterwards ``Model_outcome`` and ``final_model_outcome`` hold the
        revenue results; ``vAR_Outcome`` still holds the wide booking frame.
        """
        # Bookings: reshape the wide frame from data_preprocessing and export.
        self.Model_outcome = self._to_long_format(self.vAR_Outcome, "Booking")
        self.final_model_outcome = self._export(self.Model_outcome, "Booking", self.BOOKING_CSV)

        # Revenue: same pipeline on the revenue measure.
        self.vAR_revenue = self.vAR_df2_rev.rename(columns={'Date': 'ds', 'Revenue': 'y'})
        revenue_wide = self._forecast_by_key(self.vAR_revenue)
        self.Model_outcome = self._to_long_format(revenue_wide, "Revenue")
        self.final_model_outcome = self._export(self.Model_outcome, "Revenue", self.REVENUE_CSV)

    def _forecast_by_key(self, history):
        """Fit one Prophet model per 'Key'; return 'ds' plus one 'yhat_<Key>' column per key."""
        per_key = []
        for key, group in history.groupby('Key'):
            model = Prophet()
            model.fit(group)
            future = model.make_future_dataframe(periods=self.forecast_days)
            forecast = model.predict(future)
            # Only the point forecast is used downstream. Keeping just that
            # column avoids the suffix collisions that merging whole forecast
            # frames produces once more than three keys are present.
            per_key.append(forecast.set_index('ds')['yhat'].rename(self.FORECAST_PREFIX + key))
        if not per_key:
            raise ValueError("no 'Key' groups to forecast")
        # Outer-align all keys on the date index in a single pass.
        wide = pd.concat(per_key, axis=1).sort_index().rename_axis('ds')
        return wide.reset_index()

    def _to_long_format(self, wide, value_column):
        """Melt a wide forecast frame and unpack 'Key' into the dimension columns."""
        long_df = wide.melt(id_vars="ds", var_name="cols", value_name="Value")
        # Strip only the leading prefix; a key containing 'yhat_' stays intact.
        long_df['cols'] = long_df['cols'].str[len(self.FORECAST_PREFIX):]
        # n is keyword-only in recent pandas; len - 1 splits give exactly one
        # part per dimension column.
        parts = long_df['cols'].str.split(
            self.KEY_SEPARATOR, n=len(self.KEY_COLUMNS) - 1, expand=True
        )
        parts.columns = self.KEY_COLUMNS
        long_df = pd.concat([long_df, parts], axis=1)
        return long_df.rename(
            columns={'ds': 'Date_of_Journey', 'cols': 'Key', 'Value': value_column}
        )

    def _export(self, long_df, value_column, path):
        """Select the output columns, write them to ``path`` and return them."""
        final = long_df[['Date_of_Journey'] + self.KEY_COLUMNS + [value_column]]
        # to_csv does not create missing directories.
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)
        final.to_csv(path, index=False)
        return final


In [None]:
# Run the whole pipeline end to end.
if __name__ == '__main__':
    # The constructor parses the module-level csv_string into a DataFrame.
    model_instance = Model()
    # Build the 'Key' column and the booking / revenue frames.
    model_instance.data_preparation()
    # Fit one Prophet model per key on the booking data.
    model_instance.data_preprocessing()
    # Write the booking forecast, then forecast and write revenue (both under Output/).
    model_instance.Model_training()  