In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from datetime import datetime

In [None]:
# Load the transactions CSV. SalesPersonCode is read as text because the cells
# below select salespeople with string literals (e.g. '265'); with pandas'
# default dtype inference an all-numeric code column would be parsed as
# integers and every one of those string comparisons would match nothing.
data = pd.read_csv('2023-2024-Horana-cleaned.csv', dtype={'SalesPersonCode': str})

data['DATE'] = pd.to_datetime(data['DATE'])

# NOTE(review): abs() makes negative amounts count as positive sales; confirm
# that negative NetAmount rows are not returns/credits that should offset sales.
data['NetAmount'] = data['NetAmount'].abs()

# One row per (DATE, SalesPersonCode) holding the summed NetAmount.
daily_sales = data.groupby(['DATE', 'SalesPersonCode'])['NetAmount'].sum().reset_index()
data['NetAmount'].head()

In [None]:
# Overlay every salesperson's daily totals on one time axis.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(x='DATE', y='NetAmount', hue='SalesPersonCode', data=daily_sales, ax=ax)
ax.set_title('Total Sales Trend per SalesPerson')
ax.set_xlabel('Date')
ax.set_ylabel('Total Sales')
# Slant the date labels so neighbouring ticks do not overlap.
ax.tick_params(axis='x', labelrotation=45)
ax.legend(title='SalesPersonCode')
plt.show()

In [None]:
def predict_sales(sales_code, steps=7, order=(5, 1, 0)):
    """Plot one salesperson's daily sales and an ARIMA forecast of the days that follow.

    Two figures are shown: the historical daily series, then the same series
    with the forecast appended as a dashed red line.

    Parameters
    ----------
    sales_code
        Value matched against ``daily_sales['SalesPersonCode']``.
    steps : int, default 7
        Number of days to forecast past the last observed date.
    order : tuple, default (5, 1, 0)
        ``(p, d, q)`` order passed to ``ARIMA``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'DATE'`` and ``'Predicted NetAmount'``, one row per forecast day.

    Raises
    ------
    ValueError
        If ``daily_sales`` has no rows for ``sales_code``.
    """
    person_data = daily_sales[daily_sales['SalesPersonCode'] == sales_code]
    if person_data.empty:
        # Fail with a clear message instead of an obscure error further down.
        raise ValueError(f"No sales rows found for SalesPersonCode {sales_code!r}")

    # Reindex to one row per calendar day; days without a record become 0.
    person_data = person_data.set_index('DATE').asfreq('D').fillna(0)

    plt.figure(figsize=(10, 5))
    plt.plot(person_data.index, person_data['NetAmount'], label='Actual Sales')
    plt.title(f'Sales Trend for {sales_code}')
    plt.xlabel('Date')
    plt.ylabel('NetAmount')
    plt.legend()
    plt.show()

    model = ARIMA(person_data['NetAmount'], order=order)
    model_fit = model.fit()

    forecast = model_fit.forecast(steps=steps)
    # Forecast days start the day after the last observed date.
    last_observed = person_data.index[-1]
    forecast_dates = [last_observed + pd.Timedelta(days=i) for i in range(1, steps + 1)]

    plt.figure(figsize=(10, 5))
    plt.plot(person_data.index, person_data['NetAmount'], label='Actual Sales')
    plt.plot(forecast_dates, forecast, label='Forecasted Sales', linestyle='dashed', color='red')
    plt.title(f'{steps}-Day Sales Forecast for {sales_code}')
    plt.xlabel('Date')
    plt.ylabel('NetAmount')
    plt.legend()
    plt.show()

    return pd.DataFrame({'DATE': forecast_dates, 'Predicted NetAmount': forecast})

In [None]:
# Run the forecast for salesperson code '265' and show the predicted values.
sales_code = '265'
predictions = predict_sales(sales_code)
print(predictions)

In [None]:
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
!pip install prophet
!pip install plotly

In [None]:
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing


def evaluate_model(sales_code):
    """Back-test ARIMA on one salesperson's last 7 days, with ETS/Prophet fallbacks.

    The salesperson's rows in ``daily_sales`` are reindexed to daily frequency
    (missing days filled with 0); the final 7 days are held out as the test
    window and ARIMA(5, 1, 0) is fitted on the rest.

    NOTE(review): a later cell defines another ``evaluate_model`` with a
    different return type; whichever cell ran last is the one in effect.

    Parameters
    ----------
    sales_code
        Value matched against ``daily_sales['SalesPersonCode']``.

    Returns
    -------
    tuple
        ``(forecast, test)`` from ARIMA when its MAPE is at most 40%;
        otherwise the result of ``evaluate_ets`` (MAPE above 40%) or of
        ``evaluate_prophet`` (ARIMA could not be fitted).
    """
    person_data = daily_sales[daily_sales['SalesPersonCode'] == sales_code]
    person_data = person_data.set_index('DATE').asfreq('D').fillna(0)  # Ensure daily frequency

    train = person_data.iloc[:-7]
    test = person_data.iloc[-7:]

    # Guard only the ARIMA fit/forecast. Previously the metric computation and
    # the ETS fallback were inside the try as well, so an error raised there
    # was reported as "ARIMA failed" and triggered a second fallback.
    try:
        model = ARIMA(train['NetAmount'], order=(5, 1, 0))
        model_fit = model.fit()
        forecast = model_fit.forecast(steps=len(test))
    except Exception as e:
        print(f"ARIMA failed for {sales_code} due to {e}. Switching to Prophet...")
        return evaluate_prophet(sales_code, test)

    # NOTE(review): missing days were filled with 0 above, and sklearn's MAPE
    # becomes arbitrarily large when an actual value is 0, so a single no-sales
    # day in the test window can push this over the threshold.
    mape = mean_absolute_percentage_error(test['NetAmount'], forecast) * 100

    print(f"Model Evaluation (ARIMA) for {sales_code}:")
    print(f"MAPE: {mape:.2f}% ")

    if mape > 40:
        # The message now names the model that is actually tried next.
        print(f"High MAPE ({mape:.2f}%). Trying with ETS for {sales_code}.")
        return evaluate_ets(sales_code, test)

    return forecast, test


def evaluate_prophet(sales_code, test):
    """Score a Prophet forecast against a held-out test window.

    Prophet is fitted only on the salesperson's rows dated before the first
    test date and asked to predict exactly the test dates. (Fitting on all
    rows and forecasting the days after the data ends would both leak the
    test values into training and compare predictions for the wrong dates.)

    Parameters
    ----------
    sales_code
        Value matched against ``daily_sales['SalesPersonCode']``.
    test : pandas.DataFrame
        Held-out rows indexed by date, with a ``'NetAmount'`` column.

    Returns
    -------
    tuple
        ``(pred_values, test)``. ``pred_values`` is an array of ``yhat``
        values, one per test date, or a list of 7 copies of the mean
        NetAmount when the salesperson has fewer than 10 rows.
    """
    person_data = daily_sales[daily_sales['SalesPersonCode'] == sales_code]
    if len(person_data) < 10:
        print(f"Not enough data for {sales_code}, using mean-based prediction.")
        return [person_data['NetAmount'].mean()] * 7, test

    # Keep only observations that precede the test window.
    test_start = test.index[0]
    history = person_data.loc[person_data['DATE'] < test_start, ['DATE', 'NetAmount']]
    history = history.rename(columns={'DATE': 'ds', 'NetAmount': 'y'})

    model = Prophet()
    model.fit(history)

    # Predict on the test dates themselves so predictions and actuals line up.
    future = pd.DataFrame({'ds': test.index})
    forecast = model.predict(future)

    pred_values = forecast['yhat'].values
    mape = mean_absolute_percentage_error(test['NetAmount'], pred_values) * 100

    print(f"Model Evaluation (Prophet) for {sales_code}:")
    print(f"MAPE: {mape:.2f}% ")

    return pred_values, test


def evaluate_ets(sales_code, test):
    """Score an additive-trend exponential smoothing forecast against ``test``.

    The training series is the salesperson's daily-frequency series (missing
    days filled with 0) up to, but excluding, the first test date. Slicing the
    raw rows with ``iloc[:-7]`` instead would drop the last 7 *recorded* days,
    which need not be the 7 calendar days that make up ``test``.

    Parameters
    ----------
    sales_code
        Value matched against ``daily_sales['SalesPersonCode']``.
    test : pandas.DataFrame
        Held-out rows indexed by date, with a ``'NetAmount'`` column.

    Returns
    -------
    tuple
        ``(forecast, test)``. ``forecast`` has one value per test row, or is a
        list of 7 copies of the mean NetAmount when the salesperson has fewer
        than 10 rows.
    """
    person_data = daily_sales[daily_sales['SalesPersonCode'] == sales_code]
    if len(person_data) < 10:
        print(f"Not enough data for {sales_code}, using mean-based prediction.")
        return [person_data['NetAmount'].mean()] * 7, test

    # Same daily reindexing the caller applied before splitting off `test`.
    daily = person_data.set_index('DATE').asfreq('D').fillna(0)
    train = daily.loc[daily.index < test.index[0]]

    model = ExponentialSmoothing(train['NetAmount'], trend="add", seasonal=None)
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=len(test))

    mape = mean_absolute_percentage_error(test['NetAmount'], forecast) * 100

    print(f"Model Evaluation (ETS) for {sales_code}:")
    print(f"MAPE: {mape:.2f}%")

    return forecast, test


def evaluate_moving_avg(sales_code, test, window=7):
    """Score a flat moving-average forecast against ``test``.

    The prediction for every test day is the mean of the last ``window`` days
    of the training series, where the training series is the salesperson's
    daily-frequency series (missing days filled with 0) before the first test
    date.

    Parameters
    ----------
    sales_code
        Value matched against ``daily_sales['SalesPersonCode']``.
    test : pandas.DataFrame
        Held-out rows indexed by date, with a ``'NetAmount'`` column.
    window : int, default 7
        Number of trailing training days averaged.

    Returns
    -------
    tuple
        ``(predictions, test)``. ``predictions`` is a list with one value per
        test row, or 7 copies of the overall mean NetAmount when there is not
        enough data to fill one window.
    """
    person_data = daily_sales[daily_sales['SalesPersonCode'] == sales_code]

    train = None
    if len(person_data) >= window:
        # Align with how the caller built `test`, then cut at the test start.
        daily = person_data.set_index('DATE').asfreq('D').fillna(0)
        train = daily.loc[daily.index < test.index[0]]

    # The window must fit inside the *training* slice; checking only the full
    # data let a too-short slice through, where the rolling mean is NaN or
    # `.iloc[-1]` fails on an empty series.
    if train is None or len(train) < window:
        print(f"Not enough data for {sales_code}, using mean-based prediction.")
        return [person_data['NetAmount'].mean()] * 7, test

    forecast = train['NetAmount'].rolling(window=window).mean().iloc[-1]

    predictions = [forecast] * len(test)

    mape = mean_absolute_percentage_error(test['NetAmount'], predictions) * 100

    print(f"Model Evaluation (Moving Avg) for {sales_code}:")
    print(f"MAPE: {mape:.2f}%")

    return predictions, test




In [None]:
# Back-test salesperson code '265'.
# NOTE(review): the variable names say 909 but the code evaluated is '265'.
forecast_909, test_909 = evaluate_model('265') 

In [None]:
def evaluate_model(sales_code):
    """Back-test ARIMA on one salesperson's last 7 days, falling back to ETS.

    The salesperson's rows in ``daily_sales`` are reindexed to daily frequency
    (missing days filled with 0); the final 7 days are held out as the test
    window and ARIMA(5, 1, 0) is fitted on the rest.

    Parameters
    ----------
    sales_code
        Value matched against ``daily_sales['SalesPersonCode']``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'DATE'``, ``'Predicted NetAmount'`` and ``'Actual NetAmount'``
        for the test window when ARIMA's MAPE is at most 40%; otherwise
        whatever ``evaluate_ets`` returns.
    """
    person_data = daily_sales[daily_sales['SalesPersonCode'] == sales_code]
    person_data = person_data.set_index('DATE').asfreq('D').fillna(0)

    train = person_data.iloc[:-7]
    test = person_data.iloc[-7:]

    # Guard only the ARIMA fit/forecast. With the ETS call inside the try, an
    # ETS error was reported as "ARIMA failed" and ETS was then run a second time.
    try:
        model = ARIMA(train['NetAmount'], order=(5, 1, 0))
        model_fit = model.fit()
        forecast = model_fit.forecast(steps=len(test))
    except Exception as e:
        print(f"ARIMA failed for {sales_code} due to {e}. Switching to ETS...")
        return evaluate_ets(sales_code, test)

    # NOTE(review): missing days were filled with 0 above, and sklearn's MAPE
    # becomes arbitrarily large when an actual value is 0, so a single no-sales
    # day in the test window can push this over the threshold.
    mape = mean_absolute_percentage_error(test['NetAmount'], forecast) * 100
    print(f"Model Evaluation (ARIMA) for {sales_code}: MAPE: {mape:.2f}%")

    if mape > 40:
        print(f"High MAPE ({mape:.2f}%). Trying ETS model for {sales_code}.")
        return evaluate_ets(sales_code, test)

    # Predictions and actuals side by side for each test date.
    results_df = pd.DataFrame({
        'DATE': test.index,
        'Predicted NetAmount': forecast.values,
        'Actual NetAmount': test['NetAmount'].values
    })
    return results_df

def evaluate_ets(sales_code, test):
    """Score an additive-trend exponential smoothing forecast against ``test``.

    The training series is the salesperson's daily-frequency series (missing
    days filled with 0) up to, but excluding, the first test date. Slicing the
    raw rows with ``iloc[:-7]`` instead would drop the last 7 *recorded* days,
    which need not be the 7 calendar days that make up ``test``.

    Parameters
    ----------
    sales_code
        Value matched against ``daily_sales['SalesPersonCode']``.
    test : pandas.DataFrame
        Held-out rows indexed by date, with a ``'NetAmount'`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``'DATE'``, ``'Predicted NetAmount'`` and ``'Actual NetAmount'``,
        one row per test date. When the salesperson has fewer than 10 rows the
        prediction is the mean NetAmount repeated 7 times.
    """
    person_data = daily_sales[daily_sales['SalesPersonCode'] == sales_code]
    if len(person_data) < 10:
        print(f"Not enough data for {sales_code}, using mean-based prediction.")
        mean_forecast = [person_data['NetAmount'].mean()] * 7
        return pd.DataFrame({'DATE': test.index, 'Predicted NetAmount': mean_forecast, 'Actual NetAmount': test['NetAmount'].values})

    # Same daily reindexing the caller applied before splitting off `test`.
    daily = person_data.set_index('DATE').asfreq('D').fillna(0)
    train = daily.loc[daily.index < test.index[0]]

    model = ExponentialSmoothing(train['NetAmount'], trend="add", seasonal=None)
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=len(test))

    results_df = pd.DataFrame({
        'DATE': test.index,
        'Predicted NetAmount': forecast.values,
        'Actual NetAmount': test['NetAmount'].values
    })

    mape = mean_absolute_percentage_error(test['NetAmount'], forecast) * 100
    print(f"Model Evaluation (ETS) for {sales_code}: MAPE: {mape:.2f}%")

    return results_df


In [None]:
# Back-test salesperson code '254' and print the evaluation result.
forecast_df = evaluate_model('254')
print(forecast_df)

In [None]:
# Count the raw rows recorded under salesperson code '0'.
filtered_data = data.loc[data['SalesPersonCode'].eq('0')]
print(filtered_data.shape[0])


In [None]:
import matplotlib.pyplot as plt

# Salesperson whose series is plotted. The title is built from this value so
# it always names the code that was actually filtered (it previously said 909
# while the data was filtered for '0').
trend_sales_code = '0'

person_data = daily_sales[daily_sales['SalesPersonCode'] == trend_sales_code]
# One row per calendar day; days without a record become 0.
person_data = person_data.set_index('DATE').asfreq('D').fillna(0)

plt.figure(figsize=(12, 6))
plt.plot(person_data.index, person_data['NetAmount'], marker='o', linestyle='-')
plt.xlabel("Date")
plt.ylabel("Net Sales Amount")
plt.title(f"Sales Trend Over Time for SalesPerson {trend_sales_code}")
plt.grid(True)
plt.show()