# Sales Analysis

- Cleaned a sales dataset, trained and tested a forecasting model on it, and predicted future sales.
- Later, analysed the same dataset with the help of SAS Visual Analytics.

## Method used: Holt-Winters exponential smoothing (additive seasonality)

## Libraries used : 
- Pandas
- Numpy
- Matplotlib
- Statsmodels
- Sklearn

### Cleaning Dataset:

In [2]:
import numbers

import pandas as pd

# Closed sets of labels accepted in the categorical columns; a row holding any
# other value in one of these columns is discarded.
valid_genders = ['Male', 'Female']
valid_category = ['Books', 'Clothing', 'Cosmetics', 'Food & Beverage', 'Shoes', 'Souvenir', 'Technology', 'Toys']
valid_payment_method = ['Cash', 'Credit Card', 'Debit Card']
valid_shopping_mall = ['Cevahir AVM', 'Emaar Square Mall', 'Forum Istanbul', 'Istinye Park', 'Kanyon', 'Mall of Istanbul', 'Metrocity', 'Metropol AVM',
                       'Viaport Outlet', 'Zorlu Center']


def _is_whole_number(value: object) -> bool:
    """Return True if *value* is a number with no fractional part.

    Accepts Python and NumPy integers as well as integer-valued floats.
    The float case matters because pandas stores an integer column as
    float64 whenever the sheet contained blank cells, so a plain
    ``isinstance(value, int)`` check would reject every row. Booleans,
    strings, NaN and infinities are rejected.
    """
    if isinstance(value, bool):
        return False
    if isinstance(value, numbers.Integral):
        return True
    return isinstance(value, numbers.Real) and float(value).is_integer()


def _is_real_number(value: object) -> bool:
    """Return True if *value* is an int or float (Python or NumPy), but not a bool."""
    return isinstance(value, numbers.Real) and not isinstance(value, bool)


def clean_sales_data(raw: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of the raw sales table.

    Rows are dropped when they contain any missing value, when 'gender',
    'category', 'payment_method' or 'shopping_mall' holds a label outside
    the accepted lists, when 'age' is not a whole number, or when 'price'
    is not numeric. 'invoice_date' is reduced to a plain date (no time).

    NOTE: 'quantity' is not type-checked, matching the previous version of
    this script.

    Args:
        raw: Sales records with at least the columns named above.

    Returns:
        A new DataFrame; *raw* itself is left unmodified.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    cleaned = raw.dropna()

    # One combined mask instead of successive re-filtering. ``astype(bool)``
    # keeps the mask boolean even when no rows are left to inspect.
    keep = (
        cleaned['gender'].isin(valid_genders)
        & cleaned['category'].isin(valid_category)
        & cleaned['payment_method'].isin(valid_payment_method)
        & cleaned['shopping_mall'].isin(valid_shopping_mall)
        & cleaned['age'].map(_is_whole_number).astype(bool)
        & cleaned['price'].map(_is_real_number).astype(bool)
    )

    # Explicit copy so the column assignment below never writes into a view
    # of the caller's frame (avoids SettingWithCopyWarning).
    cleaned = cleaned.loc[keep].copy()

    # to_datetime is a no-op for an already-parsed column and makes the
    # ``.dt`` accessor safe when Excel delivered the dates as text.
    cleaned['invoice_date'] = pd.to_datetime(cleaned['invoice_date']).dt.date
    return cleaned


# A notebook cell or a directly executed script runs with
# __name__ == "__main__", so the file I/O below still happens there, while
# importing this code elsewhere only defines the helpers above.
if __name__ == "__main__":
    df = pd.read_excel('Book1.xlsx')

    # Report the number of missing values per column before they are dropped.
    print(df.isna().sum())

    df = clean_sales_data(df)

    df.to_excel('Book2.xlsx', index=False)

    print("Done.")


FileNotFoundError: [Errno 2] No such file or directory: 'Book1.xlsx'

### Training and Testing

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

# Load the cleaned sales records and index them by invoice date so that the
# series can be resampled by time.
data = pd.read_excel('Book2.xlsx')
data['invoice_date'] = pd.to_datetime(data['invoice_date'])
data.set_index('invoice_date', inplace=True)

# Total quantity sold per week ('W' is pandas' weekly frequency alias).
weekly_data = data['quantity'].resample('W').sum()

# Chronological split: the first 80% of the weeks train the model and the
# remaining weeks are held out for evaluation.
train_size = int(len(weekly_data) * 0.8)
train, test = weekly_data[:train_size], weekly_data[train_size:]

# Exponential smoothing with an additive seasonal component; no trend term is
# requested.
# NOTE(review): with weekly bins, seasonal_periods=7 describes a 7-week
# cycle - confirm this is intended and not a leftover from daily data.
model = ExponentialSmoothing(train, seasonal='add', seasonal_periods=7)
model_fit = model.fit()

# Forecast exactly as many steps as there are held-out weeks; the same
# forecast is used for both the plot and the error metrics.
forecasts = model_fit.forecast(steps=len(test))

plt.figure(figsize=(12, 6))
plt.plot(train, label='Training Data')
plt.plot(test, label='Actual Data')
plt.plot(forecasts, label='Forecasts')
plt.legend()
plt.show()

# Compare the held-out observations with the forecasts.
actual_data = test.values

mae = mean_absolute_error(actual_data, forecasts)
mse = mean_squared_error(actual_data, forecasts)
rmse = np.sqrt(mse)
# The metric is returned as a fraction; scale it to a percentage.
mape = mean_absolute_percentage_error(actual_data, forecasts) * 100

print("Mean Absolute Error: ", mae)
print("Mean Squared Error: ", mse)
print("Root Mean Squared Error: ", rmse)
print("Mean Absolute Percentage Error: ", mape)

FileNotFoundError: [Errno 2] No such file or directory: 'Book2.xlsx'