In [52]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR
import numpy as np

In [53]:

import pandas as pd
import chardet  # Use chardet to detect the encoding

# Path to the original CSV file
original_csv_path = r'C:\Users\amaan\Downloads\sales_data.csv'

# Detect the encoding of the original CSV file
with open(original_csv_path, 'rb') as f:
    result = chardet.detect(f.read())
    encoding = result['encoding']

# Convert the file to UTF-8 encoding
with open(original_csv_path, 'rb') as f:
    with open('sales_data_utf8.csv', 'w', encoding='utf-8') as f_utf8:
        f_utf8.write(f.read().decode(encoding))

# Specify the columns you want to keep
columns_to_keep = ['ORDERNUMBER', 'PRICEEACH', 'MONTH_ID', 'YEAR_ID', 'SALES']

# Read the UTF-8 encoded CSV file into a pandas DataFrame
df = pd.read_csv('sales_data_utf8.csv', usecols=columns_to_keep)

# Print the resulting DataFrame
print(df)

      ORDERNUMBER  PRICEEACH    SALES  MONTH_ID  YEAR_ID
0           10107      95.70  2871.00         2     2003
1           10121      81.35  2765.90         5     2003
2           10134      94.74  3884.34         7     2003
3           10145      83.26  3746.70         8     2003
4           10159     100.00  5205.27        10     2003
...           ...        ...      ...       ...      ...
2818        10350     100.00  2244.40        12     2004
2819        10373     100.00  3978.51         1     2005
2820        10386     100.00  5417.57         3     2005
2821        10397      62.24  2116.16         3     2005
2822        10414      65.52  3079.44         5     2005

[2823 rows x 5 columns]


In [54]:
# Block 3: Preprocess the data
# Define categorical and numerical columns
categorical_cols = ['MONTH_ID', 'YEAR_ID']
numerical_cols = ['PRICEEACH', 'SALES']

# Create preprocessing pipelines for both numerical and categorical data
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])


In [55]:
# Block 4: Create a pipeline that first preprocesses the data then applies an SVM model
pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('svm', SVR(kernel='linear'))])


In [56]:
# Block 5: Select features and target variable
X = df[categorical_cols + numerical_cols]
y = df['SALES']


In [57]:
# Block 6: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [58]:
# Block 7: Train the SVM model using the pipeline
pipeline.fit(X_train, y_train)


In [59]:
# Block 8: Predict sales for the next 6 months
current_month_data = X.iloc[-1:].copy()  # Get the last row as the current month's data and make a copy
predicted_sales_next_6_months = []

for _ in range(6):
    predicted_sales = pipeline.predict(current_month_data)
    predicted_sales_next_6_months.append(predicted_sales[0])
    # Update the current month's data with the predicted sales using .loc
    current_month_data.loc[:, 'SALES'] = predicted_sales


In [60]:
# Block 9: Print the predicted sales for the next 6 months
for i, sales in enumerate(predicted_sales_next_6_months):
    print(f"Month {i+1} - Predicted Sales: {sales:.2f}")


Month 1 - Predicted Sales: 2943.85
Month 2 - Predicted Sales: 2841.59
Month 3 - Predicted Sales: 2764.46
Month 4 - Predicted Sales: 2706.30
Month 5 - Predicted Sales: 2662.43
Month 6 - Predicted Sales: 2629.34


In [64]:
# Block 10: Determine profit or loss
# assuming costs are constant or known
costs = np.mean(df['PRICEEACH']) * 0.75  # costs are 0.75 times the average price each
predicted_profits_next_6_months = np.array(predicted_sales_next_6_months) - costs

In [67]:
# Block 11: Print the predicted profit/loss for the next 6 months with indicators for profit and loss
for i, profit_loss in enumerate(predicted_profits_next_6_months):
    # Check if the profit_loss is positive (profit) or negative (loss)
    if profit_loss > 0:
        outcome = "Profit"
    else:
        outcome = "Loss"
    # Print the month, the outcome, and the amount
    print(f"Month {i+1} - Predicted {outcome}: +{abs(profit_loss):.2f}") #if in dollars

Month 1 - Predicted Profit: +2881.10
Month 2 - Predicted Profit: +2778.84
Month 3 - Predicted Profit: +2701.72
Month 4 - Predicted Profit: +2643.55
Month 5 - Predicted Profit: +2599.69
Month 6 - Predicted Profit: +2566.60
