In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [4]:
# Read the sales CSV into a DataFrame; the path is resolved relative to the
# kernel's current working directory.
sales_csv_path = "Sales_December_2019.csv"
data = pd.read_csv(sales_csv_path)
data.head()

EmptyDataError: No columns to parse from file

In [None]:
# Clean the raw frame in one chain:
#   1. dropna() discards every row that has at least one missing value;
#   2. the .loc filter discards rows whose 'Order Date' cell is the literal
#      text 'Order Date', i.e. header lines repeated inside the data.
data = (
    data
    .dropna()
    .loc[lambda frame: frame['Order Date'].ne('Order Date')]
)

data.head()

In [None]:
# Parse the columns into proper dtypes so that the .dt accessor and
# arithmetic used further down work on them.
data['Order Date'] = pd.to_datetime(data['Order Date'])
for numeric_col in ('Quantity Ordered', 'Price Each'):
    data[numeric_col] = pd.to_numeric(data[numeric_col])

In [None]:
# Sales is the row-wise product of 'Quantity Ordered' and 'Price Each'.
data['Sales'] = data['Quantity Ordered'].mul(data['Price Each'])

In [None]:
# Keep only the calendar month number (1-12) of each order timestamp.
# NOTE(review): the year is discarded here, so orders from the same month of
# different years would share one Month value — confirm the data cover a
# single year.
data['Month'] = data['Order Date'].dt.month

# Preview just the columns involved in the monthly aggregation.
preview_columns = ['Order Date', 'Month', 'Sales']
data.loc[:, preview_columns].head()

In [None]:
# Sum Sales within each Month value; the result is a Series indexed by Month.
monthly_sales = data['Sales'].groupby(data['Month']).sum()
monthly_sales

In [None]:
# Line chart of the summed sales per month, with a marker at every observed month.
fig, ax = plt.subplots()
ax.plot(monthly_sales.index, monthly_sales.values, marker='o')
ax.set_xlabel("Month")
ax.set_ylabel("Total Sales")
ax.set_title("Monthly Sales Trend")
ax.grid()
plt.show()

In [None]:
# Feature matrix: the month numbers as a single column of shape (n_months, 1).
X = np.array(monthly_sales.index)[:, np.newaxis]
# Target vector: the summed sales for each of those months.
y = monthly_sales.to_numpy()

In [None]:
# Fit a linear regression of monthly sales (y) on the month number (X).
# fit() is left as the last expression so the cell displays the fitted estimator.
model = LinearRegression()
model.fit(X=X, y=y)

In [None]:
# Forecast the months that immediately follow the last month present in the
# training features. The horizon is derived from X instead of being
# hard-coded as 13, 14, 15, so the cell stays meaningful when the observed
# data do not end at month 12. When the last observed month is 12 the result
# is exactly [[13], [14], [15]], as before.
FORECAST_HORIZON = 3  # number of consecutive months to project forward
last_observed_month = int(X.max())
future_months = np.arange(
    last_observed_month + 1,
    last_observed_month + 1 + FORECAST_HORIZON,
).reshape(-1, 1)  # single-column 2-D array, same layout as X

# NOTE(review): the Month feature is a bare month number with no year, so this
# extrapolation only makes sense if the training months belong to one
# contiguous year — confirm against the loaded data.
future_sales = model.predict(future_months)

future_sales

In [None]:
# Overlay the observed monthly totals and the model's forecast on one chart.
fig, ax = plt.subplots()
ax.plot(X, y, label="Actual Sales", marker='o')
ax.plot(
    future_months,
    future_sales,
    label="Forecasted Sales",
    linestyle='--',
    marker='x',
)
ax.set_xlabel("Month")
ax.set_ylabel("Sales")
ax.set_title("Sales Forecasting")
ax.legend()
ax.grid()
plt.show()