# 📈 Revenue Forecasting for E-commerce

Predict future sales using historical data with a Random Forest model.

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

# Load the raw sales records; the 'date' column is parsed into datetimes.
df = pd.read_csv('../data/ecommerce_sales.csv', parse_dates=['date'])

# Truncate every date to the first day of its month. Going through a monthly
# Period is used instead of `.astype('datetime64[M]')` because pandas 2.x only
# allows casts to the s/ms/us/ns units and raises TypeError for 'M'.
df['month'] = df['date'].dt.to_period('M').dt.to_timestamp()

# Total sales per month; groupby returns the months in ascending order.
monthly_sales = df.groupby('month').agg({'sales': 'sum'}).reset_index()

# Row position of each month, used as the model's only feature. This counts
# rows, so a month with no records does not leave a gap in the numbering.
monthly_sales['month_num'] = range(len(monthly_sales))

X = monthly_sales[['month_num']]
y = monthly_sales['sales']

# NOTE(review): train_test_split shuffles by default, so the held-out months
# are interleaved with the training months and the score below measures
# interpolation between known months, not prediction of unseen future months.
# Pass shuffle=False for a chronological hold-out if that is the intent.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions are in the row order of X_test / y_test.
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
r2

In [None]:
# Draw the full monthly sales series and overlay the model's predictions
# for the held-out months on a single set of axes.
fig, ax = plt.subplots(figsize=(10, 6))

# Months belonging to the test split, looked up by the test rows' index labels.
test_months = monthly_sales.loc[y_test.index, 'month']

sns.lineplot(
    x=monthly_sales['month'],
    y=monthly_sales['sales'],
    label='Actual',
    ax=ax,
)
sns.lineplot(x=test_months, y=y_pred, label='Predicted', ax=ax)

ax.set_title('Monthly Revenue Forecast')
ax.set_xlabel('Month')
ax.set_ylabel('Sales')
ax.legend()

# Tilt the date labels so neighbouring ticks do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)

fig.tight_layout()
plt.show()