In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.impute import SimpleImputer
from sklearn.ensemble import HistGradientBoostingRegressor

In [2]:
# Load data
# Directory that holds the three Excel workbooks. Keeping it in one constant
# means the notebook can be pointed at another location with a single edit.
DATA_DIR = 'D:/CoUrSE/PRojEcT'

# Sales workbook plus the two lookup workbooks it is merged with later.
sales = pd.read_excel(f'{DATA_DIR}/Sales.xlsx')
products = pd.read_excel(f'{DATA_DIR}/Products.xlsx')
# NOTE(review): `time` is also the name of a standard-library module; the name
# is kept because the merge cell refers to this frame as `time`.
time = pd.read_excel(f'{DATA_DIR}/Time.xlsx')

In [3]:
# Attach product attributes to every sale (join key: Product_ID), then attach
# the time attributes (join key: Date_ID). No `how` is given, so both joins use
# pandas' default inner join and rows without a match on the key are dropped.
df_sales_product = sales.merge(products, on='Product_ID')
df_sales_time = df_sales_product.merge(time, on='Date_ID')

In [4]:
# Derive the calendar features used by the model:
#   Month   - month number parsed from Transaction_Date
#   Quarter - copy of the existing 'Quarterr' column
# NOTE(review): 'Quarterr' (double r) is presumably the header as spelled in
# the source workbook - confirm before renaming it.
df_sales_time = df_sales_time.assign(
    Month=pd.to_datetime(df_sales_time['Transaction_Date']).dt.month,
    Quarter=df_sales_time['Quarterr'],
)

In [5]:
# Model inputs (X) and the regression target (y), both taken from the merged frame.
FEATURE_COLUMNS = ['Price', 'Month', 'Quarter']
TARGET_COLUMN = 'Amount'

X = df_sales_time[FEATURE_COLUMNS]
y = df_sales_time[TARGET_COLUMN]

In [6]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
split_parts = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [7]:
# Show the dtype of each training feature column.
train_feature_dtypes = X_train.dtypes
print(train_feature_dtypes)

Price        int64
Month      float64
Quarter    float64
dtype: object


In [8]:
# Missing-value counts before cleaning: one count per feature column,
# followed by a single count for the target.
train_feature_nulls = X_train.isna().sum()
train_target_nulls = y_train.isna().sum()
print(train_feature_nulls)
print(train_target_nulls)

Price       0
Month      28
Quarter    28
dtype: int64
11


In [9]:
# Replace missing training values with the median. Plain reassignment is used
# instead of `inplace=True`: the filled values are the same, but this avoids
# mutating objects handed back by train_test_split (a common source of
# SettingWithCopyWarning).
X_train = X_train.fillna(X_train.median())  # per-column medians of the training features
# NOTE(review): this fills missing *target* values with the training median,
# i.e. it invents labels for those rows. Dropping rows with a missing target
# is the more usual choice - confirm this is intended.
y_train = y_train.fillna(y_train.median())

In [17]:
# Re-count missing values in the training features, then in the training target.
for train_part in (X_train, y_train):
    print(train_part.isna().sum())

Price      0
Month      0
Quarter    0
dtype: int64
0


In [12]:
# Discard feature rows that contain any missing value in each split, then
# re-select the targets by the surviving index labels so every X row still
# lines up with its y value.
X_train = X_train.dropna()
X_test = X_test.dropna()

y_train = y_train[X_train.index]
y_test = y_test[X_test.index]

In [13]:
# Keep only the test rows whose target is present; the same boolean mask is
# applied to both the features and the target so they stay aligned.
target_present = y_test.notna()
X_test = X_test[target_present]
y_test = y_test[target_present]

In [18]:
# Missing-value counts for the test split: one count per feature column,
# followed by a single count for the target.
test_feature_nulls = X_test.isna().sum()
test_target_nulls = y_test.isna().sum()
print(test_feature_nulls)
print(test_target_nulls)

Price      0
Month      0
Quarter    0
dtype: int64
0


In [14]:
# Fit a random-forest regressor on the training split.
# 100 trees; the fixed seed makes the fitted forest reproducible.
forest_params = {'n_estimators': 100, 'random_state': 42}
model = RandomForestRegressor(**forest_params)
model.fit(X_train, y_train)

In [15]:
# Predict the target for every row of the test features.
y_pred = model.predict(X=X_test)

In [19]:
# Evaluate the model on the test split.
mae = mean_absolute_error(y_test, y_pred)
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that
# keyword was deprecated in scikit-learn 1.4 and removed in 1.6, where it
# raises TypeError. Taking the square root works on every version.
rmse = mean_squared_error(y_test, y_pred) ** 0.5

print("MAE:", mae)
print("RMSE:", rmse)

MAE: 1.305002380952381
RMSE: 1.537626348420928
