# <span style="font-size: 28px;"><b>Constructing a Basic Predictive Model</b></span>

## **Step 1: Load the Dataset**

In [None]:
pip install pandas pyodbc scikit-learn numpy

In [None]:
import pandas as pd
import pyodbc

server = 'SQLEXPRESS'
database = 'Transactions'
driver= '{SQL Server}'
trusted= 'yes'

# Connection string
conn_str = f'DRIVER={driver};SERVER={server};DATABASE={database};Trusted_Connection={trusted}'

# Connect to the database
conn = pyodbc.connect(conn_str)
cursor = conn.cursor()

query = '''
SELECT transaction_id, customer_id, 
       CAST(date AS DATE) as date, 
       CAST(time AS TIME) as time, 
       product_name, category, quantity, price
FROM [Transactions].[dbo].[Transactions]
'''
df = pd.read_sql_query(query, conn)

df['datetime'] = pd.to_datetime(df['date'].astype(str) + ' ' + df['time'].astype(str))
df.set_index('datetime', inplace=True)

# Creating lag features
df['price_lag1'] = df['price'].shift(1)

# Extracting date-time features
df['day_of_week'] = df.index.dayofweek
df['month'] = df.index.month

# Creating rolling window features
df['rolling_mean_7'] = df['price'].rolling(window=7).mean()
df['rolling_std_7'] = df['price'].rolling(window=7).std()

# Handling missing values
df.dropna(inplace=True)

## **Step 2:** **Feature Engineering and Standardization**

In [None]:
from sklearn.preprocessing import StandardScaler

features = ['price_lag1', 'rolling_mean_7', 'rolling_std_7', 'day_of_week', 'month']

X = df[features]  # Extracting the features from the DataFrame
y = df['price']  # Price is your target column

# Initialize the StandardScaler
scaler = StandardScaler()

# Fit the scaler to your data and transform it
X_scaled = scaler.fit_transform(X)

## **Step 3:** **Split the Data into Training and Testing Sets**

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=44)

## **Step 4:** **Model Selection**

In [None]:
from sklearn.linear_model import LinearRegression

model = LinearRegression()

## **Step 5:** **Training the Model**

In [None]:
model.fit(X_train, y_train)

## **Step 6:** **Making Predictions**

In [None]:
predictions = model.predict(X_test)

## **Step 7:** **Evaluating the Model**

In [10]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

print(f"MAE: {mean_absolute_error(y_test, predictions)}")
print(f"MSE: {mean_squared_error(y_test, predictions)}")
print(f"R^2: {r2_score(y_test, predictions)}")

MAE: 121.68773956797087
MSE: 35005.128442713525
R^2: 0.40510710247653503


## **Step 8:** **Model Interpretation**

In [11]:
print(f"Coefficients: {model.coef_}")

Coefficients: [-85.09255101   2.76980393 198.99045319 -17.71906835  -9.43592015]
