# Regressiemodellen

In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import pyodbc
from sqlalchemy import create_engine

# Database connectie
De data komt uit het datawarehouse.

In [2]:
# Connection settings: SQL Server instance name and the database to open
DB = dict(
    servername='(local)\\SQLEXPRESS',
    database='DEDS_DataWarehouse',
)

In [3]:
# Open an ODBC connection to the database described by DB, using
# Trusted_Connection (no username/password in the connection string).
#
# pyodbc.connect() signals failure by raising pyodbc.Error; it never returns a
# falsy object. A truthiness check on the connection can therefore never take
# its "not established" branch, so failure is handled with try/except instead.
try:
    export_conn = pyodbc.connect(
        'DRIVER={SQL Server};SERVER=' + DB['servername']
        + ';DATABASE=' + DB['database']
        + ';Trusted_Connection=yes'
    )
except pyodbc.Error:
    print("Connection with database is not established")
    # Re-raise the original error so the driver's diagnostics stay visible
    raise

# Create a cursor from the connection
export_cursor = export_conn.cursor()

print("Connection with database is established")

Connection with database is established


# Tabellen inlezen
We hebben de tabellen `Order_header`, `Order_details` en `Order` nodig.

In [4]:
# Read the complete [Order] table into a DataFrame.
# The table name is bracketed in the query because ORDER is a reserved SQL word.
order_query = "SELECT * FROM [Order]"
order_result = export_cursor.execute(order_query)

# The first field of every cursor.description entry is the column name
order_columns = [name for name, *_ in order_result.description]
order_fetch = order_result.fetchall()

order = pd.DataFrame.from_records(data=order_fetch, columns=order_columns)

# Preview the first rows
order.head()

Unnamed: 0,ORDER_SK,ORDER_order_number,ORDER_ORDER_METHOD_CODE_method_code,ORDER_ORDER_METHOD_EN_method,LAST_UPDATED,CURRENT_VALUE
0,1,1153,7,Sales visit,2024-03-20 13:22:18.9210287,True
1,2,1154,4,E-mail,2024-03-20 13:22:18.9362991,True
2,3,1155,7,Sales visit,2024-03-20 13:22:18.9378535,True
3,4,1156,5,Web,2024-03-20 13:22:18.9414257,True
4,5,1157,2,Telephone,2024-03-20 13:22:18.9439944,True


In [5]:
# Read the complete Order_header table into a DataFrame.
order_header_query = "SELECT * FROM Order_header"
order_header_result = export_cursor.execute(order_header_query)

# The first field of every cursor.description entry is the column name
order_header_columns = [name for name, *_ in order_header_result.description]
order_header_fetch = order_header_result.fetchall()

order_header = pd.DataFrame.from_records(
    data=order_header_fetch,
    columns=order_header_columns,
)

# Preview the first rows
order_header.head()

Unnamed: 0,ORDER_HEADER_SK,ORDER_HEADER_number,ORDER_HEADER_RETAILER_CODE,ORDER_HEADER_SALES_STAFF_CODE,ORDER_HEADER_SALES_BRANCH_CODE,ORDER_HEADER_ORDER_DATE,ORDER_HEADER_RETAILER_SITE_CODE,ORDER_HEADER_RETAILER_CONTACT_CODE,ORDER_HEADER_ORDER_order_number,LAST_UPDATED,CURRENT_VALUE
0,1,1153,100,54,5,2020-04-14,285,349,1,2024-03-20 13:32:41.5863889,True
1,2,1154,100,54,5,2020-10-01,285,349,2,2024-03-20 13:32:41.5997189,True
2,3,1155,100,52,5,2020-04-21,340,355,3,2024-03-20 13:32:41.5997189,True
3,4,1156,100,52,5,2020-09-09,340,355,4,2024-03-20 13:32:41.6013817,True
4,5,1157,100,54,5,2020-04-10,351,356,5,2024-03-20 13:32:41.6032862,True


In [6]:
# Read the complete Order_details table into a DataFrame.
order_details_query = "SELECT * FROM Order_details"
order_details_result = export_cursor.execute(order_details_query)

# The first field of every cursor.description entry is the column name
order_details_columns = [name for name, *_ in order_details_result.description]
order_details_fetch = order_details_result.fetchall()

order_details = pd.DataFrame.from_records(
    data=order_details_fetch,
    columns=order_details_columns,
)

# Preview the first rows
order_details.head()

Unnamed: 0,ORDER_DETAILS_SK,ORDER_DETAILS_code,ORDER_DETAILS_QUANTITY_quantity,ORDER_DETAILS_TOTAL_COST_total,ORDER_DETAILS_TOTAL_MARGIN_margin,ORDER_DETAILS_RETURN_CODE_returned,ORDER_DETAILS_ORDER_NUMBER_order,ORDER_DETAILS_PRODUCT_NUMBER_product,ORDER_DETAILS_UNIT_ID_unit,LAST_UPDATED,CURRENT_VALUE
0,1,100146,16,8072.64,9496.0,1975,4408,23,144,2024-03-20 13:22:40.1478376,True
1,2,100149,64,65.28,131.2,1976,5013,24,147,2024-03-20 13:22:40.1630588,True
2,3,100199,22,149.16,298.54,1979,5010,40,197,2024-03-20 13:22:40.1645917,True
3,4,100250,52,6578.52,9868.04,2298,5135,54,248,2024-03-20 13:22:40.1656051,True
4,5,100641,6,491.22,1080.78,2301,5150,15,635,2024-03-20 13:22:40.1671101,True


## De tabellen samenvoegen tot 1 dataframe

In [7]:
# Combine the three tables into one frame with two inner joins (pandas' default):
#   1. order_header x order_details on their order reference columns
#   2. the result x order on the order number
# LAST_UPDATED / CURRENT_VALUE exist in every table; the first join gives them
# the default _x / _y suffixes, and the copies from `order` keep their plain names.
#
# NOTE(review): in the head() previews ORDER_HEADER_ORDER_order_number holds
# 1, 2, 3, ... (the same values as ORDER_SK) while ORDER_order_number holds
# 1153, 1154, ... -- confirm that ORDER_order_number (and not ORDER_SK) is the
# intended key for the second join.
order_combined = (
    order_header
    .merge(
        order_details,
        left_on='ORDER_HEADER_ORDER_order_number',
        right_on='ORDER_DETAILS_ORDER_NUMBER_order',
    )
    .merge(
        order,
        left_on='ORDER_HEADER_ORDER_order_number',
        right_on='ORDER_order_number',
    )
)

order_combined.head()

Unnamed: 0,ORDER_HEADER_SK,ORDER_HEADER_number,ORDER_HEADER_RETAILER_CODE,ORDER_HEADER_SALES_STAFF_CODE,ORDER_HEADER_SALES_BRANCH_CODE,ORDER_HEADER_ORDER_DATE,ORDER_HEADER_RETAILER_SITE_CODE,ORDER_HEADER_RETAILER_CONTACT_CODE,ORDER_HEADER_ORDER_order_number,LAST_UPDATED_x,...,ORDER_DETAILS_PRODUCT_NUMBER_product,ORDER_DETAILS_UNIT_ID_unit,LAST_UPDATED_y,CURRENT_VALUE_y,ORDER_SK,ORDER_order_number,ORDER_ORDER_METHOD_CODE_method_code,ORDER_ORDER_METHOD_EN_method,LAST_UPDATED,CURRENT_VALUE
0,1132,2423,26,75,26,2021-10-12,111,111,1159,2024-03-20 13:32:43.7943092,...,81,20769,2024-03-20 13:22:40.5689616,True,7,1159,7,Sales visit,2024-03-20 13:22:18.9465432,True
1,1145,2438,85,64,26,2020-02-13,91,94,1174,2024-03-20 13:32:43.8266467,...,43,19871,2024-03-20 13:22:40.5475428,True,21,1174,2,Telephone,2024-03-20 13:22:18.9622595,True
2,1156,2449,84,64,26,2020-09-25,87,92,1185,2024-03-20 13:32:43.8580160,...,84,19969,2024-03-20 13:22:40.5495860,True,31,1185,2,Telephone,2024-03-20 13:22:18.9744354,True
3,1157,2450,88,75,26,2020-09-19,101,104,1186,2024-03-20 13:32:43.8605420,...,41,19848,2024-03-20 13:22:40.5449854,True,32,1186,1,Fax,2024-03-20 13:22:18.9744354,True
4,1160,2453,88,53,26,2021-10-04,101,104,1189,2024-03-20 13:32:43.8682479,...,87,20803,2024-03-20 13:22:40.5730802,True,35,1189,2,Telephone,2024-03-20 13:22:18.9779648,True
