### Import Libraries
This block imports necessary libraries:
- `datetime` for handling date and time.
- `pyodbc` for database connection.
- `pandas` for data manipulation.
- `matplotlib.pyplot` and `seaborn` for visualization.


In [None]:
import datetime
import pyodbc
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


### Establish Database Connection
This block sets up a connection to the database using specified parameters. Replace `your_server_name`, `your_database_name`, `your_username`, and `your_password` with actual values.


In [None]:
# Connection settings -- replace the placeholders with real values before running
server = 'your_server_name'
database = 'your_database_name'
username = 'your_username'
password = 'your_password'

# Assemble the ODBC connection string from the settings above, then open the
# connection and a cursor on it. `cnxn` is reused by the data-loading cell.
conn_str = f'DRIVER={{SQL Server}};SERVER={server};DATABASE={database};UID={username};PWD={password}'
cnxn = pyodbc.connect(conn_str)
cursor = cnxn.cursor()


### Load Data
This block executes SQL queries to retrieve data from the `SalesPrice` and `MOA_SalesPrice` tables and loads it into Pandas DataFrames.


In [None]:
# SQL statements selecting every column of the two price tables
query_sales = "SELECT * FROM [SalesPrice];"
query_moa = "SELECT * FROM [MOA_SalesPrice];"

# Run each statement over the open connection and keep the result as a DataFrame
df_sales = pd.read_sql(sql=query_sales, con=cnxn)
df_sales_moa = pd.read_sql(sql=query_moa, con=cnxn)


### Data Preprocessing
This block preprocesses the data by:
- Removing the `timestamp` column.
- Converting `Starting Date` to plain dates and `Ending Date` to datetime values; entries that cannot be parsed become missing values.


In [None]:
def _prepare_price_frame(raw):
    """Return a copy of *raw* without `timestamp` and with parsed date columns.

    `Starting Date` is parsed and reduced to plain date objects; `Ending Date`
    is parsed and kept as datetime. Values that cannot be parsed are coerced
    to missing instead of raising.
    """
    cleaned = raw.drop(columns=['timestamp'])
    parsed_start = pd.to_datetime(cleaned['Starting Date'], errors='coerce')
    cleaned['Starting Date'] = parsed_start.dt.date
    cleaned['Ending Date'] = pd.to_datetime(cleaned['Ending Date'], errors='coerce')
    return cleaned


# Apply the same preprocessing to the SalesPrice and the MOA Sales Price data
df_sales = _prepare_price_frame(df_sales)
df_sales_moa = _prepare_price_frame(df_sales_moa)


### Find Extra Rows in MOA
This block finds rows in the MOA dataset that do not exist in the Sales dataset. It saves these extra rows to a CSV file.


In [None]:
# Left-join MOA onto Sales; the `_merge` indicator column records, per MOA row,
# whether a Sales row with the same key columns was found.
df_merged_moa = pd.merge(
    df_sales_moa,
    df_sales,
    how='left',
    on=['Item No_', 'Sales Code', 'Starting Date', 'Variant Code', 'Unit of Measure Code'],
    suffixes=('_moa', '_sales'),
    indicator=True,
)

# Rows marked 'left_only' exist in MOA but have no counterpart in Sales
left_only_moa = df_merged_moa.loc[df_merged_moa['_merge'].eq('left_only')]

# The Sales-side columns carry no data for unmatched rows, so remove them
columns_to_drop_moa = left_only_moa.filter(regex='_sales$', axis=1).columns
left_only_moa = left_only_moa.drop(columns_to_drop_moa, axis=1)

# Timestamp used in the output file names (also reused by later cells)
now = datetime.datetime.now()
now_str = now.strftime("%Y-%m-%d-%H-%M-%S")

# Write the extra MOA rows to a timestamped CSV
filename_moa = f'E:\\Extra_rows_MOA_{now_str}.csv'
left_only_moa.to_csv(filename_moa, index=False)


### Find Extra Rows in Sales Price
This block finds rows in the Sales Price dataset that do not exist in the MOA dataset. It saves these extra rows to a CSV file.


In [None]:
# Left-join Sales onto MOA; the `_merge` indicator column records, per Sales
# row, whether an MOA row with the same key columns was found.
df_merged_sales = pd.merge(
    df_sales,
    df_sales_moa,
    how='left',
    on=['Item No_', 'Sales Code', 'Starting Date', 'Variant Code', 'Unit of Measure Code'],
    suffixes=('_sales', '_moa'),
    indicator=True,
)

# Rows marked 'left_only' exist in Sales but have no counterpart in MOA
left_only_sales = df_merged_sales.loc[df_merged_sales['_merge'].eq('left_only')]

# The MOA-side columns carry no data for unmatched rows, so remove them
columns_to_drop_sales = left_only_sales.filter(regex='_moa$', axis=1).columns
left_only_sales = left_only_sales.drop(columns_to_drop_sales, axis=1)

# Write the extra Sales rows to a CSV stamped with the run's `now_str`
filename_sales = f'E:\\Extra_rows_Sales_{now_str}.csv'
left_only_sales.to_csv(filename_sales, index=False)


### Analyze Differences in Unit Prices
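This block merges the Sales and MOA DataFrames on their shared key columns to find differences in unit prices. It keeps only rows whose `Ending Date` is `1753-01-01`, rounds the unit prices to two decimals, calculates the price differences, and saves the rows whose prices differ to a CSV file.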


In [None]:
# Merge DataFrames to find differences in unit prices.
# Inner join: only records present in both tables with identical key columns
# (including Ending Date) can have their prices compared.
df_merged_prices = df_sales.merge(df_sales_moa, on=["Item No_", "Starting Date", "Sales Code", "Ending Date", "Unit of Measure Code", 'Variant Code'], suffixes=('_sales', '_moa'), how='inner')

# Select relevant columns and preprocess.
# The explicit .copy() makes this an independent frame, so the column
# assignments below do not raise SettingWithCopyWarning.
df_price_diff = df_merged_prices[["Item No_", "Starting Date", "Sales Code", "Ending Date", 'Unit of Measure Code', 'Variant Code', 'Unit Price_sales', 'Unit Price_moa']].copy()
df_price_diff['Ending Date'] = pd.to_datetime(df_price_diff['Ending Date'], errors='coerce')

# Keep only rows whose Ending Date equals 1753-01-01.
# NOTE(review): presumably this date is the source system's "no end date"
# placeholder -- confirm.
date_to = pd.to_datetime('1753-01-01 00:00:00.000')
df_price_diff = df_price_diff[df_price_diff['Ending Date'] == date_to].copy()

# Round unit prices so differences below the chosen precision are ignored
decimal_places = 2
df_price_diff['Unit Price_sales'] = df_price_diff['Unit Price_sales'].round(decimal_places)
df_price_diff['Unit Price_moa'] = df_price_diff['Unit Price_moa'].round(decimal_places)

# Find differences in unit prices: `check` is True where both prices match,
# so the rows of interest are those where it is False.
df_price_diff['check'] = df_price_diff['Unit Price_sales'].eq(df_price_diff['Unit Price_moa'])
df_price_diff = df_price_diff[~df_price_diff['check']].copy()
df_price_diff['difference'] = df_price_diff['Unit Price_sales'] - df_price_diff['Unit Price_moa']

# Save the results to a CSV file stamped with the run's `now_str`
filename_diff = f'E:\\Different_prices_{now_str}.csv'
df_price_diff.to_csv(filename_diff, index=False)


### Scatter Plot of Unit Price Differences

This block creates a scatter plot to visualize the relationship between `Unit Price_Sales` and `Unit Price_notSales`. 

- **Purpose:** To identify discrepancies between sales prices and non-sales prices.
- **Insight:** Points that deviate significantly from the line $y = x$ indicate discrepancies between the two datasets.


In [None]:
# Scatter Plot: Unit Price Differences
# Plots the Sales unit price against the MOA unit price for every row of
# `df_price_diff` (the rows whose rounded prices differ). The frame and column
# names match what the price-difference cell produces: 'Unit Price_sales' and
# 'Unit Price_moa'.
plt.figure(figsize=(10, 6))
plt.scatter(df_price_diff['Unit Price_sales'], df_price_diff['Unit Price_moa'], alpha=0.5)
plt.title('Scatter Plot of Unit Price Differences')
plt.xlabel('Unit Price_Sales')
plt.ylabel('Unit Price_notSales')
plt.grid(True)
# Save before show(): show() may clear the current figure
plt.savefig(f'E:\\Sales prices 24 branch\\Scatter_Plot_Unit_Price_Differences_{now_str}.png')
plt.show()


### Box Plot of Unit Prices

This block generates a box plot to compare the distribution of unit prices between sales and non-sales items.

- **Purpose:** To visualize the range, median, and outliers in unit prices.
- **Insight:** The box plot helps identify differences in price distributions and outliers between the two datasets.


In [None]:
# Box Plot: Distribution of Unit Prices
# Reads the two price columns of `df_price_diff` ('Unit Price_sales' and
# 'Unit Price_moa') and stacks them into one long frame with a `Type` label,
# which is the shape seaborn's boxplot expects for side-by-side boxes.
plt.figure(figsize=(12, 6))
sales_prices = df_price_diff['Unit Price_sales']
moa_prices = df_price_diff['Unit Price_moa']
price_data = pd.DataFrame({
    # ignore_index avoids a duplicated index after stacking the two columns
    'Price': pd.concat([sales_prices, moa_prices], ignore_index=True),
    'Type': ['Sales Price'] * len(sales_prices) +
            ['Not Sales Price'] * len(moa_prices),
})
sns.boxplot(x='Type', y='Price', data=price_data)
plt.title('Box Plot of Unit Prices')
plt.grid(True)
# Save before show(): show() may clear the current figure
plt.savefig(f'E:\\Sales prices 24 branch\\Box_Plot_Unit_Prices_{now_str}.png')
plt.show()


### Histogram of Price Differences

This block plots a histogram to display the distribution of price differences.

- **Purpose:** To understand how common various levels of discrepancies are.
- **Insight:** The histogram shows the frequency distribution of price differences, highlighting how often specific discrepancies occur.


In [None]:
# Histogram: Distribution of Price Differences
# Uses the `difference` column (Sales price minus MOA price) that the
# price-difference cell adds to `df_price_diff`.
plt.figure(figsize=(10, 6))
plt.hist(df_price_diff['difference'], bins=30, edgecolor='black', alpha=0.7)
plt.title('Histogram of Price Differences')
plt.xlabel('Price Difference')
plt.ylabel('Frequency')
plt.grid(True)
# Save before show(): show() may clear the current figure
plt.savefig(f'E:\\Sales prices 24 branch\\Histogram_Price_Differences_{now_str}.png')
plt.show()


### Line Chart of Unit Prices Over Time

This block creates a line chart to visualize changes in unit prices over time, based on the starting dates.

- **Purpose:** To analyze temporal trends in unit prices.
- **Insight:** The line chart shows how prices change over time and compares sales prices with non-sales prices, helping to identify trends or significant changes.


In [None]:
# Line Chart: Temporal Trends of Unit Prices
# Plots both price columns of `df_price_diff` against `Starting Date`.
# Rows without a usable start date are dropped and the rest are sorted
# chronologically; plotting in unsorted row order would make the lines jump
# back and forth across the time axis.
trend_data = df_price_diff.dropna(subset=['Starting Date']).sort_values('Starting Date')

plt.figure(figsize=(12, 6))
plt.plot(trend_data['Starting Date'], trend_data['Unit Price_sales'], label='Sales Price', color='blue')
plt.plot(trend_data['Starting Date'], trend_data['Unit Price_moa'], label='Not Sales Price', color='red')
plt.title('Line Chart of Unit Prices Over Time')
plt.xlabel('Starting Date')
plt.ylabel('Unit Price')
plt.legend()
plt.grid(True)
# Save before show(): show() may clear the current figure
plt.savefig(f'E:\\Sales prices 24 branch\\Line_Chart_Temporal_Trends_{now_str}.png')
plt.show()
