In [10]:
# Importing required libraries

import pandas as pd
import numpy as np
import sqlalchemy 
from sqlalchemy.exc import SQLAlchemyError 
from datetime import datetime
from sqlalchemy import create_engine
import logging 
import pyodbc

In [11]:

# Define the log filename with a timestamp
log_filename = f"log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"

# Set up logging to file
file_handler = logging.FileHandler(log_filename)
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))

# Set up logging to console
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))

# Add both handlers to the root logger
logging.getLogger().setLevel(logging.INFO)
logging.getLogger().addHandler(file_handler)
logging.getLogger().addHandler(console_handler)

In [24]:
# Define the database connection string

DATABASE_CONNECTION_STRING = (
    "Driver={ODBC Driver 17 for SQL Server};"
    "Server=GENTLE-BEE\SQLEXPRESS;"
    "Database=AdventureWorks2019;"
    "Trusted_Connection=yes;"
)

try:
    conn = pyodbc.connect(DATABASE_CONNECTION_STRING)
    print("Connection successful")
    conn.close()
except Exception as e:
    print(f"Connection failed: {e}")


Connection successful


In [25]:
# Defining the needed functions 

def execute_sql(query):
    """
    Execute SQL query using a hardcoded connection string.

    Parameters:
        query (str): SQL query to execute.

    Returns:
        None
    """

    try:
        logging.info("Attempting to connect to the database for executing SQL.")
        conn = pyodbc.connect(DATABASE_CONNECTION_STRING)
        cursor = conn.cursor()
        logging.info(f"Executing query: {query}")
        cursor.execute(query)
        conn.commit()
        logging.info("SQL query executed successfully.")
    except Exception as e:
        logging.error(f"Error executing query: {e}")
        print(f"Error executing query: {e}")
    finally:
        if cursor:
            cursor.close()
        conn.close()
        logging.info("Database connection closed after executing SQL.")

def upload_data(table, dataframe, upload_type):
    """
    Upload data to a specified table in the database.

    Parameters:
        table (str): Name of the table to upload data.
        dataframe (DataFrame): Pandas DataFrame containing data to upload.
        upload_type (str): Method of upload ('replace', 'append', etc.).

    Returns:
        None
    """
    try:
        logging.info("Attempting to connect to the database for uploading data.")
        engine = create_engine(f"mssql+pyodbc:///?odbc_connect={DATABASE_CONNECTION_STRING}")
        logging.info(f"Uploading data to table: {table}")
        dataframe.to_sql(table, engine, index=False, if_exists=upload_type, schema="dbo", chunksize=10000)
        logging.info(f"Data uploaded successfully to {table}.")
    except Exception as e:
        logging.error(f"Error uploading data: {e}")
        print(f"Error uploading data: {e}")

def retrieve_data(query):
    """
    Retrieve data from the database using SQL query.

    Parameters:
        query (str): SQL query to retrieve data.

    Returns:
        DataFrame: Pandas DataFrame containing retrieved data.
    """
    try:
        logging.info("Attempting to connect to the database for retrieving data.")
        engine = create_engine(f"mssql+pyodbc:///?odbc_connect={DATABASE_CONNECTION_STRING}")
        logging.info(f"Retrieving data with query: {query}")
        df = pd.read_sql(query, engine)
        logging.info("Data retrieved successfully.")
    except Exception as e:
        logging.error(f"Error retrieving data: {e}")
        print(f"Error retrieving data: {e}")
        df = pd.DataFrame()  # Return empty DataFrame in case of error
    return df

In [26]:

# Creating the CustomersFeedback Table
create_table = """
CREATE TABLE CustomerFeedback (
    FeedbackID INT PRIMARY KEY, 
    CustomerID INT,
    FeedbackDate DATE,
    Comments VARCHAR(255)
);
"""

# Execute the query to create the table
try:
    execute_sql(create_table)
    logging.info("Table created successfully.")
    print("Table created successfully.")
except Exception as e:
    logging.error(f"Failed to create table: {e}")
    print(f"Failed to create table: {e}")

2024-06-16 19:58:19,579 - INFO - Attempting to connect to the database for executing SQL.
2024-06-16 19:58:19,579 - INFO - Attempting to connect to the database for executing SQL.
2024-06-16 19:58:19,579 - INFO - Attempting to connect to the database for executing SQL.
2024-06-16 19:58:19,607 - INFO - Executing query: 
CREATE TABLE CustomerFeedback (
    FeedbackID INT PRIMARY KEY, 
    CustomerID INT,
    FeedbackDate DATE,
    Comments VARCHAR(255)
);

2024-06-16 19:58:19,607 - INFO - Executing query: 
CREATE TABLE CustomerFeedback (
    FeedbackID INT PRIMARY KEY, 
    CustomerID INT,
    FeedbackDate DATE,
    Comments VARCHAR(255)
);

2024-06-16 19:58:19,607 - INFO - Executing query: 
CREATE TABLE CustomerFeedback (
    FeedbackID INT PRIMARY KEY, 
    CustomerID INT,
    FeedbackDate DATE,
    Comments VARCHAR(255)
);

2024-06-16 19:58:19,650 - INFO - SQL query executed successfully.
2024-06-16 19:58:19,650 - INFO - SQL query executed successfully.
2024-06-16 19:58:19,650 - INFO 

Table created successfully.


In [27]:
# Uploading data into the CustomerFeedback Table
data_to_upload = pd.DataFrame({
    'Feedback': [1, 2, 3, 4, 5, 6,7,8,9],
    'CustomerID': ['A', 'B', 'C', 'D','E','F','G','H', 'I'],
    'FeedbackDate': ['2024-01-01','2024-01-02','2024-01-02','2024-01-03','2024-01-04','2024-01-05','2024-01-06','2024-01-07','2024-01-08'],
    'Comments': ['Good', 'Bad', 'Good', 'Bad', 'Good', 'Bad', 'Good', 'Bad', 'Good']
})

# Specify the table name and the upload type
table_name = "CustomerFeedback"
upload_type = "replace"  #N/B: The upload_type could be append too

# Upload data to the database
try:
    upload_data(table_name, data_to_upload, upload_type)
    logging.info("Data uploaded successfully.")
    print("Data uploaded successfully.")
except Exception as e:
    logging.error(f"Failed to upload data: {e}")
    print(f"Failed to upload data: {e}")

2024-06-16 20:09:44,420 - INFO - Attempting to connect to the database for uploading data.
2024-06-16 20:09:44,420 - INFO - Attempting to connect to the database for uploading data.
2024-06-16 20:09:44,420 - INFO - Attempting to connect to the database for uploading data.
2024-06-16 20:09:44,421 - INFO - Uploading data to table: CustomerFeedback
2024-06-16 20:09:44,421 - INFO - Uploading data to table: CustomerFeedback
2024-06-16 20:09:44,421 - INFO - Uploading data to table: CustomerFeedback
2024-06-16 20:09:46,402 - INFO - Data uploaded successfully to CustomerFeedback.
2024-06-16 20:09:46,402 - INFO - Data uploaded successfully to CustomerFeedback.
2024-06-16 20:09:46,402 - INFO - Data uploaded successfully to CustomerFeedback.
2024-06-16 20:09:46,402 - INFO - Data uploaded successfully.
2024-06-16 20:09:46,402 - INFO - Data uploaded successfully.
2024-06-16 20:09:46,402 - INFO - Data uploaded successfully.


Data uploaded successfully.


In [28]:
renaming_column = """
EXEC sp_rename 'CustomerFeedback.Feedback', 'FeedbackID', 'COLUMN';
"""

# Execute an SQL query
try:
    execute_sql(renaming_column)
    logging.info("Column renamed successfully.")
    print("Column renamed successfully.")
except Exception as e:
    logging.error(f"Failed to rename column: {e}")
    print(f"Failed to rename column: {e}")

2024-06-16 20:20:11,788 - INFO - Attempting to connect to the database for executing SQL.
2024-06-16 20:20:11,788 - INFO - Attempting to connect to the database for executing SQL.
2024-06-16 20:20:11,788 - INFO - Attempting to connect to the database for executing SQL.
2024-06-16 20:20:11,819 - INFO - Executing query: 
EXEC sp_rename 'CustomerFeedback.Feedback', 'FeedbackID', 'COLUMN';

2024-06-16 20:20:11,819 - INFO - Executing query: 
EXEC sp_rename 'CustomerFeedback.Feedback', 'FeedbackID', 'COLUMN';

2024-06-16 20:20:11,819 - INFO - Executing query: 
EXEC sp_rename 'CustomerFeedback.Feedback', 'FeedbackID', 'COLUMN';

2024-06-16 20:20:12,341 - INFO - SQL query executed successfully.
2024-06-16 20:20:12,341 - INFO - SQL query executed successfully.
2024-06-16 20:20:12,341 - INFO - SQL query executed successfully.
2024-06-16 20:20:12,349 - INFO - Database connection closed after executing SQL.
2024-06-16 20:20:12,349 - INFO - Database connection closed after executing SQL.
2024-06-16

Column renamed successfully.


In [29]:
# Updating the CustomerFeedback Table
update_query = """
UPDATE CustomerFeedback
SET FeedbackID= 10
WHERE FeedbackID = 9
"""
# Execute an SQL query
try:
    execute_sql(update_query)
    logging.info("SQL query executed successfully.")
    print("SQL query executed successfully.")
except Exception as e:
    logging.error(f"Failed to execute SQL query: {e}")
    print(f"Failed to execute SQL query: {e}")

2024-06-16 20:20:42,346 - INFO - Attempting to connect to the database for executing SQL.
2024-06-16 20:20:42,346 - INFO - Attempting to connect to the database for executing SQL.
2024-06-16 20:20:42,346 - INFO - Attempting to connect to the database for executing SQL.
2024-06-16 20:20:42,379 - INFO - Executing query: 
UPDATE CustomerFeedback
SET FeedbackID= 10
WHERE FeedbackID = 9

2024-06-16 20:20:42,379 - INFO - Executing query: 
UPDATE CustomerFeedback
SET FeedbackID= 10
WHERE FeedbackID = 9

2024-06-16 20:20:42,379 - INFO - Executing query: 
UPDATE CustomerFeedback
SET FeedbackID= 10
WHERE FeedbackID = 9

2024-06-16 20:20:42,399 - INFO - SQL query executed successfully.
2024-06-16 20:20:42,399 - INFO - SQL query executed successfully.
2024-06-16 20:20:42,399 - INFO - SQL query executed successfully.
2024-06-16 20:20:42,405 - INFO - Database connection closed after executing SQL.
2024-06-16 20:20:42,405 - INFO - Database connection closed after executing SQL.
2024-06-16 20:20:42,40

SQL query executed successfully.


In [30]:
# Updating the CustomerFeedback Table
delete_query = """
DELETE 
FROM CustomerFeedback
WHERE FeedbackID = 10
"""
# Execute an SQL query
try:
    execute_sql(delete_query)
    logging.info("SQL query executed successfully.")
    print("SQL query executed successfully.")
except Exception as e:
    logging.error(f"Failed to execute SQL query: {e}")
    print(f"Failed to execute SQL query: {e}")

2024-06-16 20:35:38,059 - INFO - Attempting to connect to the database for executing SQL.
2024-06-16 20:35:38,059 - INFO - Attempting to connect to the database for executing SQL.
2024-06-16 20:35:38,059 - INFO - Attempting to connect to the database for executing SQL.
2024-06-16 20:35:38,103 - INFO - Executing query: 
DELETE 
FROM CustomerFeedback
WHERE FeedbackID = 10

2024-06-16 20:35:38,103 - INFO - Executing query: 
DELETE 
FROM CustomerFeedback
WHERE FeedbackID = 10

2024-06-16 20:35:38,103 - INFO - Executing query: 
DELETE 
FROM CustomerFeedback
WHERE FeedbackID = 10

2024-06-16 20:35:38,116 - INFO - SQL query executed successfully.
2024-06-16 20:35:38,116 - INFO - SQL query executed successfully.
2024-06-16 20:35:38,116 - INFO - SQL query executed successfully.
2024-06-16 20:35:38,120 - INFO - Database connection closed after executing SQL.
2024-06-16 20:35:38,120 - INFO - Database connection closed after executing SQL.
2024-06-16 20:35:38,120 - INFO - Database connection close

SQL query executed successfully.


In [37]:
#  Creating ProductReviews Table
create_table = """
CREATE TABLE ProductReviews (
    id VARCHAR(MAX) PRIMARY KEY,
    asins VARCHAR(MAX),
    brand VARCHAR(MAX),
    categories VARCHAR(MAX),
    colors VARCHAR(MAX),
    dateAdded VARCHAR(MAX),
    dateUpdated VARCHAR(MAX),
    dimension VARCHAR(MAX),
    ean VARCHAR(MAX),
    keys VARCHAR(MAX),
    manufacturer VARCHAR(MAX),
    manufacturerNumber VARCHAR(MAX),
    name VARCHAR(MAX),
    prices VARCHAR(MAX),
    reviews_date VARCHAR(MAX),
    reviews_doRecommend VARCHAR(MAX),
    reviews_numHelpful VARCHAR(MAX),
    reviews_rating VARCHAR(MAX),
    reviews_sourceURLs VARCHAR(MAX),
    reviews_text VARCHAR(MAX),
    reviews_title VARCHAR(MAX),
    reviews_userCity VARCHAR(MAX),
    reviews_userProvince VARCHAR(MAX),
    reviews_username VARCHAR(MAX),
    sizes VARCHAR(MAX),
    upc VARCHAR(MAX),
    weight VARCHAR(MAX)

);
"""
# Execute the query to create the table
try:
    execute_sql(create_table)
    logging.info("Table created successfully.")
    print("Table created successfully.")
except Exception as e:
    logging.error(f"Failed to create table: {e}")
    print(f"Failed to create table: {e}")


2024-06-16 21:10:54,956 - INFO - Attempting to connect to the database for executing SQL.
2024-06-16 21:10:54,956 - INFO - Attempting to connect to the database for executing SQL.
2024-06-16 21:10:54,956 - INFO - Attempting to connect to the database for executing SQL.
2024-06-16 21:10:54,969 - INFO - Executing query: 
CREATE TABLE ProductReviews (
    id VARCHAR(MAX) PRIMARY KEY,
    asins VARCHAR(MAX),
    brand VARCHAR(MAX),
    categories VARCHAR(MAX),
    colors VARCHAR(MAX),
    dateAdded VARCHAR(MAX),
    dateUpdated VARCHAR(MAX),
    dimension VARCHAR(MAX),
    ean VARCHAR(MAX),
    keys VARCHAR(MAX),
    manufacturer VARCHAR(MAX),
    manufacturerNumber VARCHAR(MAX),
    name VARCHAR(MAX),
    prices VARCHAR(MAX),
    reviews_date VARCHAR(MAX),
    reviews_doRecommend VARCHAR(MAX),
    reviews_numHelpful VARCHAR(MAX),
    reviews_rating VARCHAR(MAX),
    reviews_sourceURLs VARCHAR(MAX),
    reviews_text VARCHAR(MAX),
    reviews_title VARCHAR(MAX),
    reviews_userCity VARCHAR

Error executing query: ('42000', "[42000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Column 'id' in table 'ProductReviews' is of a type that is invalid for use as a key column in an index. (1919) (SQLExecDirectW); [42000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Could not create constraint or index. See previous errors. (1750)")
Table created successfully.


In [38]:
# Read the CSV file into a DataFrame
csv_file_path = r'C:\Users\USER\Desktop\Data Stuff\Edtech\Datasets\Amazon Sales.csv'
try:
    df = pd.read_csv(csv_file_path)
    logging.info("CSV file loaded successfully.")
    print("CSV file loaded successfully.")
except Exception as e:
    logging.error(f"Failed to load CSV file: {e}")
    print(f"Failed to load CSV file: {e}")
    exit()

# Rename columns if needed
df = df.rename(columns={
    'reviews.date': 'reviews_date',
    'reviews.doRecommend': 'reviews_doRecommend',
    'reviews.numHelpful': 'reviews_numHelpful',
    'reviews.rating': 'reviews_rating',
    'reviews.sourceURLs': 'reviews_sourceURLs',
    'reviews.text': 'reviews_text',
    'reviews.title': 'reviews_title',
    'reviews.userCity': 'reviews_userCity',
    'reviews.userProvince': 'reviews_userProvince',
    'reviews.username': 'reviews_username'
})

# Specify the table name and the upload type
table_name = "ProductReviews"
data_to_upload = df
upload_type = "append"  # Or "replace" depending on your needs

# Upload data to the database
try:
    upload_data(table_name, data_to_upload, upload_type)
    logging.info("Data uploaded successfully.")
    print("Data uploaded successfully.")
except Exception as e:
    logging.error(f"Failed to upload data: {e}")
    print(f"Failed to upload data: {e}")

2024-06-16 21:11:01,561 - INFO - CSV file loaded successfully.
2024-06-16 21:11:01,561 - INFO - CSV file loaded successfully.
2024-06-16 21:11:01,561 - INFO - CSV file loaded successfully.
2024-06-16 21:11:01,561 - INFO - Attempting to connect to the database for uploading data.
2024-06-16 21:11:01,561 - INFO - Attempting to connect to the database for uploading data.
2024-06-16 21:11:01,561 - INFO - Attempting to connect to the database for uploading data.
2024-06-16 21:11:01,573 - INFO - Uploading data to table: ProductReviews
2024-06-16 21:11:01,573 - INFO - Uploading data to table: ProductReviews
2024-06-16 21:11:01,573 - INFO - Uploading data to table: ProductReviews


CSV file loaded successfully.


2024-06-16 21:11:06,010 - INFO - Data uploaded successfully to ProductReviews.
2024-06-16 21:11:06,010 - INFO - Data uploaded successfully to ProductReviews.
2024-06-16 21:11:06,010 - INFO - Data uploaded successfully to ProductReviews.
2024-06-16 21:11:06,013 - INFO - Data uploaded successfully.
2024-06-16 21:11:06,013 - INFO - Data uploaded successfully.
2024-06-16 21:11:06,013 - INFO - Data uploaded successfully.


Data uploaded successfully.


In [49]:

# Define the SQL query to retrieve last year's sales data
sales_query = """
    SELECT 
        ProductID, 
        SUM(OrderQty) AS TotalQty, 
        SUM(LineTotal) AS TotalSales
    FROM 
        Sales.SalesOrderDetail
    WHERE 
        YEAR(ModifiedDate) = 2014
    GROUP BY 
        ProductID
    ORDER BY 
        TotalSales DESC;
"""

# Define the CSV file path
csv_file = r'C:\Users\USER\Downloads\lastYearSales.csv'  # Update the path as per your system

try:
    logging.info("Attempting to connect to the database for retrieving data.")
    engine = create_engine(f"mssql+pyodbc:///?odbc_connect={DATABASE_CONNECTION_STRING}")
    logging.info(f"Retrieving data with query: {sales_query}")

    # Execute SQL query and read into a pandas DataFrame
    sales_data = pd.read_sql(sales_query, engine)
    logging.info("Data retrieved successfully.")

    # Export DataFrame to CSV
    sales_data.to_csv(csv_file, index=False)
    logging.info(f"Data exported to {csv_file} successfully.")

    # Check if the CSV file exists
    if os.path.exists(csv_file):
        print(f"Data retrieval and export to {csv_file} completed successfully.")
    else:
        print(f"Failed to export data to {csv_file}. CSV file not found.")
        
except Exception as e:
    logging.error(f"Error retrieving or exporting data: {e}")
    print(f"Error retrieving or exporting data: {e}")


2024-06-16 22:37:03,125 - INFO - Attempting to connect to the database for retrieving data.
2024-06-16 22:37:03,125 - INFO - Attempting to connect to the database for retrieving data.
2024-06-16 22:37:03,125 - INFO - Attempting to connect to the database for retrieving data.
2024-06-16 22:37:03,169 - INFO - Retrieving data with query: 
    SELECT 
        ProductID, 
        SUM(OrderQty) AS TotalQty, 
        SUM(LineTotal) AS TotalSales
    FROM 
        Sales.SalesOrderDetail
    WHERE 
        YEAR(ModifiedDate) = 2014
    GROUP BY 
        ProductID
    ORDER BY 
        TotalSales DESC;

2024-06-16 22:37:03,169 - INFO - Retrieving data with query: 
    SELECT 
        ProductID, 
        SUM(OrderQty) AS TotalQty, 
        SUM(LineTotal) AS TotalSales
    FROM 
        Sales.SalesOrderDetail
    WHERE 
        YEAR(ModifiedDate) = 2014
    GROUP BY 
        ProductID
    ORDER BY 
        TotalSales DESC;

2024-06-16 22:37:03,169 - INFO - Retrieving data with query: 
    SELECT 


Data retrieval and export to C:\Users\USER\Downloads\lastYearSales.csv completed successfully.


In [51]:
pd.options.display.float_format = '{:.2f}'.format

sales_data.head()

Unnamed: 0,ProductID,TotalQty,TotalSales
0,782,619,1045214.64
1,779,508,912463.2
2,783,513,909303.5
3,780,441,797855.7
4,781,432,792630.15
