### Connecting to PostgreSQL

In [3]:
pip install sqlalchemy

Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install psycopg2-binary

Note: you may need to restart the kernel to use updated packages.


In [5]:
%load_ext sql
from sqlalchemy import create_engine
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

In [6]:
pip install ipython-sql

Note: you may need to restart the kernel to use updated packages.


In [7]:
# Open the database connection for the %sql magic: user "postgres" with an empty
# password, host localhost, port 5432, database "superstoredb".
# NOTE(review): the connection URL is hardcoded - consider reading it from an
# environment variable instead of keeping credentials in the notebook.
%sql postgresql://postgres:@localhost:5432/superstoredb

In [12]:
import pandas as pd
from sqlalchemy import create_engine

# Read the source CSV; the path is resolved relative to the working directory.
file_path = "superstore_update.csv"

# Build the working frame in one chain: missing Quantity values become 0 and the
# column is then cast to int. Only missing values are handled here; any other
# value that cannot be cast to int still raises.
df = pd.read_csv(file_path).assign(
    Quantity=lambda frame: frame["Quantity"].fillna(0).astype(int)
)

# Maps each Product ID to the Product Name registered for it while rows are processed.
product_map = {}


def generate_new_product_id(product_id, increment):
    """Derive a new product ID by increasing the trailing number of ``product_id``.

    The whole run of digits at the end of the ID is treated as one number, so the
    result keeps the ID's shape instead of growing by a character when the last
    digit passes 9 ("FUR-BO-10001799" + 1 -> "FUR-BO-10001800"). The number is
    zero-padded to the width of the original digit run. When the ID has no
    trailing digits, ``increment`` is appended and no character is dropped.

    Args:
        product_id: The existing product ID (a string).
        increment: Integer added to the trailing number.

    Returns:
        The derived product ID as a string.
    """
    # Split into the non-numeric stem and the trailing ASCII digit run.
    stem = product_id.rstrip("0123456789")
    digits = product_id[len(stem):]

    if not digits:
        # Nothing numeric to bump: keep the full ID and append the increment.
        return f"{product_id}{increment}"

    # Preserve zero-padding; the width only grows when the number overflows it.
    return f"{stem}{int(digits) + increment:0{len(digits)}d}"

# Make every Product ID refer to exactly one Product Name.
#
# The first name seen for an ID keeps that ID. A row whose name disagrees with the
# recorded one is moved to a derived ID from generate_new_product_id(). Two rules
# keep the result consistent:
#   * if an earlier row already moved this same name to a derived ID, that ID is
#     reused, so one product never ends up spread over several IDs;
#   * a derived ID is never taken from the IDs originally present in the data, so
#     it cannot clash with a product that only shows up in a later row.
original_ids = set(df["Product ID"])

# Snapshot the columns up front: the loop rewrites df["Product ID"] as it goes.
row_labels = df.index.tolist()
row_ids = df["Product ID"].tolist()
row_names = df["Product Name"].tolist()

for index, product_id, product_name in zip(row_labels, row_ids, row_names):
    if product_id not in product_map:
        # First sighting of this ID: register its name.
        product_map[product_id] = product_name
        continue

    if product_map[product_id] == product_name:
        # Same ID, same name: nothing to fix.
        continue

    # Same ID, different name: probe derived IDs until one is usable.
    increment = 1
    while True:
        new_product_id = generate_new_product_id(product_id, increment)
        if new_product_id in product_map:
            if product_map[new_product_id] == product_name:
                break  # already assigned to this very product: reuse it
        elif new_product_id not in original_ids:
            break  # unused anywhere in the data: claim it
        increment += 1

    df.at[index, "Product ID"] = new_product_id
    product_map[new_product_id] = product_name

# Save the updated dataset to a new CSV file
output_path = "superstore_updated.csv"
df.to_csv(output_path, index=False)

# Display a confirmation message
print(f"Updated file saved to {output_path}")

# Optionally, save the updated data to PostgreSQL (example to a new table)
engine = create_engine('postgresql://postgres:@localhost:5432/superstoredb')
df.to_sql('updated_product_data', engine, if_exists='replace', index=False)

# Verify the data was updated
%sql SELECT * FROM updated_product_data LIMIT 5;



Updated file saved to superstore_updated.csv
 * postgresql://postgres:***@localhost:5432/superstoredb
5 rows affected.


Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,State,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
1,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
2,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs, Rounded Back",731.94,3,0.0,219.582
3,CA-2016-138688,2016-06-12,2016-06-16,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters by Universal,14.62,2,0.0,6.8714
4,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
5,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164
