In [8]:
pip install pyodbc

Note: you may need to restart the kernel to use updated packages.


In [67]:
import pandas as pd
import pyodbc

In [68]:
# Load the cleaned dataset
sales = pd.read_csv('fashion&beauty_sales1.0.csv')
sales.head()


Unnamed: 0,invoice_no,customer_id,category,quantity,price,invoice_date,shopping_mall,total_sales
0,I138884,C241288,Clothing,5.0,1500.4,2022-08-05,Kanyon,7502.0
1,I317333,C111565,Shoes,3.0,1800.51,2021-12-12,Forum Istanbul,5401.53
2,I127801,C266599,Clothing,1.0,300.08,2021-11-09,Metrocity,300.08
3,I173702,C988172,Shoes,5.0,3000.85,2021-05-16,Metropol AVM,15004.25
4,I227836,C657758,Clothing,5.0,1500.4,2022-05-24,Forum Istanbul,7502.0


In [69]:
# Define the connection parameters for the SQL Server
server = 'DESKTOP-VK2UTCK\IC' 
database = 'Retail'  
# Establishing the connection to the database using Windows Authentication
try:
    conn = pyodbc.connect(
        f'DRIVER={{ODBC Driver 17 for SQL Server}};'
        f'SERVER={server};'
        f'DATABASE={database};'
        'Trusted_Connection=yes;'
    )
    cursor = conn.cursor()
    print("Connection established successfully using Windows Authentication.")

except pyodbc.Error as e:
    print(f"Error connecting to SQL Server: {e}")

Connection established successfully using Windows Authentication.


In [70]:
try:
    # Define the SQL query to create a new table if it does not exist
    create_table_query = ('''
    IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'Fashion')
    BEGIN
        CREATE TABLE Fashion (
            invoice_no VARCHAR(50)PRIMARY KEY,
            customer_id VARCHAR(50),
            category VARCHAR(100),
            quantity VARCHAR(10), 
            price FLOAT,
            invoice_date DATE,
            shopping_mall VARCHAR(100),
            total_sales FLOAT
        )
    END
    ''')

    # Execute the table creation query
    cursor.execute(create_table_query)

    # Commit the table creation to ensure it is completed
    conn.commit()
    print("Table created successfully.")

except Exception as e:
    conn.rollback()
    print(f"Error creating table: {e}")

Table created successfully.


In [71]:
# Inserting data into the table
try:
    for index, row in sales.iterrows():
        cursor.execute('''
        INSERT INTO Fashion (invoice_no, customer_id, category, quantity, price, invoice_date, shopping_mall, total_sales) 
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        ''', 
        row['invoice_no'], row['customer_id'], row['category'], row['quantity'], row['price'], row['invoice_date'], row['shopping_mall'], row['total_sales'])

    # Commiting the transaction
    conn.commit()
    print("Data imported successfully.")

except Exception as e:
    conn.rollback()
    print(f"Error importing data: {e}")

Error importing data: ('23000', "[23000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Violation of PRIMARY KEY constraint 'PK__Fashion__F58CA1E3F6CB147F'. Cannot insert duplicate key in object 'dbo.Fashion'. The duplicate key value is (I138884). (2627) (SQLExecDirectW); [23000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]The statement has been terminated. (3621)")


In [72]:
 # Query to retrieve top 25 records
try:
    cursor.execute("SELECT TOP 25* FROM Fashion")
    rows = cursor.fetchall()

    # Display the records
    for row in rows:
        print(row)

except pyodbc.Error as e:
    print(f"Error retrieving records: {e}")

('I100008', 'C199951', 'Clothing', '5', 1500.4, datetime.date(2022, 7, 10), 'Emaar Square Mall', 7502.0)
('I100014', 'C138893', 'Cosmetics', '5', 203.3, datetime.date(2021, 6, 18), 'Viaport Outlet', 1016.5)
('I100028', 'C325553', 'Clothing', '1', 300.08, datetime.date(2021, 6, 19), 'Metrocity', 300.08)
('I100037', 'C122873', 'Clothing', '1', 300.08, datetime.date(2021, 6, 6), 'Kanyon', 300.08)
('I100041', 'C326923', 'Cosmetics', '3', 121.98, datetime.date(2022, 7, 24), 'Mall of Istanbul', 365.94)
('I100049', 'C297871', 'Shoes', '1', 600.17, datetime.date(2021, 12, 26), 'Mall of Istanbul', 600.17)
('I100053', 'C217294', 'Cosmetics', '3', 121.98, datetime.date(2023, 1, 2), 'Metrocity', 365.94)
('I100057', 'C225062', 'Clothing', '2', 600.16, datetime.date(2021, 10, 13), 'Forum Istanbul', 1200.32)
('I100071', 'C522050', 'Clothing', '3', 900.24, datetime.date(2021, 10, 16), 'Mall of Istanbul', 2700.7200000000003)
('I100084', 'C226369', 'Shoes', '2', 1200.34, datetime.date(2022, 10, 9), 'Kan

In [73]:
# sorting top 10 records by 'total_sales' in descending order
try:
    cursor.execute("SELECT TOP 10 * FROM Fashion ORDER BY total_sales DESC")
    sorted_rows = cursor.fetchall()

    for row in sorted_rows:
        print(row)

except pyodbc.Error as e:
    print(f"Error sorting records: {e}")


('I100137', 'C233463', 'Shoes', '5', 3000.85, datetime.date(2021, 5, 16), 'Mall of Istanbul', 15004.25)
('I100223', 'C212702', 'Shoes', '5', 3000.85, datetime.date(2022, 11, 21), 'Metrocity', 15004.25)
('I100439', 'C224698', 'Shoes', '5', 3000.85, datetime.date(2023, 3, 2), 'Metropol AVM', 15004.25)
('I101017', 'C124932', 'Shoes', '5', 3000.85, datetime.date(2022, 7, 24), 'Mall of Istanbul', 15004.25)
('I101360', 'C540352', 'Shoes', '5', 3000.85, datetime.date(2021, 7, 11), 'Mall of Istanbul', 15004.25)
('I101659', 'C197249', 'Shoes', '5', 3000.85, datetime.date(2022, 11, 29), 'Viaport Outlet', 15004.25)
('I101745', 'C105559', 'Shoes', '5', 3000.85, datetime.date(2021, 4, 5), 'Cevahir AVM', 15004.25)
('I101785', 'C279161', 'Shoes', '5', 3000.85, datetime.date(2022, 7, 13), 'Kanyon', 15004.25)
('I102061', 'C181574', 'Shoes', '5', 3000.85, datetime.date(2022, 3, 11), 'Kanyon', 15004.25)
('I102092', 'C172569', 'Shoes', '5', 3000.85, datetime.date(2022, 11, 16), 'Viaport Outlet', 15004.25)

In [74]:
# counting the number of records
try:
    cursor.execute("SELECT COUNT(*) FROM Fashion")
    Total_record_count = cursor.fetchone()[0]

    print(f"Total Number of records: {Total_record_count}")

except pyodbc.Error as e:
    print(f"Error counting records: {e}")


Total Number of records: 52944


In [75]:
# Grouping the records by 'category' and count them
try:
    cursor.execute("SELECT category, COUNT(*) FROM Fashion GROUP BY category")
    grouped_rows = cursor.fetchall()

    for row in grouped_rows:
        print(row)

except pyodbc.Error as e:
    print(f"Error grouping records: {e}")


('Cosmetics', 13463)
('Shoes', 8886)
('Clothing', 30595)


In [76]:
#updating the product quantity of invoice number I227836
try:
    # Defining the invoice number and new quantity to be set
    updated_quantity = 3
    invoice_no = 'I227836'

    # Execute the update query
    cursor.execute('''
    UPDATE Fashion
    SET quantity = ?, 
        total_sales = ? * price
    WHERE invoice_no = ?
    ''', (new_quantity, new_quantity, invoice_no))

    # Committing the changes
    conn.commit()
    print(f"Record with invoice number {invoice_no} updated successfully.")

except pyodbc.Error as e:
    conn.rollback()
    print(f"Error updating record: {e}")

Record with invoice number I227836 updated successfully.


In [None]:
# Define the table name and the output file path
table = 'Fashion'
output_file = 'fashion_sales.json'  

try:
    # Query to select all data from the specified table
    query = f"SELECT * FROM {table_name}"

    # Read the data into a pandas DataFrame
    df = pd.read_sql(query, conn)

    # Export the data to a CSV file
    df.to_csv(output_file, index=False)  

    print(f"Data exported successfully to {output_file}")

except Exception as e:
    print(f"Error exporting data: {e}")

In [66]:
try:
    conn.close()
    print("Connection closed successfully.")

except pyodbc.Error as e:
    print(f"Error closing the connection: {e}")


Connection closed successfully.
