# Fashion Transactions 2025 - Data Structure & Algorithm (DSA) Operations
This notebook demonstrates DSA techniques (searching, sorting, grouping, filtering, etc.) on the fashion dataset.

In [4]:
# Import necessary libraries
import pandas as pd
import numpy as np

In [6]:
# Read the fashion transactions CSV into a DataFrame and preview the first rows
df = pd.read_csv(filepath_or_buffer='fashion_transactions_2025.csv')
df.head(n=5)

Unnamed: 0,order_id,order_date,customer_id,age,gender,city,category,subcategory,brand,price,quantity,discount,final_price,channel,return_flag,rating
0,ORD00001,20-04-2025,CUST2824,19,Other,Markland,Footwear,Sneakers,Adidas,1643,1,25,1232.25,Online,0,1
1,ORD00002,27-05-2025,CUST1488,23,Male,Port Erinmouth,Clothing,T-Shirts,Adidas,4964,4,5,18863.2,Retail,0,1
2,ORD00003,11-01-2025,CUST3615,45,Female,West Angelicaland,Footwear,Sneakers,Adidas,3257,1,0,3257.0,Retail,1,3
3,ORD00004,06-04-2025,CUST5333,20,Other,Lawrenceside,Footwear,Boots,H&M,3600,1,20,2880.0,Retail,0,5
4,ORD00005,20-04-2025,CUST6925,54,Male,Emilymouth,Accessories,Bags,Zara,2366,3,0,7098.0,Online,0,4


In [8]:
# Dimensions of the loaded DataFrame as a (rows, columns) tuple
df.shape

(55000, 16)

## Linear Search: Find all transactions with discount > 50%

In [10]:
# Boolean-mask scan over every row: keep those whose discount value exceeds 50
high_discount = df.loc[df['discount'].gt(50)]
high_discount.head(n=5)

Unnamed: 0,order_id,order_date,customer_id,age,gender,city,category,subcategory,brand,price,quantity,discount,final_price,channel,return_flag,rating


## Binary Search: Find the value closest to ₹500 in the sorted final-price list

In [12]:
# Drop missing final prices, then sort ascending so the list can be binary-searched
prices = list(df['final_price'].dropna())
prices.sort()

def binary_search_closest(arr, target):
    """Return the element of an ascending-sorted sequence nearest to ``target``.

    A standard binary search narrows the range until ``low`` and ``high``
    cross. At that point ``arr[high]`` is the largest element below the
    target and ``arr[low]`` is the smallest element above it (where those
    positions exist), so the answer is whichever of the two is nearer.

    Args:
        arr: Indexable sequence sorted in ascending order.
        target: Value to search for.

    Returns:
        The element equal to ``target`` if one is found; otherwise the
        neighbouring element with the smaller absolute difference. On an
        exact tie the larger neighbour is returned.

    Raises:
        IndexError: If ``arr`` is empty.
    """
    if len(arr) == 0:
        raise IndexError('binary_search_closest() requires a non-empty sequence')

    low, high = 0, len(arr) - 1
    while low <= high:
        mid = (low + high) // 2
        if arr[mid] == target:
            return arr[mid]
        elif arr[mid] < target:
            low = mid + 1
        else:
            high = mid - 1

    # The loop exits with high == low - 1, bracketing where target would sit.
    if low >= len(arr):
        # Target is larger than every element.
        return arr[high]
    if high < 0:
        # Target is smaller than every element.
        return arr[low]

    # Compare both neighbours; returning arr[low] unconditionally can miss
    # a nearer value just below the target.
    below, above = arr[high], arr[low]
    return below if (target - below) < (above - target) else above

# Look up the final price nearest ₹500 in the sorted list and report it
closest_price = binary_search_closest(arr=prices, target=500)
print(f'Closest price to ₹500 is: ₹{closest_price}')

Closest price to ₹500 is: ₹501.3


## Sorting: Top 10 highest final priced orders

In [17]:
# Order all transactions by final price, highest first, and keep the first ten
top_prices = (
    df
    .sort_values('final_price', ascending=False)
    .head(10)
)
# Show only the order identifier alongside its final price
top_prices.loc[:, ['order_id', 'final_price']]

Unnamed: 0,order_id,final_price
36387,ORD36388,24820.0
19481,ORD19482,24820.0
7606,ORD07607,24820.0
52150,ORD52151,24820.0
39066,ORD39067,24820.0
38957,ORD38958,24820.0
31583,ORD31584,24820.0
23638,ORD23639,24820.0
12001,ORD12002,24820.0
5349,ORD05350,24820.0


## Hashing: Count number of purchases per user

In [24]:
# Tally how many rows each customer_id appears in (most frequent first)
user_counts = df.loc[:, 'customer_id'].value_counts()
user_counts.head(n=5)

customer_id
CUST5328    75
CUST3940    58
CUST1439    55
CUST7227    55
CUST1543    53
Name: count, dtype: int64

## Grouping: Total revenue per brand

In [22]:
# Sum final_price within each brand, then rank brands from highest total to lowest
revenue_per_brand = (
    df
    .groupby(by='brand')['final_price']
    .sum()
    .sort_values(ascending=False)
)
revenue_per_brand.head(n=5)

brand
Prada     51168575.15
Zara      50154732.70
Adidas    49728724.85
Gucci     49152679.05
Uniqlo    48848001.15
Name: final_price, dtype: float64