In [1]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)

In [2]:
df = pd.read_csv('after_merge.csv')

In [3]:
df

Unnamed: 0.1,Unnamed: 0,SKU,STORE,COST,RETAIL,REGISTER,TRANNUM,SEQ,SALEDATE,STYPE,QUANTITY,ORGPRICE,SPRICE,AMT,INTERID,MIC,CITY,STATE,ZIP
0,0,3,4603,123.36,440.0,100,1200,0,2005-04-12,R,1,440.00,30.00,30.00,3400000,333,CORALVILLE,IA,52241
1,1,6819,4603,4.00,2.5,470,1600,0,2005-07-23,P,1,9.99,2.00,2.00,139000038,87,CORALVILLE,IA,52241
2,2,9633,4603,13.50,22.5,280,1200,0,2004-11-03,P,1,21.00,21.00,21.00,512500049,281,CORALVILLE,IA,52241
3,3,10896,4603,11.10,18.5,390,1900,0,2004-09-01,P,1,18.50,18.50,18.50,521400008,281,CORALVILLE,IA,52241
4,4,17379,4603,24.03,16.0,500,1800,0,2005-07-07,P,1,64.00,16.00,16.00,526900029,689,CORALVILLE,IA,52241
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1134482,1134482,9888223,7604,9.32,16.0,340,1000,670805443,2005-08-20,P,1,32.00,16.00,16.00,509400085,697,RICHMOND,VA,23233
1134483,1134483,9928678,7604,7.25,4.5,70,5900,0,2005-08-20,P,1,18.00,4.50,4.50,189400107,810,RICHMOND,VA,23233
1134484,1134484,9957390,7604,17.10,28.5,770,200,0,2005-08-06,P,1,28.50,28.50,28.50,581500061,643,RICHMOND,VA,23233
1134485,1134485,9979437,7604,148.50,134.3,350,600,941208922,2005-08-23,P,1,395.00,94.01,94.01,77600063,983,RICHMOND,VA,23233


In [4]:
# Assuming your DataFrame is named df
# Convert SALEDATE to datetime
df['SALEDATE'] = pd.to_datetime(df['SALEDATE'])

# Inventory Turnover
df['InventoryTurnover'] = df['COST'] / df['QUANTITY']

# Gross Profit
df['GrossProfit'] = df['RETAIL'] - (df['COST'] * df['QUANTITY'])

# Year-over-Year (YoY) Growth
df['YoYGrowth'] = df.groupby('SKU')['AMT'].pct_change(12) * 100

# Return Percentage by SKU
df['ReturnPercentage'] = (df['ORGPRICE'] - df['SPRICE']) / df['ORGPRICE'] * 100

# Average Spend by Store
avg_spend_by_store = df.groupby('STORE')['AMT'].mean()

# Display results
print("Inventory Turnover:")
print(df['InventoryTurnover'].head())

print("\nGross Profit:")
print(df['GrossProfit'].head())

print("\nYoY Growth:")
print(df['YoYGrowth'].head())

print("\nReturn Percentage by SKU:")
print(df['ReturnPercentage'].head())

print("\nAverage Spend by Store:")
print(avg_spend_by_store.head())


Inventory Turnover:
0    123.36
1      4.00
2     13.50
3     11.10
4     24.03
Name: InventoryTurnover, dtype: float64

Gross Profit:
0    316.64
1     -1.50
2      9.00
3      7.40
4     -8.03
Name: GrossProfit, dtype: float64

YoY Growth:
0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
Name: YoYGrowth, dtype: float64

Return Percentage by SKU:
0    93.181818
1    79.979980
2     0.000000
3     0.000000
4    75.000000
Name: ReturnPercentage, dtype: float64

Average Spend by Store:
STORE
102    29.375504
103    26.651302
107    27.926380
202    23.023719
203    25.629025
Name: AMT, dtype: float64


In [5]:
df['SALEDATE'] = pd.to_datetime(df['SALEDATE'])

# Create a new column for the year
df['Year'] = df['SALEDATE'].dt.year

# Calculate YoY Total Sales by Store
df['YoYTotalSales'] = df.groupby(['Year', 'STORE'])['RETAIL'].transform('sum')

# Calculate YoY Percentage Increase
df['YoYPercentageIncrease'] = df.groupby('STORE')['YoYTotalSales'].pct_change() * 100

total_sales_returns = df.groupby(['STORE', 'STYPE'])['AMT'].sum().unstack(fill_value=0)

# Calculate the percentage of returns for each store
total_sales_returns['ReturnPercentage'] = (total_sales_returns['R'] / (total_sales_returns['P'] + total_sales_returns['R'])) * 100

# Merge the results back to the original DataFrame
df = df.merge(total_sales_returns['ReturnPercentage'].reset_index(), on='STORE', how='left')


df

Unnamed: 0.1,Unnamed: 0,SKU,STORE,COST,RETAIL,REGISTER,TRANNUM,SEQ,SALEDATE,STYPE,QUANTITY,ORGPRICE,SPRICE,AMT,INTERID,MIC,CITY,STATE,ZIP,InventoryTurnover,GrossProfit,YoYGrowth,ReturnPercentage_x,Year,YoYTotalSales,YoYPercentageIncrease,ReturnPercentage_y
0,0,3,4603,123.36,440.0,100,1200,0,2005-04-12,R,1,440.00,30.00,30.00,3400000,333,CORALVILLE,IA,52241,123.36,316.64,,93.181818,2005,34497.61,,11.655572
1,1,6819,4603,4.00,2.5,470,1600,0,2005-07-23,P,1,9.99,2.00,2.00,139000038,87,CORALVILLE,IA,52241,4.00,-1.50,,79.979980,2005,34497.61,0.000000,11.655572
2,2,9633,4603,13.50,22.5,280,1200,0,2004-11-03,P,1,21.00,21.00,21.00,512500049,281,CORALVILLE,IA,52241,13.50,9.00,,0.000000,2004,9425.85,-72.676803,11.655572
3,3,10896,4603,11.10,18.5,390,1900,0,2004-09-01,P,1,18.50,18.50,18.50,521400008,281,CORALVILLE,IA,52241,11.10,7.40,,0.000000,2004,9425.85,0.000000,11.655572
4,4,17379,4603,24.03,16.0,500,1800,0,2005-07-07,P,1,64.00,16.00,16.00,526900029,689,CORALVILLE,IA,52241,24.03,-8.03,,75.000000,2005,34497.61,265.989380,11.655572
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1134482,1134482,9888223,7604,9.32,16.0,340,1000,670805443,2005-08-20,P,1,32.00,16.00,16.00,509400085,697,RICHMOND,VA,23233,9.32,6.68,,50.000000,2005,9000.48,0.000000,11.894831
1134483,1134483,9928678,7604,7.25,4.5,70,5900,0,2005-08-20,P,1,18.00,4.50,4.50,189400107,810,RICHMOND,VA,23233,7.25,-2.75,,75.000000,2005,9000.48,0.000000,11.894831
1134484,1134484,9957390,7604,17.10,28.5,770,200,0,2005-08-06,P,1,28.50,28.50,28.50,581500061,643,RICHMOND,VA,23233,17.10,11.40,0.0,0.000000,2005,9000.48,0.000000,11.894831
1134485,1134485,9979437,7604,148.50,134.3,350,600,941208922,2005-08-23,P,1,395.00,94.01,94.01,77600063,983,RICHMOND,VA,23233,148.50,-14.20,,76.200000,2005,9000.48,0.000000,11.894831


In [6]:
df2 = df.groupby('STORE')[['InventoryTurnover', 'GrossProfit', 'ReturnPercentage_y']].mean()

In [7]:
df2['success'] = ((df2['InventoryTurnover'] > df2['InventoryTurnover'].quantile(0.50)) &
                          (df2['GrossProfit'] > df2['GrossProfit'].quantile(0.50)) &
                          (df2['ReturnPercentage_y'] > df2['ReturnPercentage_y'].quantile(0.05))).astype(int)


In [8]:
df2['success'].value_counts()

0    239
1     86
Name: success, dtype: int64

In [9]:
df2

Unnamed: 0_level_0,InventoryTurnover,GrossProfit,ReturnPercentage_y,success
STORE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
102,17.663513,6.073868,9.726143,1
103,16.089679,6.849022,10.602569,1
107,16.287056,7.251869,10.331423,1
202,14.711616,4.641666,10.407317,0
203,15.545438,5.673788,12.336764,1
...,...,...,...,...
9704,12.332212,6.332404,7.494701,0
9709,12.938681,4.194834,9.260292,0
9804,14.676519,7.659122,10.521946,0
9806,17.165040,13.592576,4.027118,0
