In [None]:
import os

from google.cloud import bigquery

# Initialize the BigQuery client.
# The service-account key location is read from the standard
# GOOGLE_APPLICATION_CREDENTIALS environment variable instead of a hard-coded,
# machine-specific absolute path; when the variable is unset, fall back to
# Application Default Credentials.
credentials_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
if credentials_path:
    client = bigquery.Client.from_service_account_json(credentials_path)
else:
    client = bigquery.Client()

# Source table (in the public-data project) and destination object in Cloud Storage.
source_project = 'bigquery-public-data'
dataset_name = 'thelook_ecommerce'
table_name = 'distribution_centers'
bucket_name = 'ecommerce_looker_download'
destination_uri = f"gs://{bucket_name}/{table_name}.csv"

# Build the table reference explicitly; Client.dataset() is deprecated in
# favour of constructing a DatasetReference directly.
dataset_ref = bigquery.DatasetReference(source_project, dataset_name)
table_ref = dataset_ref.table(table_name)

# Create the extract job and block until it finishes.
extract_job = client.extract_table(
    table_ref,
    destination_uri,
    location='US',  # Location must match that of the source table.
)
extract_job.result()  # Wait for job to complete

print(f"Exported {dataset_name}.{table_name} to {destination_uri}")


In [2]:

from src.data.dataset_reader import DatasetReader
from pathlib import Path
import pandas as pd
pd.set_option('display.max_columns', None)
%load_ext autoreload
%autoreload 2
# ignore warnings
import warnings
warnings.filterwarnings('ignore')
data_folder = Path("../data/raw").resolve()

products = DatasetReader(data_folder).get_data_csv('products.csv')
orders = DatasetReader(data_folder).get_data_csv('orders.csv')
order_items = DatasetReader(data_folder).get_data_csv('order_items.csv')
inventory_items = DatasetReader(data_folder).get_data_csv('inventory_items.csv')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Inspect a single order item by its id.
order_items[order_items['id'] == 27569]

In [None]:
# Inspect a single order by its order_id.
orders[orders['order_id'] == 18975]

In [None]:
# Inspect a single product by its id.
products[products['id'] == 19220]

In [None]:
# Inspect a single inventory item by its id.
inventory_items[inventory_items['id'] == 74342]

In [3]:
# Columns kept from each source table; the key columns are retained so the
# frames can be joined below.
order_item_cols = ['product_id', 'order_id', 'status', 'created_at', 'shipped_at',
                   'delivered_at', 'returned_at', 'sale_price']
order_cols = ['order_id', 'gender', 'num_of_item']
product_cols = ['id', 'cost', 'category', 'name', 'brand', 'retail_price', 'department', 'sku']

order_items_selected = order_items[order_item_cols]
orders_selected = orders[order_cols]
products_selected = products[product_cols]

# Join order items to their order (shared 'order_id'), then to their product
# (order_items.product_id -> products.id). Both are inner joins, so rows
# without a match on either side are dropped.
items_with_orders = order_items_selected.merge(orders_selected, on='order_id', how='inner')
merged_df = items_with_orders.merge(
    products_selected, left_on='product_id', right_on='id', how='inner'
)



In [4]:
# Remove the join keys now that the tables are merged. The result is rebound
# (no in-place mutation) and already-missing columns are tolerated, so
# re-running this cell does not raise a KeyError.
merged_df = merged_df.drop(columns=['product_id', 'order_id', 'id'], errors='ignore')

In [5]:
# Reorder the columns so that 'sku' comes first; all other columns keep
# their relative order.
remaining_cols = merged_df.columns.tolist()
remaining_cols.remove('sku')
merged_df = merged_df.loc[:, ['sku', *remaining_cols]]


In [6]:
# Check whether sale_price ever differs from retail_price, report the number
# of differing rows, then drop retail_price. Previously the results of the
# check were discarded, so the cell showed nothing. The guard lets the cell be
# re-run after the column has already been removed.
if 'retail_price' in merged_df.columns:
    price_mismatches = merged_df.query('sale_price != retail_price')
    print(f"Rows where sale_price != retail_price: {len(price_mismatches)}")
    merged_df = merged_df.drop(columns=['retail_price'])

In [None]:
# Plain-text preview of the first rows, without the index column.
preview_text = merged_df.head().to_string(index=False)
print(preview_text)

In [None]:
# Show every row of one specific SKU.
merged_df[merged_df['sku'] == "4ACBEDBE977480D19B7B682D4878CAE2"]

In [None]:
# Distinct values of the status column.
merged_df['status'].unique()

In [7]:
# Transform the order life-cycle date columns to pandas datetimes.
for timestamp_col in ('created_at', 'shipped_at', 'delivered_at', 'returned_at'):
    merged_df[timestamp_col] = pd.to_datetime(merged_df[timestamp_col])

In [8]:
# Per-SKU sales: the sum of num_of_item over every row of the same SKU,
# broadcast back onto each of those rows. The two expressions that used to
# precede this assignment were evaluated and thrown away (including a full
# groupby), so they have been removed.
# NOTE(review): num_of_item is taken from the orders table, so it may count
# items per order rather than units of this SKU -- confirm this is the
# intended definition (a per-SKU row count would be groupby('sku').size()).
merged_df['sales'] = merged_df.groupby('sku')['num_of_item'].transform('sum')


In [None]:
# Quick look at the first three rows.
merged_df.head(n=3)

In [None]:
# TODO: review SKUs that appear on several rows together with their sales
# value and decide what to do with them.
merged_df[merged_df['sku'] == "4ACBEDBE977480D19B7B682D4878CAE2"]

In [None]:
from src.data.eda import Eda

# Build the EDA helper and show its missing-values table for the merged frame.
eda = Eda()
missing_summary = eda.missing_values_table(merged_df)
missing_summary

In [None]:
# Inspecting for seasonality.
from src.data.seasonality import SeasonalityInspector, QuickSeasonalityInspector

# Create both inspectors up front.
inspector, quick_inspector = SeasonalityInspector(), QuickSeasonalityInspector()

# Optional: plot the raw series before running the checks.
# inspector.plot_time_series(merged_df, 'created_at', 'sales')



In [None]:
import numpy as np

# Window used when the FFT does not yield a usable period.
DEFAULT_WINDOW = 7

# Find the dominant period of the sales column through FFT.
dominant_period = quick_inspector.fast_fourier_transform(merged_df, 'sales')
print(f"Dominant period: {dominant_period}")

# Start from the default and only override it with a usable period.
N = DEFAULT_WINDOW

# The period is truncated to an int before it is used as a rolling window, so
# it must be finite and truncate to at least 1; a finite period below 1 would
# otherwise yield window=0 and N=0.
if np.isfinite(dominant_period) and int(dominant_period) >= 1:
    period_window = int(dominant_period)
    is_mean_stable, is_var_stable = quick_inspector.rolling_stats(merged_df, 'sales', window=period_window)

    # Switch to the dominant period only when mean or variance is not stable.
    if not (is_mean_stable and is_var_stable):
        N = period_window

print(f"Optimal N: {N}")


In [38]:
from src.data.feature_engineering import FeatureEngineeringProcess
# Feature-engineering helper whose methods are called on `fep` in later cells.
fep = FeatureEngineeringProcess()


In [39]:
# Price/sales correlation features computed per 'sku' over 'created_at'.
# NOTE(review): the meaning of the literal 7 and of each pair below is defined
# inside FeatureEngineeringProcess, which is not visible here -- confirm
# before tuning. merged_df must already contain the 'sales' column.
f_corr_params = [(-5.0, 1.0), (-3.0, 1.0), (-2.0, 1.0), (1.0, 1.0), (-1.0, 0.5), (-1.0, 0.33)]
merged_df_fe1 = fep.price_sales_correlation_features_updated(
    merged_df,
    7,
    f_corr_params,
    'sku',
    'created_at',
    'sale_price',
    'sales',
)

In [None]:
from src.data.eda import Eda
# EDA helper; its missing_values_table method is called in the following cell.
eda = Eda()

In [None]:
# Missing-values table for the frame returned by the correlation-feature step.
eda.missing_values_table(merged_df_fe1)

In [40]:
# Normalized features built on top of the correlation features.
# NOTE(review): [5, 10, 15] presumably maps to the *_5_days / *_10_days /
# *_15_days output columns; the meaning of 28 is defined by
# FeatureEngineeringProcess -- confirm.
normalize_day_spans = [5, 10, 15]
merged_df_fe2 = fep.normalize_features(
    merged_df_fe1, normalize_day_spans, 28, 'sku', 'created_at', 'sale_price', 'sales'
)

In [41]:
# Filter on price-stability periods; the call returns the filtered frame plus
# a second value kept in `insuficient_data`.
# NOTE(review): the meaning of 7 and 0.04 is defined by
# FeatureEngineeringProcess -- confirm before changing.
merged_df_fe3, insuficient_data = fep.filter_stability_periods(
    merged_df_fe2,
    7,
    0.04,
    sku_column='sku',
    date_column='created_at',
    price_column='sale_price',
)

In [None]:
# Preview the categorical product columns of the filtered frame.
# NOTE(review): a later cell drops these columns from merged_df, so this only
# works if the feature steps ran before that cell -- confirm run order.
categorical_preview_cols = ['category', 'name', 'brand', 'department']
print(merged_df_fe3[categorical_preview_cols].head())

In [None]:
# First five rows of the filtered feature frame.
merged_df_fe3.head(n=5)

In [32]:
## embeddings
from src.data.embeddings import EmbeddingModel
import torch
from sklearn.preprocessing import LabelEncoder

# Seed PyTorch before the model is built so that any random initialisation
# (and everything trained from it) is reproducible across kernel restarts.
torch.manual_seed(42)

# Categorical columns that are label-encoded and fed to the embedding model.
categorical_cols = ['category', 'name', 'brand', 'department']

# Work on a copy so the label encoding does not overwrite merged_df.
merged_df_embeddings = merged_df.copy()

# Replace every category value by an integer code and keep the fitted
# encoder of each column for later reference.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    merged_df_embeddings[col] = le.fit_transform(merged_df_embeddings[col])
    label_encoders[col] = le

# One (cardinality, embedding size) pair per column: cardinality is the
# largest code + 1 and the size is its truncated square root. Plain Python
# ints are passed on instead of numpy scalars.
embedding_dims = []
for col in categorical_cols:
    cardinality = int(merged_df_embeddings[col].max()) + 1
    embedding_dims.append((cardinality, int(cardinality ** 0.5)))
model = EmbeddingModel(embedding_dims)

# Prepare the encoded categories as a long tensor for the model.
cat_data = merged_df_embeddings[categorical_cols].values
cat_data_tensor = torch.tensor(cat_data, dtype=torch.long)


In [33]:
import torch.nn as nn
import torch.optim as optim

# Regression target: the sales column reshaped to an (n, 1) float tensor.
sales_data = merged_df_embeddings['sales'].values
sales_data_tensor = torch.tensor(sales_data, dtype=torch.float32).view(-1, 1)

# Loss and optimizer
criterion = nn.MSELoss()  # mean squared error
optimizer = optim.Adam(model.parameters(), lr=0.01)
epochs = 3  # number of full-batch training passes

# NOTE(review): the model output is later turned into a frame with many
# columns while the target here is (n, 1); if the shapes differ, MSELoss
# broadcasts the target (and the warning is suppressed) -- confirm intended.
for epoch in range(epochs):
    # Clear gradients left over from the previous step.
    optimizer.zero_grad()

    # Full-batch forward pass and loss.
    outputs = model(cat_data_tensor)
    loss = criterion(outputs, sales_data_tensor)

    # Back-propagate and update the weights.
    loss.backward()
    optimizer.step()

    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')


Inside Forward
Epoch [1/3], Loss: 220.6691
Inside Forward
Epoch [2/3], Loss: 220.3953
Inside Forward
Epoch [3/3], Loss: 220.1217


In [35]:

# Run the trained model once without gradient tracking and keep its output as
# a NumPy array. The module is called directly (rather than via .forward) so
# that any registered hooks run as well.
with torch.no_grad():
    embeddings = model(cat_data_tensor).numpy()


Inside Forward


In [36]:
# Append the model outputs as new columns. The new frame is given merged_df's
# own index so the concat lines up row-for-row; with the default RangeIndex of
# a fresh frame, any non-default index on merged_df would silently produce
# NaN-padded, misaligned rows. A row-count mismatch now raises instead.
embedding_frame = pd.DataFrame(embeddings, index=merged_df.index)
merged_df = pd.concat([merged_df, embedding_frame], axis=1)

# Drop the raw categorical columns now that the model outputs are attached
# (rebinding instead of mutating in place).
merged_df = merged_df.drop(['category', 'name', 'brand', 'department'], axis=1)


In [42]:
## quality assurance
# Eyeball the first five rows of the final feature frame.
merged_df_fe3.head(n=5)

Unnamed: 0,sku,status,created_at,shipped_at,delivered_at,returned_at,sale_price,gender,num_of_item,cost,sales,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,f_corr_-5.0_1.0,f_corr_-3.0_1.0,f_corr_-2.0_1.0,f_corr_1.0_1.0,f_corr_-1.0_0.5,f_corr_-1.0_0.33,normalized_log_avg_price_5_days,normalized_log_avg_sales_5_days,normalized_std_price_5_days,normalized_std_sales_5_days,normalized_log_avg_price_10_days,normalized_log_avg_sales_10_days,normalized_std_price_10_days,normalized_std_sales_10_days,normalized_log_avg_price_15_days,normalized_log_avg_sales_15_days,normalized_std_price_15_days,normalized_std_sales_15_days
169130,00003E3B9E5336685200AE85D21B4F5E,Shipped,2022-05-14 21:24:59+00:00,2022-05-15 20:22:00+00:00,NaT,NaT,99.0,F,2,56.232,4,0.631757,-0.76016,-0.116283,-1.268659,0.418157,0.322902,-0.846711,-1.052283,-0.252091,-0.930282,-0.363347,-0.159582,-0.458957,0.075026,0.164128,-0.295608,-0.207092,-1.177966,0.594594,2.166248,0.623266,0.915005,0.035908,1.059629,0.117021,1.069502,-0.436264,0.061746,0.657119,-0.219644,-0.801903,-0.113448,2.324102,-2.1716,0.705701,0.236924,-1.100304,2.462871,0.63848,-0.941088,-0.309316,0.417955,0.413417,0.759903,-1.695728,-0.49745,0.217141,0.964331,-0.584676,-1.624787,-0.024414,-0.258358,0.329589,0.196012,0.840671,-0.596519,1.140039,2.725322,-1.225726,0.167673,-1.164739,0.683688,0.24503,-0.582383,0.614965,1.033348,1.121344,-0.042182,0.978658,-1.195998,-1.181231,-0.36908,-0.40417,0.638707,0.715115,0.220856,-0.690681,-0.018661,0.183344,-0.154376,0.683673,-1.180188,0.861162,-1.590414,0.367333,-0.78952,-0.083583,0.685781,-0.061901,-1.866557,0.447379,-0.595483,0.17334,-1.61321,-0.894321,0.953592,-0.867383,-0.391639,-1.804825,-0.394888,-1.048432,0.604943,0.332982,-0.070469,0.203051,-0.350172,0.722368,0.876336,1.131705,0.290695,0.377771,1.122863,-1.58562,0.726153,0.483862,0.006609,-0.419614,-0.23862,1.064543,0.509797,0.76957,-0.675322,1.351774,1.578106,-0.540527,0.360223,-2.718648,-0.209178,0.363977,2.345551,-0.775054,-0.410015,0.323086,0.986062,0.077158,-1.265769,-1.408652,1.955522,2.221167,0.165256,-0.148993,0.91111,-0.983305,1.584102,1.488645,-1.123214,2.192798,0.075591,0.937907,-0.077175,0.299736,-0.851331,-0.61888,0.52499,-1.120899,-1.318662,-0.626563,-1.225812,-0.4322,0.344142,-0.508691,0.790273,-1.946357,1.475456,-0.363047,0.6245,0.639949,-0.580233,0.154645,-1.838059,-0.710956,-0.255829,0.846079,0.202946,0.805712,1.415565,0.995126,-0.108036,0.094724,1.238217,-0.396282,0.354132,-0.630043,0.853257,0.484022,-0.769045,1.472674,-0.06599,0.008685,-0.344417,1.105909,-2.119451,-1.295407,0.295699,-0.451892,0.999469,-0.682052,0.953146,0.66875,-1.556859,-0
.403738,0.962232,-0.715897,1.053807,-0.665399,-0.07881,-0.151233,-0.050524,0.740427,-1.339603,-0.532408,-1.412651,1.310249,0.406071,0.581328,-1.692472,-1.68848,2.358043,-1.119078,-1.522166,0.868449,0.070707,0.551867,,,,,,,,,,,,,,,,,,
169131,00003E3B9E5336685200AE85D21B4F5E,Complete,2022-12-09 03:58:35+00:00,2022-12-10 03:49:00+00:00,2022-12-10 13:54:00+00:00,NaT,99.0,F,1,56.232,4,0.631757,-0.76016,-0.116283,-1.268659,0.418157,0.322902,-0.846711,-1.052283,-0.252091,-0.930282,-0.363347,-0.159582,-0.458957,0.075026,0.164128,-0.295608,-0.207092,-1.177966,0.594594,2.166248,0.623266,0.915005,0.035908,1.059629,0.117021,1.069502,-0.436264,0.061746,0.657119,-0.219644,-0.801903,-0.113448,2.324102,-2.1716,0.705701,0.236924,-1.100304,2.462871,0.63848,-0.941088,-0.309316,0.417955,0.413417,0.759903,-1.695728,-0.49745,0.217141,0.964331,-0.584676,-1.624787,-0.024414,-0.258358,0.329589,0.196012,0.840671,-0.596519,1.140039,2.725322,-1.225726,0.167673,-1.164739,0.683688,0.24503,-0.582383,0.614965,1.033348,1.121344,-0.042182,0.978658,-1.195998,-1.181231,-0.36908,-0.40417,0.638707,0.715115,0.220856,-0.690681,-0.018661,0.183344,-0.154376,0.683673,-1.180188,0.861162,-1.590414,0.367333,-0.78952,-0.083583,0.685781,-0.061901,-1.866557,0.447379,-0.595483,0.17334,-1.61321,-0.894321,0.953592,-0.867383,-0.391639,-1.804825,-0.394888,-1.048432,0.604943,0.332982,-0.070469,0.203051,-0.350172,0.722368,0.876336,1.131705,0.290695,0.377771,1.122863,-1.58562,0.726153,0.483862,0.006609,-0.419614,-0.23862,1.064543,0.509797,0.76957,-0.675322,1.351774,1.578106,-0.540527,0.360223,-2.718648,-0.209178,0.363977,2.345551,-0.775054,-0.410015,0.323086,0.986062,0.077158,-1.265769,-1.408652,1.955522,2.221167,0.165256,-0.148993,0.91111,-0.983305,1.584102,1.488645,-1.123214,2.192798,0.075591,0.937907,-0.077175,0.299736,-0.851331,-0.61888,0.52499,-1.120899,-1.318662,-0.626563,-1.225812,-0.4322,0.344142,-0.508691,0.790273,-1.946357,1.475456,-0.363047,0.6245,0.639949,-0.580233,0.154645,-1.838059,-0.710956,-0.255829,0.846079,0.202946,0.805712,1.415565,0.995126,-0.108036,0.094724,1.238217,-0.396282,0.354132,-0.630043,0.853257,0.484022,-0.769045,1.472674,-0.06599,0.008685,-0.344417,1.105909,-2.119451,-1.295407,0.295699,-0.451892,0.999469,-0.682052,0.9531
46,0.66875,-1.556859,-0.403738,0.962232,-0.715897,1.053807,-0.665399,-0.07881,-0.151233,-0.050524,0.740427,-1.339603,-0.532408,-1.412651,1.310249,0.406071,0.581328,-1.692472,-1.68848,2.358043,-1.119078,-1.522166,0.868449,0.070707,0.551867,,,,,,,,,,,,,,,,,,
169132,00003E3B9E5336685200AE85D21B4F5E,Complete,2023-05-12 23:08:53+00:00,2023-05-15 07:59:00+00:00,2023-05-18 01:27:00+00:00,NaT,99.0,F,1,56.232,4,0.631757,-0.76016,-0.116283,-1.268659,0.418157,0.322902,-0.846711,-1.052283,-0.252091,-0.930282,-0.363347,-0.159582,-0.458957,0.075026,0.164128,-0.295608,-0.207092,-1.177966,0.594594,2.166248,0.623266,0.915005,0.035908,1.059629,0.117021,1.069502,-0.436264,0.061746,0.657119,-0.219644,-0.801903,-0.113448,2.324102,-2.1716,0.705701,0.236924,-1.100304,2.462871,0.63848,-0.941088,-0.309316,0.417955,0.413417,0.759903,-1.695728,-0.49745,0.217141,0.964331,-0.584676,-1.624787,-0.024414,-0.258358,0.329589,0.196012,0.840671,-0.596519,1.140039,2.725322,-1.225726,0.167673,-1.164739,0.683688,0.24503,-0.582383,0.614965,1.033348,1.121344,-0.042182,0.978658,-1.195998,-1.181231,-0.36908,-0.40417,0.638707,0.715115,0.220856,-0.690681,-0.018661,0.183344,-0.154376,0.683673,-1.180188,0.861162,-1.590414,0.367333,-0.78952,-0.083583,0.685781,-0.061901,-1.866557,0.447379,-0.595483,0.17334,-1.61321,-0.894321,0.953592,-0.867383,-0.391639,-1.804825,-0.394888,-1.048432,0.604943,0.332982,-0.070469,0.203051,-0.350172,0.722368,0.876336,1.131705,0.290695,0.377771,1.122863,-1.58562,0.726153,0.483862,0.006609,-0.419614,-0.23862,1.064543,0.509797,0.76957,-0.675322,1.351774,1.578106,-0.540527,0.360223,-2.718648,-0.209178,0.363977,2.345551,-0.775054,-0.410015,0.323086,0.986062,0.077158,-1.265769,-1.408652,1.955522,2.221167,0.165256,-0.148993,0.91111,-0.983305,1.584102,1.488645,-1.123214,2.192798,0.075591,0.937907,-0.077175,0.299736,-0.851331,-0.61888,0.52499,-1.120899,-1.318662,-0.626563,-1.225812,-0.4322,0.344142,-0.508691,0.790273,-1.946357,1.475456,-0.363047,0.6245,0.639949,-0.580233,0.154645,-1.838059,-0.710956,-0.255829,0.846079,0.202946,0.805712,1.415565,0.995126,-0.108036,0.094724,1.238217,-0.396282,0.354132,-0.630043,0.853257,0.484022,-0.769045,1.472674,-0.06599,0.008685,-0.344417,1.105909,-2.119451,-1.295407,0.295699,-0.451892,0.999469,-0.682052,0.9531
46,0.66875,-1.556859,-0.403738,0.962232,-0.715897,1.053807,-0.665399,-0.07881,-0.151233,-0.050524,0.740427,-1.339603,-0.532408,-1.412651,1.310249,0.406071,0.581328,-1.692472,-1.68848,2.358043,-1.119078,-1.522166,0.868449,0.070707,0.551867,,,,,,,,,,,,,,,,,,
171824,0004D0B59E19461FF126E3A08A814C33,Processing,2020-08-31 08:42:06+00:00,NaT,NaT,NaT,79.949997,F,1,37.656449,6,-0.649556,-1.756272,0.127767,-0.231711,-0.220959,-0.725222,-0.673059,-0.688489,-0.187438,-0.212767,-1.385889,0.951566,-0.697964,-1.21284,-0.729996,1.363052,1.041494,-0.919832,-0.316469,-1.01635,1.409205,0.784698,1.138312,1.572813,-0.68499,0.256066,0.445566,1.976982,-0.30042,-0.962215,-0.248413,0.270588,-1.11644,0.234201,0.397404,0.873276,0.421292,1.301951,-0.075809,0.654098,-1.100907,1.072841,-0.040191,-0.081623,0.641266,-1.241351,-2.569828,-0.954098,1.186735,0.345113,-0.215209,0.448969,1.072929,-0.213813,-1.82404,-0.786574,-0.973607,0.428525,0.228124,0.717831,-0.90476,-0.113489,-0.708962,-0.908692,0.857195,-1.184934,-0.989108,1.770424,2.368068,0.437148,1.848252,-0.525505,0.330536,-1.086485,1.872045,-0.81708,0.600823,1.007275,-1.211317,1.695981,-0.427835,-0.707461,-1.189546,0.487956,-0.352018,0.488004,0.955322,-0.335419,1.961599,1.623708,1.169015,-1.979976,-0.501332,-1.396503,1.623595,-2.04533,-0.30282,0.665475,1.585567,-2.321029,2.047873,-0.765952,0.045853,0.866394,1.392756,-0.613565,-0.108412,-0.976837,-0.007095,-0.210165,0.357478,0.508281,-0.333039,-0.714883,-1.044091,1.016965,-0.921858,0.901273,0.694268,-1.569583,1.632817,0.02054,0.588844,-0.755267,-1.347123,0.879929,1.918427,2.382419,1.602691,0.162943,-1.253249,1.386585,-0.36441,-0.946463,0.403243,1.484389,-1.071736,-1.623584,-0.174453,0.253763,1.708293,-0.0565,1.039119,-0.246598,-1.270756,-0.055138,-1.851316,-1.287644,-0.573839,0.353305,1.16871,0.179001,-0.689324,0.321405,-1.226778,1.033153,0.019817,1.558884,0.362043,-0.55952,0.466311,-1.733447,-0.7168,1.000805,0.879636,0.225116,-0.327619,-0.089703,-0.060398,-0.95269,-1.464153,0.0486,-1.162097,-0.684066,0.148324,-0.90694,-0.292813,0.647805,-1.142286,1.093052,1.228165,-1.091782,-0.224519,-0.171219,1.332349,-0.416869,-0.519977,-2.516383,-2.088141,-1.06446,-0.210851,-0.725414,0.926368,1.070919,-1.074553,-1.489991,0.424806,-0.826769,0.097031,-0.088284
,0.194873,0.738627,1.037482,-0.924275,-0.551034,-0.51577,0.595025,-1.351485,-1.483648,-2.485498,0.034812,-1.035437,0.600973,-0.626463,0.950461,-1.309381,-0.140108,1.100806,0.639662,-0.979765,1.505407,-0.521984,0.551867,,,,,,,,,,,,,,,,,,
171823,0004D0B59E19461FF126E3A08A814C33,Shipped,2022-10-19 10:38:15+00:00,2022-10-21 14:40:00+00:00,NaT,NaT,79.949997,F,1,37.656449,6,-0.649556,-1.756272,0.127767,-0.231711,-0.220959,-0.725222,-0.673059,-0.688489,-0.187438,-0.212767,-1.385889,0.951566,-0.697964,-1.21284,-0.729996,1.363052,1.041494,-0.919832,-0.316469,-1.01635,1.409205,0.784698,1.138312,1.572813,-0.68499,0.256066,0.445566,1.976982,-0.30042,-0.962215,-0.248413,0.270588,-1.11644,0.234201,0.397404,0.873276,0.421292,1.301951,-0.075809,0.654098,-1.100907,1.072841,-0.040191,-0.081623,0.641266,-1.241351,-2.569828,-0.954098,1.186735,0.345113,-0.215209,0.448969,1.072929,-0.213813,-1.82404,-0.786574,-0.973607,0.428525,0.228124,0.717831,-0.90476,-0.113489,-0.708962,-0.908692,0.857195,-1.184934,-0.989108,1.770424,2.368068,0.437148,1.848252,-0.525505,0.330536,-1.086485,1.872045,-0.81708,0.600823,1.007275,-1.211317,1.695981,-0.427835,-0.707461,-1.189546,0.487956,-0.352018,0.488004,0.955322,-0.335419,1.961599,1.623708,1.169015,-1.979976,-0.501332,-1.396503,1.623595,-2.04533,-0.30282,0.665475,1.585567,-2.321029,2.047873,-0.765952,0.045853,0.866394,1.392756,-0.613565,-0.108412,-0.976837,-0.007095,-0.210165,0.357478,0.508281,-0.333039,-0.714883,-1.044091,1.016965,-0.921858,0.901273,0.694268,-1.569583,1.632817,0.02054,0.588844,-0.755267,-1.347123,0.879929,1.918427,2.382419,1.602691,0.162943,-1.253249,1.386585,-0.36441,-0.946463,0.403243,1.484389,-1.071736,-1.623584,-0.174453,0.253763,1.708293,-0.0565,1.039119,-0.246598,-1.270756,-0.055138,-1.851316,-1.287644,-0.573839,0.353305,1.16871,0.179001,-0.689324,0.321405,-1.226778,1.033153,0.019817,1.558884,0.362043,-0.55952,0.466311,-1.733447,-0.7168,1.000805,0.879636,0.225116,-0.327619,-0.089703,-0.060398,-0.95269,-1.464153,0.0486,-1.162097,-0.684066,0.148324,-0.90694,-0.292813,0.647805,-1.142286,1.093052,1.228165,-1.091782,-0.224519,-0.171219,1.332349,-0.416869,-0.519977,-2.516383,-2.088141,-1.06446,-0.210851,-0.725414,0.926368,1.070919,-1.074553,-1.489991,0.424806,-0.826769
,0.097031,-0.088284,0.194873,0.738627,1.037482,-0.924275,-0.551034,-0.51577,0.595025,-1.351485,-1.483648,-2.485498,0.034812,-1.035437,0.600973,-0.626463,0.950461,-1.309381,-0.140108,1.100806,0.639662,-0.979765,1.505407,-0.521984,0.551867,,,,,,,,,,,,,,,,,,


In [44]:
from src.data.eda import Eda

# Build the EDA helper and show its missing-values table for the final
# feature frame.
eda = Eda()
missing_summary_fe3 = eda.missing_values_table(merged_df_fe3)
missing_summary_fe3

The selected dataframe has 252 columns and 21 columns with missing values.


Unnamed: 0,Missing Values,% of Total Values
normalized_log_avg_sales_5_days,148868,100.0
normalized_log_avg_price_5_days,148868,100.0
normalized_log_avg_sales_15_days,148868,100.0
normalized_log_avg_price_15_days,148868,100.0
normalized_log_avg_sales_10_days,148868,100.0
normalized_log_avg_price_10_days,148868,100.0
normalized_std_price_15_days,148864,100.0
normalized_std_sales_15_days,148864,100.0
normalized_std_price_10_days,148730,99.9
normalized_std_sales_10_days,148730,99.9
