In [39]:
# Step 1: Load packages and libraries

# Data manipulation
import pandas as pd  # tabular data loading, cleaning and operations
import numpy as np  # numerical operations and array handling

# Data visualization
import matplotlib.pyplot as plt  # basic plotting
import seaborn as sns  # plotting library built on matplotlib (conventional alias: sns)

# Machine learning and text processing
from sklearn.feature_extraction.text import TfidfVectorizer  # turns text into TF-IDF feature vectors
from sklearn.metrics.pairwise import cosine_similarity  # pairwise cosine similarity between vectors

# Additional utilities
import os  # operating system interactions (e.g. listing files in the working directory)
from scipy.sparse import coo_matrix  # sparse matrix in coordinate (COO) format

In [40]:
# Load the Walmart product sample into `df`.
#
# The CSV export is tried first; if reading it fails for any ordinary reason
# (missing file, parse error, ...) the tab-separated export is tried instead.
# If both attempts fail, the error and the contents of the working directory
# are printed and the exception is re-raised so the cell stops here instead of
# leaving `df` undefined for the cells that follow.
try:
    df = pd.read_csv("marketing_sample_for_walmart_5k_data.csv")
except Exception:
    # `except Exception` (not a bare `except:`) so that KeyboardInterrupt and
    # SystemExit are not mistaken for a failed CSV read.
    try:
        df = pd.read_csv("marketing_sample_for_walmart_5k_data.tsv", sep='\t')
    except Exception as e:
        print(f"Error loading file: {e}")
        print("Available files:", os.listdir())
        raise

print('Data loaded successfully!')

# Configure display to show every column on a single, unwrapped line.
# Note: these are global pandas options and affect all later cells.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)

print(df.head())

Data loaded successfully!
                            Uniq Id            Crawl Timestamp  \
0  1705736792d82aa2f2d3caf1c07c53f4  2020-09-24 03:21:12 +0000   
1  95a9fe6f4810fcfc7ff244fd06784f11  2020-10-30 14:04:08 +0000   
2  8d4d0330178d3ed181b15a4102b287f2  2020-08-06 05:51:47 +0000   
3  fddc4df45b35efd886794b261f730c51  2020-07-15 11:22:04 +0000   
4  0990cf89a59ca6a0460349a3e4f51d42  2020-11-26T12:27:20+00:00   

   Dataset Origin                        Product Id  Product Barcode  \
0             NaN  2e17bf4acecdece67fc00f07ad62c910              NaN   
1             NaN  076e5854a62dd283c253d6bae415af1f              NaN   
2             NaN  8a4fe5d9c7a6ed26cc44d785a454b124              NaN   
3             NaN  03b5fb878a33eadff8b033419eab9669              NaN   
4             NaN  ce3d761e57d6ccad80619297b5b1bcbc              NaN   

  Product Company Type Source Product Brand Source  \
0                  Competitor                  NaN   
1                  Competitor       