In [52]:
import glob
import os
import unicodedata

import pandas as pd

In [53]:
class DataProcessor:
    """Load a delimited product file into a DataFrame and clean/label it.

    The file is read eagerly in ``__init__``; every other method operates on
    ``self.df`` and rebinds it to the transformed frame.
    """

    # Label returned when no keyword rule matches (or the name is not text).
    _DEFAULT_CATEGORY = 'Khác'

    # Ordered (label, keywords) rules. ORDER MATTERS: the first rule with a
    # keyword contained in the lower-cased product name wins, so e.g. a name
    # containing both 'áo' and 'quần' is labelled 'Áo'.
    # Keywords are normalised to NFC once here so that they compare equal to
    # NFC-normalised product names regardless of how this file was saved.
    _CATEGORY_RULES = tuple(
        (label, tuple(unicodedata.normalize('NFC', kw) for kw in keywords))
        for label, keywords in (
            ('Áo', ('áo', 'sơ mi')),
            ('Quần', ('quần',)),
            ('Váy', ('váy',)),
            ('Túi', ('túi', 'ví')),
            ('Giày', ('giày', 'sandal')),
            ('Dép', ('dép',)),
            ('Đầm', ('đầm',)),
            ('Nón/Mũ', ('nón', 'mũ')),
            ('Balo/Cặp', ('balo', 'cặp', 'ba lô')),
            ('Mắt Kính', ('mắt kính', 'kính')),
            ('Khăn', ('khăn',)),
            ('Tất', ('tất',)),
            ('Găng Tay', ('găng tay',)),
            ('Đồng Hồ', ('đồng hồ',)),
            ('Vòng', ('vòng',)),
            ('Thắt Lưng', ('thắt lưng',)),
            ('Bộ', ('bộ',)),
            ('Phụ kiện', ('khuyên tai', 'kẹp', 'dây chuyền', 'bông tai',
                          'gắp', 'nhẫn')),
        )
    )

    def __init__(self, file_path, sep='\t'):
        """Read ``file_path`` into ``self.df``.

        Args:
            file_path: Path of the delimited text file to load.
            sep: Field separator passed to ``pandas.read_csv`` (tab by
                default).
        """
        self.file_path = file_path
        self.sep = sep
        self.df = pd.read_csv(file_path, sep=sep)

    def remove_columns(self, columns):
        """Drop the given column label(s) from ``self.df``."""
        self.df = self.df.drop(columns=columns)

    def drop_duplicates(self):
        """Remove fully duplicated rows from ``self.df``."""
        self.df = self.df.drop_duplicates()

    def rename_columns(self, columns_dict):
        """Rename columns using an ``{old_name: new_name}`` mapping.

        Rebinds ``self.df`` (rather than mutating in place) so that it
        behaves like the other transformation methods of this class.
        """
        self.df = self.df.rename(columns=columns_dict)

    def save_to_csv(self, output_path):
        """Write ``self.df`` to ``output_path`` as CSV without the index."""
        self.df.to_csv(output_path, index=False)

    def classify_product(self):
        """Add a 'Category' column derived from the 'Tên sản phẩm' column."""
        self.df['Category'] = self.df['Tên sản phẩm'].apply(self._classify)

    def _classify(self, name):
        """Return the category label for a single product name.

        Matching is a case-insensitive substring test against
        ``_CATEGORY_RULES``, evaluated in order; the first hit wins.

        Args:
            name: Product name. Non-string values (e.g. NaN for a missing
                cell) are accepted and fall through to the default label
                instead of raising.

        Returns:
            The matching category label, or 'Khác' when nothing matches.
        """
        if not isinstance(name, str):
            # Missing cells arrive as float NaN / None; they have no .lower().
            return self._DEFAULT_CATEGORY

        # NFC so that decomposed (NFD) Vietnamese text still matches keywords.
        name = unicodedata.normalize('NFC', name.lower())
        for label, keywords in self._CATEGORY_RULES:
            if any(kw in name for kw in keywords):
                return label
        return self._DEFAULT_CATEGORY


# Remove unnecessary features

### 1. Tiki data

In [54]:
# Tiki: load the cleaned export, attach a 'Category' column, write the result.
processor = DataProcessor(
    file_path='Data/tiki_products_modified.csv',
    sep=',',
)
processor.classify_product()
processor.save_to_csv(output_path='Data/tiki_products_final.csv')

### 2. Uniqlo data

In [55]:
# Uniqlo: load the cleaned export, attach a 'Category' column, write the result.
processor = DataProcessor(
    file_path='Data/uniqlo_products_modified.csv',
    sep=',',
)
processor.classify_product()
processor.save_to_csv(output_path='Data/uniqlo_products_final.csv')

In [56]:
# Adidas/Nike: load the cleaned export, attach a 'Category' column, write the result.
processor = DataProcessor(
    file_path='Data/adidas_nikes_products_modified.csv',
    sep=',',
)
processor.classify_product()
processor.save_to_csv(output_path='Data/adidas_nikes_products_final.csv')

In [57]:
# Per-source files produced by the classification cells.
csv_files = [
    'Data/tiki_products_final.csv',
    'Data/uniqlo_products_final.csv',
    'Data/adidas_nikes_products_final.csv',
]

# Read every CSV file into its own DataFrame.
dfs = list(map(pd.read_csv, csv_files))

# Stack all DataFrames into one, renumbering the row index from 0.
merged_df = pd.concat(objs=dfs, ignore_index=True)

# Save the combined result to a new CSV file (index column omitted).
merged_df.to_csv(path_or_buf='merged_file.csv', index=False)