In [1]:
import os
import pandas as pd
import glob

# Folder that holds the CSV files to merge.
path = "dataset/January 2021/January 2021/Tokped/Dell"

# Debug output: show which directory is actually being scanned.
print(f"Checking files in directory: {path}")
print(f"Absolute path: {os.path.abspath(path)}")

# Collect every CSV directly inside the folder. glob makes no promise about
# ordering, so sort the matches to keep the merged row order reproducible.
all_files = sorted(glob.glob(path + "/*.csv"))

# Stop early when the folder contains no CSV at all.
if not all_files:
    raise ValueError(f"No CSV files found in the directory: {path}")

# Read each file and keep only the non-empty frames.
df_list = []
for filename in all_files:
    print(f"Reading file: {filename}")  # Debug output
    try:
        df = pd.read_csv(filename)
    except pd.errors.EmptyDataError:
        # read_csv raises (instead of returning an empty frame) when the file
        # has no columns to parse, e.g. a zero-byte file. Treat it like any
        # other empty file so the skip branch below handles it.
        df = pd.DataFrame()
    if not df.empty:
        df_list.append(df)
    else:
        print(f"Warning: {filename} is empty and will be skipped.")

# Nothing usable was read: concatenating an empty list would fail anyway.
if not df_list:
    raise ValueError("No valid DataFrames to concatenate. All files may be empty.")

# Stack all frames into one, renumbering the index from 0.
combined_df = pd.concat(df_list, ignore_index=True)

# Persist the merged result as a new CSV (without the index column).
output_path = "dataset/data_combined.csv"
print(f"Saving combined DataFrame to: {output_path}")  # Debug output
combined_df.to_csv(output_path, index=False)


Checking files in directory: dataset/January 2021/January 2021
Absolute path: D:\Kuliah\MSIB Bangkit\Project\Models\dataset\January 2021\January 2021


ValueError: No CSV files found in the directory: dataset/January 2021/January 2021

In [13]:
# Load the merged CSV written by the combine step and print pandas'
# (truncated) text rendering of the whole frame.
df = pd.read_csv(filepath_or_buffer="dataset/data_combined.csv")
print(df)

                                                   name       price  \
0     DELL Inspiron 14-3493 [Core i3-1005G1/14"FHD/4...   6.799.000   
1     Dell Inspiron 15 5583 i5-8265 8GB 2TB GARANSI ...  11.499.000   
2     Dell Inspiron 5402 [Ci7-1165G7-8-512-NVD-W10-O...  15.299.000   
3     DELL INSPIRON 3881 i7 10700F 16GB 512GB GTX166...  16.990.000   
4     Dell Inspiron AIO 5400 [Ci7-1165G7-8-256+1T-NV...  14.499.000   
...                                                 ...         ...   
1755                             Dell Inspiron AiO 3064   7.100.000   
1756  DELL INSPIRON AIO 5477 (i5-8400/8GB/2TB/GTX105...  16.099.999   
1757  Dell Inspiron 3481 I3 7020U 4GB 1TB AMD RADEON...   6.740.000   
1758  Obrallll Laptop Thinlpad L530 ci5 3rd gen Mura...   2.450.000   
1759        LAPTOP HP 820 g2 i7gen5 8gb hdd500 FREE TAS   4.850.000   

           location sold                                               link  \
0       Kab. Batang    3  https://ta.tokopedia.com/promo/v1/clicks/8

In [19]:
import pandas as pd

# Read the merged CSV.
file_path = "dataset/data_combined.csv"
df = pd.read_csv(file_path)

# Add a 'terjual' column holding how many rows share the same 'name'.
# The count must be the per-name total: a running counter (cumcount() + 1)
# is always 1 on the first occurrence, which is the only row that survives
# the de-duplication below. Rows whose 'name' is missing get NaN here.
name_counts = df['name'].value_counts()
df['terjual'] = df['name'].map(name_counts)

# Collapse rows with the same 'name', keeping the first occurrence of each.
df = df.drop_duplicates(subset='name')

# Save the de-duplicated frame to a new CSV (without the index column).
output_file_path = "dataset/data_combined_processed.csv"
df.to_csv(output_file_path, index=False)

print("File CSV baru telah dibuat:", output_file_path)


File CSV baru telah dibuat: dataset/data_combined_processed.csv


In [9]:
import os
import pandas as pd
import glob

# Root folder; CSV files are searched for in it and in all of its sub-folders.
base_path = "dataset/January 2021"

# Debug output: show which directory is actually being scanned.
print(f"Checking files in directory: {base_path}")
print(f"Absolute path: {os.path.abspath(base_path)}")

# Recursively collect every CSV below base_path. glob makes no promise about
# ordering, so sort the matches to keep the merged row order reproducible.
all_files = sorted(glob.glob(os.path.join(base_path, '**/*.csv'), recursive=True))

# Stop early when no CSV exists anywhere under the root folder.
if not all_files:
    raise ValueError(f"No CSV files found in the directory and subdirectories: {base_path}")

# Read each file, drop its 'link' column, and keep only the non-empty frames.
df_list = []
for filename in all_files:
    print(f"Reading file: {filename}")  # Debug output
    try:
        # errors='ignore' makes the drop a no-op for files without 'link'.
        df = pd.read_csv(filename).drop(columns=['link'], errors='ignore')
        if not df.empty:
            df_list.append(df)
        else:
            print(f"Warning: {filename} is empty and will be skipped.")
    except Exception as e:
        # Best effort: report the unreadable file and continue with the rest.
        print(f"Error reading {filename}: {e}")

# Nothing usable was read: concatenating an empty list would fail anyway.
if not df_list:
    raise ValueError("No valid DataFrames to concatenate. All files may be empty or unreadable.")

# Stack all frames into one, renumbering the index from 0.
combined_df = pd.concat(df_list, ignore_index=True)

# Persist the merged result as a new CSV (without the index column).
output_path = "dataset/data_combined_baru.csv"
print(f"Saving combined DataFrame to: {output_path}")  # Debug output
combined_df.to_csv(output_path, index=False)


Checking files in directory: dataset/January 2021
Absolute path: D:\Kuliah\MSIB Bangkit\Project\Models\dataset\January 2021
Reading file: dataset/January 2021\January 2021\Shopee\Acer\aspire_shopee_14_01_2021.csv
Reading file: dataset/January 2021\January 2021\Shopee\Acer\aspire_shopee_15_01_2021.csv
Reading file: dataset/January 2021\January 2021\Shopee\Acer\aspire_shopee_16_01_2021.csv
Reading file: dataset/January 2021\January 2021\Shopee\Acer\aspire_shopee_18_01_2021.csv
Reading file: dataset/January 2021\January 2021\Shopee\Acer\aspire_shopee_19_01_2021.csv
Reading file: dataset/January 2021\January 2021\Shopee\Acer\aspire_shopee_20_01_2021.csv
Reading file: dataset/January 2021\January 2021\Shopee\Acer\aspire_shopee_21_01_2021.csv
Reading file: dataset/January 2021\January 2021\Shopee\Acer\aspire_shopee_22_01_2021.csv
Reading file: dataset/January 2021\January 2021\Shopee\Acer\aspire_shopee_23_01_2021.csv
Reading file: dataset/January 2021\January 2021\Shopee\Acer\aspire_shopee_2

In [10]:
# Load the CSV written by the recursive combine step and print pandas'
# (truncated) text rendering of the whole frame.
file_path = "dataset/data_combined_baru.csv"
df = pd.read_csv(filepath_or_buffer=file_path)
print(df)

                                                    name       price  \
0      LCD LED Screen Panel 13.3 Slim Acer Aspire S3 ...   2.000.000   
1                                           Toshiba R732   2.599.000   
2      Laptop ACER ASPIRE 3 A314-22 | NX.HVVSN.00L BO...   5.599.000   
3        acer aspire es1-431 N3050 RAM 4GB HDD 500GB\r\n   3.950.000   
4                                     Acer Aspire V5 132   2.000.000   
...                                                  ...         ...   
29793  MSI PRESTIGE 14 i7 10710 8GB 512ssd GTX1650 4G...  18.699.000   
29794  MSI PRESTIGE 15 A10SC-080ID I7-10710U 16GB 512...  23.499.000   
29795  Laptop MSI Prestige 14 A10RAS - 217 (pink) i7-...  19.999.000   
29796  MSI Prestige 14 A11SCX [9S7-14C412-231] i7-118...  27.999.000   
29797  MSI LAPTOP PRESTIGE 14-A11SCX-231 i7-1185G7 32...  27.999.000   

                     location sold        date  
0          KOTA JAKARTA PUSAT    0  14-01-2021  
1               KOTA SEMARANG   11  1