In [23]:
import pandas as pd
import json

# Step 1: Load the JSON file.
# The encoding is given explicitly: JSON text is UTF-8, and without it open()
# falls back to the platform's locale encoding, which differs between systems.
with open('data/AK-47 - Ice Coaled (Factory New).json', encoding='utf-8') as f:
    data = json.load(f)

# Step 2: Build a three-column DataFrame from the entries under data['prices']
df = pd.DataFrame(data['prices'], columns=['Date', 'Price', 'Volume'])

# Step 3: Clean and parse the 'Date' column.
# The regex keeps the shortest prefix that ends in four consecutive digits
# (e.g. "Jul 01 2022" out of "Jul 01 2022 01: +0"); anything that does not
# match the "%b %d %Y" layout afterwards is coerced to NaT.
df['Date'] = pd.to_datetime(
    df['Date'].str.extract(r'^(.*?\d{4})', expand=False),
    format="%b %d %Y",
    errors='coerce',
)

# Step 4: Coerce 'Price' and 'Volume' to numeric dtypes.
# Values that cannot be parsed become NaN rather than raising.
df['Price'] = pd.to_numeric(df['Price'], errors='coerce')

# 'Volume' is first rendered as text so that any commas can be stripped
# before the numeric conversion.
df['Volume'] = pd.to_numeric(
    df['Volume'].astype(str).str.replace(',', ''),
    errors='coerce',
)

# Steps 5 and 6: Discard rows where 'Date', 'Price' or 'Volume' is missing,
# then discard rows that are exact duplicates of an earlier row.
df = df.dropna(subset=['Date', 'Price', 'Volume']).drop_duplicates()

# Step 7: Min-max normalize 'Price' and 'Volume' into '<col>_norm' columns.
# Each value is mapped to (value - min) / (max - min), i.e. into [0, 1].
for col in ['Price', 'Volume']:
    col_min = df[col].min()
    col_range = df[col].max() - col_min
    if col_range == 0:
        # A constant column has no spread; dividing would give 0/0 = NaN for
        # every row, so the normalized value is defined as 0.0 instead.
        df[col + '_norm'] = 0.0
    else:
        df[col + '_norm'] = (df[col] - col_min) / col_range

# Step 8: Write the cleaned DataFrame to 'cleaned_data.csv' without the index
df.to_csv('cleaned_data.csv', index=False)

# Step 9: Show the first five rows of the cleaned data
df.head(n=5)


Unnamed: 0,Date,Price,Volume,Price_norm,Volume_norm
0,2022-07-01,22714.799,1,1.0,0.0
1,2022-07-02,16488.504,73,0.708333,0.211765
2,2022-07-03,12852.618,64,0.538012,0.185294
3,2022-07-04,11102.779,57,0.456042,0.164706
4,2022-07-05,10701.924,46,0.437264,0.132353
