In [30]:
import pandas as pd

In [32]:
# Load the raw Bitcoin history export from the working directory and
# preview its first five rows.
df = pd.read_csv(filepath_or_buffer="Bitcoin Historical Data (2).csv")
df.head(n=5)

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,03/24/2024,67211.9,64036.5,67587.8,63812.9,65.59K,4.96%
1,03/23/2024,64037.8,63785.6,65972.4,63074.9,35.11K,0.40%
2,03/22/2024,63785.5,65501.5,66633.3,62328.3,72.43K,-2.62%
3,03/21/2024,65503.8,67860.0,68161.7,64616.1,75.26K,-3.46%
4,03/20/2024,67854.0,62046.8,68029.5,60850.9,133.53K,9.35%


In [34]:
# Display the column labels of the loaded frame before any renaming.
df.columns

Index(['Date', 'Price', 'Open', 'High', 'Low', 'Vol.', 'Change %'], dtype='object')

In [28]:
# Print a heading followed by the raw column labels as a plain Python list.
print("Original column names:", df.columns.tolist(), sep="\n")


Original column names:
['Date', 'Price', 'Open', 'High', 'Low', 'Vol.', 'Change %']


In [36]:
# Step 1: Rename columns for consistency
# (each key is a raw header, each value is its cleaned name).
df = df.rename(columns={
    'Price': 'Close',
    'Vol.': 'Volume',
    'Change %': 'Change_Pct'
})

# Step 2: Convert Date to datetime format
# The raw values are MM/DD/YYYY strings (e.g. 03/24/2024), so the format is
# stated explicitly instead of being inferred; this prevents day and month
# from being swapped on ambiguous dates. Values that do not match the format
# still become NaT because of errors='coerce'.
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y', errors='coerce')

# Step 3: Convert numeric price columns
# Remove any thousands separators from text values, then cast to float.
for col in ['Open', 'High', 'Low', 'Close']:
    df[col] = df[col].replace({',': ''}, regex=True).astype(float)

# Step 4: Convert Volume (handle 'K', 'M' and 'B' suffixes)
def convert_volume(val):
    """Convert an abbreviated volume value into a plain number.

    Thousands separators are removed, and a trailing ``K``, ``M`` or ``B``
    (upper or lower case) scales the number by 1e3, 1e6 or 1e9.

    Parameters
    ----------
    val : object
        Raw cell value; it is passed through ``str()`` before parsing, so
        strings, numbers and missing values are all accepted.

    Returns
    -------
    number
        The numeric volume, or NaN when the text cannot be parsed
        (for example ``'-'``, a bare suffix, or a missing value).
    """
    multipliers = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}

    text = str(val).strip().replace(',', '')
    suffix = text[-1:].upper()  # slicing keeps this safe for an empty string

    if suffix in multipliers:
        try:
            return float(text[:-1]) * multipliers[suffix]
        except ValueError:
            # A suffix without a usable number in front of it is treated
            # like any other unparseable value instead of raising.
            return float('nan')

    return pd.to_numeric(text, errors='coerce')

# Turn every abbreviated volume entry into a plain number.
df['Volume'] = df['Volume'].apply(convert_volume)

# Step 5: Clean Change_Pct column
# Work on the text form, drop the percent sign and any commas, then cast.
df['Change_Pct'] = (
    df['Change_Pct']
    .astype(str)
    .str.replace('%', '')
    .str.replace(',', '')
    .astype(float)
)

# Step 6: Order rows from oldest to newest and rebuild a 0..n-1 index
df = (
    df
    .sort_values(by='Date')
    .reset_index(drop=True)
)

# Preview cleaned data
df.head()


Unnamed: 0,Date,Close,Open,High,Low,Volume,Change_Pct
0,2010-07-18,0.1,0.0,0.1,0.1,80.0,0.0
1,2010-07-19,0.1,0.1,0.1,0.1,570.0,0.0
2,2010-07-20,0.1,0.1,0.1,0.1,260.0,0.0
3,2010-07-21,0.1,0.1,0.1,0.1,580.0,0.0
4,2010-07-22,0.1,0.1,0.1,0.1,2160.0,0.0


In [38]:
# Write the cleaned frame to disk; the positional index is not saved.
df.to_csv(path_or_buf="btc_cleaning_tillmarch2024.csv", index=False)
