# Limpieza in-place — Criptomonedas (local)
Este notebook asume que los CSV están en **este mismo directorio**:
`coin_Dogecoin.csv`, `coin_Ethereum.csv`, `coin_Bitcoin.csv`, `coin_BinanceCoin.csv`, `coin_Solana.csv`.

Salida: sobrescribe cada archivo y genera `crypto_clean.csv` unificado.

In [5]:
import pandas as pd, numpy as np, os
from pathlib import Path
# Input CSVs, one per coin. Paths are relative, and each file is overwritten
# in place with its cleaned version by limpiar_archivo.
FILES = ['coin_Dogecoin.csv','coin_Ethereum.csv', 'coin_Bitcoin.csv', 'coin_BinanceCoin.csv', 'coin_Solana.csv']
# Destination of the combined table built from all cleaned files.
OUT_UNIFIED = 'crypto_clean.csv'
def _a_punto_decimal(texto):
    """Return *texto* with '.' as the only decimal mark and no thousands separators."""
    if not isinstance(texto, str):
        # Missing values that survived the string conversion pass through.
        return texto
    ult_coma = texto.rfind(',')
    ult_punto = texto.rfind('.')
    if ult_coma >= 0 and ult_punto >= 0:
        # Both marks present: the right-most one is the decimal mark, the
        # other one groups thousands ("1,234.56" or "1.234,56").
        if ult_coma > ult_punto:
            return texto.replace('.', '').replace(',', '.')
        return texto.replace(',', '')
    if texto.count(',') > 1:
        # A number cannot have several decimal marks, so these group thousands.
        return texto.replace(',', '')
    if texto.count('.') > 1:
        return texto.replace('.', '')
    # Zero or one separator: a lone comma is read as the decimal mark.
    return texto.replace(',', '.')


def normalizar_decimales(df, cols):
    """Convert the listed columns of *df* to numeric values, in place.

    Each value is turned into text, stripped of every character other than
    digits, 'e'/'E', '.', ',' and '-', normalised to a dot decimal mark and
    parsed with ``pd.to_numeric``. Values that still cannot be parsed become
    NaN. Thousands separators are removed rather than being mistaken for a
    second decimal mark, so "1,234.56" and "1.234,56" both parse to 1234.56.

    Args:
        df: DataFrame to modify.
        cols: Column names to convert; names absent from *df* are skipped.

    Returns:
        The same DataFrame object, with the converted columns.
    """
    for c in cols:
        if c not in df.columns:
            continue
        limpio = (
            df[c].astype(str)
            .str.replace(r'[^0-9eE.,\-]', '', regex=True)
            .map(_a_punto_decimal)
        )
        df[c] = pd.to_numeric(limpio, errors='coerce')
    return df
def limpiar_archivo(fname):
    """Clean one coin CSV, overwrite it in place, and return the cleaned frame.

    Column names are stripped and title-cased ('Market Cap' becomes
    'Marketcap'), the price/volume columns are converted to numbers, 'Date'
    is parsed as a UTC timestamp, 'Symbol' is upper-cased and stripped and
    'Name' is stripped. Rows lacking Date, Symbol or Close are dropped, the
    rest are sorted by Date and a 'Year' column is derived from Date.

    Args:
        fname: Path of the CSV file to read and then overwrite.

    Returns:
        The cleaned DataFrame that was written back to *fname*.
    """
    df = pd.read_csv(fname)
    df.columns = [c.strip().title().replace('Market Cap','Marketcap') for c in df.columns]
    df = normalizar_decimales(df, ['High','Low','Open','Close','Volume','Marketcap'])
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce', utc=True)
    if 'Symbol' in df.columns:
        # astype(str) would turn a missing symbol into the text 'nan', which
        # dropna below could no longer detect; restore the missing marker.
        con_simbolo = df['Symbol'].notna()
        df['Symbol'] = df['Symbol'].astype(str).str.upper().str.strip().where(con_simbolo)
    if 'Name' in df.columns:
        con_nombre = df['Name'].notna()
        df['Name'] = df['Name'].astype(str).str.strip().where(con_nombre)
    # Only require the key columns that exist, in line with the guards above.
    requeridas = [c for c in ('Date', 'Symbol', 'Close') if c in df.columns]
    if requeridas:
        df = df.dropna(subset=requeridas)
    if 'Date' in df.columns:
        df = df.sort_values('Date')
        # Derived after unparseable dates are gone, so Year stays an integer.
        df['Year'] = df['Date'].dt.year
    df = df.reset_index(drop=True)
    df.to_csv(fname, index=False)
    return df
# Refuse to start unless every expected input file is present.
missing = [nombre for nombre in FILES if not os.path.isfile(nombre)]
if missing:
    raise FileNotFoundError(f"No se encontraron: {missing}. Coloca los CSV en este directorio y reintenta.")

# Clean each file (this also rewrites it on disk) and collect the results.
dfs = [limpiar_archivo(nombre) for nombre in FILES]

# Stack all coins, keep the known columns that exist, and leave one row per
# (Symbol, Date) pair in chronological order.
uni = pd.concat(dfs, ignore_index=True, sort=False)
keep = [
    c
    for c in ('Sno','Name','Symbol','Date','High','Low','Open','Close','Volume','Marketcap','Year')
    if c in uni.columns
]
uni = uni[keep]
uni = uni.drop_duplicates(subset=['Symbol','Date'])
uni = uni.sort_values('Date').reset_index(drop=True)

uni.to_csv(OUT_UNIFIED, index=False)
print('Listo →', OUT_UNIFIED)
# Last expression of the cell: the notebook displays this summary table.
uni.describe(include='all')

Listo → crypto_clean.csv


Unnamed: 0,Sno,Name,Symbol,Date,High,Low,Open,Close,Volume,Marketcap,Year
count,9805.0,9805,9805,9805,9805.0,9805.0,9805.0,9805.0,9805.0,9805.0,9805.0
unique,,5,5,,,,,,,,
top,,Bitcoin,BTC,,,,,,,,
freq,,2991,2991,,,,,,,,
mean,1199.528506,,,2018-03-26 11:18:56.093319680+00:00,2199.107182,2066.764564,2136.350976,2140.013545,5104610000.0,47831220000.0,2017.735849
min,1.0,,,2013-04-29 23:59:59+00:00,8.9e-05,8.5e-05,8.7e-05,8.7e-05,0.0,0.0,2013.0
25%,500.0,,,2016-07-13 23:59:59+00:00,0.005495,0.004901,0.005222,0.005218,11323400.0,314179900.0,2016.0
50%,1113.0,,,2018-06-20 23:59:59+00:00,31.799553,29.51284,30.655258,30.70443,88877100.0,3531509000.0,2018.0
75%,1820.0,,,2020-02-23 23:59:59+00:00,576.602397,531.333984,558.497009,559.678501,3149576000.0,29045040000.0,2020.0
max,2991.0,,,2021-07-06 23:59:59+00:00,64863.098908,62208.964366,63523.754869,63503.45793,350967900000.0,1186364000000.0,2021.0
