In [11]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression

In [12]:
# df = pd.read_csv('/home/alumnadotarde/Descargas/Bitcoin.csv')
df = pd.read_csv('./bitcoin.csv', delimiter=',')
df.head()

Unnamed: 0,Fecha,Último,Apertura,Máximo,Mínimo,Vol.,% var.
0,15.12.2024,"104.443,0","101.417,9","105.120,9","101.234,9","133,99K",+2.98%
1,14.12.2024,"101.417,7","101.423,7","102.633,0","100.626,3","105,31K",-0.01%
2,13.12.2024,"101.426,2","100.008,3","101.891,2","99.214,2","162,40K",+1.42%
3,12.12.2024,"100.347,3","101.126,3","102.495,3","99.334,5","189,02K","-0,77%"
4,11.12.2024,"101.126,2","96.603,2","101.877,1","95.689,5","250,02K","4,69%"


In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4722 entries, 0 to 4721
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Fecha     4722 non-null   object
 1   Último    4722 non-null   object
 2   Apertura  4722 non-null   object
 3   Máximo    4722 non-null   object
 4   Mínimo    4722 non-null   object
 5   Vol.      4722 non-null   object
 6   % var.    4722 non-null   object
dtypes: object(7)
memory usage: 258.4+ KB


In [14]:
if df['Vol.'].dtype == 'object':  # Limpiar solo si el tipo es texto
    df['Vol.'] = (
        df['Vol.']
        .str.replace('K', '*1e3', regex=False)  # Miles
        .str.replace('M', '*1e6', regex=False)  # Millones
        .str.replace('B', '*1e9', regex=False)  # Miles de millones
        .str.replace('.', '', regex=False)  # Eliminar puntos
        .str.replace(',', '.', regex=False)  # Reemplazar comas por puntos
        .map(eval)  # Evaluar las expresiones matemáticas
    )


In [15]:
for col in ['Último', 'Apertura', 'Máximo', 'Mínimo']:
    if df[col].dtype == 'object':  # Limpiar solo si el tipo es texto
        df[col] = df[col].str.replace('.', '', regex=False).str.replace(',', '.', regex=False).astype(float)
df.head()

Unnamed: 0,Fecha,Último,Apertura,Máximo,Mínimo,Vol.,% var.
0,15.12.2024,104443.0,101417.9,105120.9,101234.9,133990.0,+2.98%
1,14.12.2024,101417.7,101423.7,102633.0,100626.3,105310.0,-0.01%
2,13.12.2024,101426.2,100008.3,101891.2,99214.2,162400.0,+1.42%
3,12.12.2024,100347.3,101126.3,102495.3,99334.5,189020.0,"-0,77%"
4,11.12.2024,101126.2,96603.2,101877.1,95689.5,250020.0,"4,69%"


In [16]:
columns_to_convert = df.columns[df.columns != 'Fecha']  # Seleccionamos todas menos 'Fecha'
df[columns_to_convert] = df[columns_to_convert].apply(pd.to_numeric, errors='coerce')

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4722 entries, 0 to 4721
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Fecha     4722 non-null   object 
 1   Último    4722 non-null   float64
 2   Apertura  4722 non-null   float64
 3   Máximo    4722 non-null   float64
 4   Mínimo    4722 non-null   float64
 5   Vol.      4722 non-null   float64
 6   % var.    0 non-null      float64
dtypes: float64(6), object(1)
memory usage: 258.4+ KB


In [20]:
# Filtrar solo las columnas de tipo float
df_float = df.select_dtypes(include=['float64'])

# Calcular la correlación solo entre columnas de tipo float
print(df_float.corr())


            Último  Apertura    Máximo    Mínimo      Vol.  % var.
Último    1.000000  0.999198  0.999662  0.999569  0.096754     NaN
Apertura  0.999198  1.000000  0.999618  0.999428  0.097359     NaN
Máximo    0.999662  0.999618  1.000000  0.999292  0.097870     NaN
Mínimo    0.999569  0.999428  0.999292  1.000000  0.096598     NaN
Vol.      0.096754  0.097359  0.097870  0.096598  1.000000     NaN
% var.         NaN       NaN       NaN       NaN       NaN     NaN


- Ultimo, queremos añadir otra variable a elegir y tenga una explicacion(volumen)