In [1]:
import numpy as np

import pandas as pd

In [2]:
# Eight consecutive timestamps beginning on 2000-01-01.
index = pd.date_range(start="1/1/2000", periods=8)

# Five standard-normal draws labelled "a" through "e".
s = pd.Series(np.random.randn(5), index=list("abcde"))

# An 8x3 frame of standard-normal draws, one row per entry of `index`.
df = pd.DataFrame(
    data=np.random.randn(8, 3),
    index=index,
    columns=list("ABC"),
)

In [3]:
# Sample records. Note that 'Edad' (age) holds numeric *strings*, not integers.
data = {
    'Nombre': ['Juan', 'María', 'Carlos'],
    'Edad': ['25', '30', '28'],
    'Ciudad': ['Madrid', 'Barcelona', 'Valencia'],
}

df = pd.DataFrame(data)
print(df)

# Column dtypes before calling infer_objects().
print("Tipos de datos antes de inferir:", df.dtypes, sep="\n")

# Ask pandas to infer more specific dtypes for the object columns.
df = df.infer_objects()

# Column dtypes afterwards. In the recorded run every column, including the
# digit strings in 'Edad', is still reported as object.
print("\nTipos de datos después de inferir:", df.dtypes, sep="\n")


   Nombre Edad     Ciudad
0    Juan   25     Madrid
1   María   30  Barcelona
2  Carlos   28   Valencia
Tipos de datos antes de inferir:
Nombre    object
Edad      object
Ciudad    object
dtype: object

Tipos de datos después de inferir:
Nombre    object
Edad      object
Ciudad    object
dtype: object


In [4]:
import datetime

# Build three heterogeneous rows (ints, strings, datetimes) and transpose
# them so that each row becomes a column of the resulting frame.
df = pd.DataFrame(
    [
        [1, 2],
        ["a", "b"],
        [datetime.datetime(2016, 3, 2)] * 2,
    ]
).transpose()

print(df)
# Expected:
#    0  1                    2
# 0  1  a  2016-03-02 00:00:00
# 1  2  b  2016-03-02 00:00:00

# Dtypes straight after the transpose.
print(df.dtypes)
# Expected:
# 0    object
# 1    object
# 2    object
# dtype: object

# Dtypes once infer_objects() has been applied.
print(df.infer_objects().dtypes)
# Expected:
# 0             int64
# 1            object
# 2    datetime64[ns]
# dtype: object

   0  1                    2
0  1  a  2016-03-02 00:00:00
1  2  b  2016-03-02 00:00:00
0    object
1    object
2    object
dtype: object
0             int64
1            object
2    datetime64[ns]
dtype: object


In [5]:
# Start from a column mixing a string with integers, then drop the first
# row so that only the integers remain.
df = pd.DataFrame({"A": ["a", 1, 2, 3]}).iloc[1:]

print(df)
# Expected:
#    A
# 1  1
# 2  2
# 3  3

# Dtype of the sliced frame.
print(df.dtypes)
# Expected:
# A    object
# dtype: object

# Dtype after infer_objects().
print(df.infer_objects().dtypes)
# Expected:
# A    int64
# dtype: object

   A
1  1
2  2
3  3
A    object
dtype: object
A    int64
dtype: object


In [6]:
# A Series mixing numeric strings with an int; to_numeric parses all of them.
s = pd.Series(['1.0', '2', -3])
pd.to_numeric(s)
# 0    1.0
# 1    2.0
# 2   -3.0
# dtype: float64

# downcast='float' requests the smallest float dtype for the parsed values.
pd.to_numeric(s, downcast='float')
# 0    1.0
# 1    2.0
# 2   -3.0
# dtype: float32

# downcast='signed' requests the smallest signed integer dtype.
pd.to_numeric(s, downcast='signed')
# 0    1
# 1    2
# 2   -3
# dtype: int8

# Same data plus one value that cannot be parsed as a number.
s = pd.Series(['apple', '1.0', '2', -3])

# errors='ignore' is deprecated since pandas 2.2; the recommended replacement
# is to catch the parsing error explicitly and keep the original input.
try:
    pd.to_numeric(s)
except (ValueError, TypeError):
    pass  # 'apple' is not numeric, so `s` is left exactly as it was
# 0    apple
# 1      1.0
# 2        2
# 3       -3
# dtype: object

# errors='coerce' replaces every unparseable entry with NaN instead.
pd.to_numeric(s, errors='coerce')
# 0    NaN
# 1    1.0
# 2    2.0
# 3   -3.0
# dtype: float64

0    NaN
1    1.0
2    2.0
3   -3.0
dtype: float64

In [7]:
# A duration string and a Timedelta object placed side by side in one list.
m = [
    "5us",
    pd.Timedelta("1day"),
]

# Convert the whole list in one call.
pd.to_timedelta(m)
# Expected:
# TimedeltaIndex(['0 days 00:00:00.000005', '1 days 00:00:00'], dtype='timedelta64[ns]', freq=None)

TimedeltaIndex(['0 days 00:00:00.000005', '1 days 00:00:00'], dtype='timedelta64[ns]', freq=None)

In [8]:
import datetime

# --- Date strings and datetime objects, converted with pd.to_datetime ---
df = pd.DataFrame(
    [["2016-07-09", datetime.datetime(2016, 3, 2)]] * 2,
    dtype=object,
)

print(df)
# Expected:
#             0                    1
# 0  2016-07-09  2016-03-02 00:00:00
# 1  2016-07-09  2016-03-02 00:00:00

# apply() hands each column to the converter in turn.
print(df.apply(pd.to_datetime))
# Expected:
#            0          1
# 0 2016-07-09 2016-03-02
# 1 2016-07-09 2016-03-02

# --- A numeric string next to plain ints, converted with pd.to_numeric ---
df = pd.DataFrame(
    [["1.1", 2, 3]] * 2,
    dtype=object,
)

print(df)
# Expected:
#      0  1  2
# 0  1.1  2  3
# 1  1.1  2  3

print(df.apply(pd.to_numeric))
# Expected:
#      0  1  2
# 0  1.1  2  3
# 1  1.1  2  3

# --- A duration string next to a Timedelta, converted with pd.to_timedelta ---
df = pd.DataFrame(
    [["5us", pd.Timedelta("1day")]] * 2,
    dtype=object,
)

print(df)
# Expected:
#      0                1
# 0  5us  1 days 00:00:00
# 1  5us  1 days 00:00:00

print(df.apply(pd.to_timedelta))
# Expected:
#                        0      1
# 0 0 days 00:00:00.000005 1 days
# 1 0 days 00:00:00.000005 1 days

            0                    1
0  2016-07-09  2016-03-02 00:00:00
1  2016-07-09  2016-03-02 00:00:00
           0          1
0 2016-07-09 2016-03-02
1 2016-07-09 2016-03-02
     0  1  2
0  1.1  2  3
1  1.1  2  3
     0  1  2
0  1.1  2  3
1  1.1  2  3
     0                1
0  5us  1 days 00:00:00
1  5us  1 days 00:00:00
                       0      1
0 0 days 00:00:00.000005 1 days
1 0 days 00:00:00.000005 1 days


In [9]:
# A single float32 column of standard-normal draws.
df1 = pd.DataFrame(np.random.randn(8, 1), columns=["A"], dtype="float32")

# Three columns with deliberately different dtypes.
df2 = pd.DataFrame(
    {
        "A": pd.Series(np.random.randn(8), dtype="float16"),
        "B": pd.Series(np.random.randn(8)),
        # randint's upper bound is exclusive, so 256 is needed to cover the
        # whole uint8 range [0, 255].
        "C": pd.Series(np.random.randint(0, 256, size=8), dtype="uint8"),
    }
)

# Give df1 the same shape as df2 (missing cells filled with 0.0) and add them.
df3 = df1.reindex_like(df2).fillna(value=0.0) + df2

# Cast every column of the sum to int32.
dfi = df3.astype("int32")

# A column created from a plain Python int.
dfi["E"] = 1

# The values are random, so the listings below are only illustrative.
print(dfi)
#    A  B    C  E
# 0  1  0   26  1
# 1  3  1   86  1
# 2  0  0   46  1
# 3  0  1  212  1
# 4 -1 -1   26  1
# 5  1  0    7  1
# 6  0 -1  184  1
# 7  0  0  206  1

print(dfi.dtypes)
# A    int32
# B    int32
# C    int32
# E    int64
# dtype: object

# Boolean-mask selection: cells where the condition is False become NaN.
casted = dfi[dfi > 0]

print(casted)
#      A    B    C  E
# 0  1.0  NaN   26  1
# 1  3.0  1.0   86  1
# 2  NaN  NaN   46  1
# 3  NaN  1.0  212  1
# 4  NaN  NaN   26  1
# 5  1.0  NaN    7  1
# 6  NaN  NaN  184  1
# 7  NaN  NaN  206  1

# Columns that received NaN show up as float64 in the listing below.
print(casted.dtypes)
# A    float64
# B    float64
# C      int32
# E      int64
# dtype: object

   A  B    C  E
0  1  1  115  1
1  0  0  252  1
2  0  0  145  1
3 -3  0  253  1
4 -1  0  182  1
5  0 -1  138  1
6  1  0  234  1
7 -2  0  190  1
A    int32
B    int32
C    int32
E    int64
dtype: object
     A    B    C  E
0  1.0  1.0  115  1
1  NaN  NaN  252  1
2  NaN  NaN  145  1
3  NaN  NaN  253  1
4  NaN  NaN  182  1
5  NaN  NaN  138  1
6  1.0  NaN  234  1
7  NaN  NaN  190  1
A    float64
B    float64
C      int32
E      int64
dtype: object


In [11]:
# One column for each kind of dtype shown in the listing at the end of the cell.
df = pd.DataFrame(
    {
        "string": ["a", "b", "c"],
        "int64": [1, 2, 3],
        "uint8": np.arange(3, 6).astype(np.uint8),
        "float64": np.array([4.0, 5.0, 6.0]),
        "bool1": [True, False, True],
        "bool2": [False, True, False],
        "dates": pd.date_range("now", periods=3),
        "category": pd.Series(["A", "B", "C"]).astype("category"),
    }
)

# Further columns are appended one at a time, in the order they should appear.
df["tdeltas"] = df["dates"].diff()
df["uint64"] = np.arange(3, 6).astype(np.uint64)
df["other_dates"] = pd.date_range("20130101", periods=3)
df["tz_aware_dates"] = pd.date_range("20130101", periods=3, tz="US/Eastern")

# The 'dates' column starts at the current time, so its values change per run.
print(df)
#   string  int64  uint8  float64  bool1  bool2                      dates  \
# 0      a      1      3      4.0   True  False 2023-09-07 21:48:43.682942
# 1      b      2      4      5.0  False   True 2023-09-08 21:48:43.682942
# 2      c      3      5      6.0   True  False 2023-09-09 21:48:43.682942
#
#   category tdeltas  uint64 other_dates            tz_aware_dates
# 0        A     NaT       3  2013-01-01 2013-01-01 00:00:00-05:00
# 1        B  1 days       4  2013-01-02 2013-01-02 00:00:00-05:00
# 2        C  1 days       5  2013-01-03 2013-01-03 00:00:00-05:00

print(df.dtypes)
# string                                object
# int64                                  int64
# uint8                                  uint8
# float64                              float64
# bool1                                   bool
# bool2                                   bool
# dates                         datetime64[ns]
# category                            category
# tdeltas                      timedelta64[ns]
# uint64                                uint64
# other_dates                   datetime64[ns]
# tz_aware_dates    datetime64[ns, US/Eastern]
# dtype: object

  string  int64  uint8  float64  bool1  bool2                      dates  \
0      a      1      3      4.0   True  False 2023-09-07 21:49:13.515133   
1      b      2      4      5.0  False   True 2023-09-08 21:49:13.515133   
2      c      3      5      6.0   True  False 2023-09-09 21:49:13.515133   

  category tdeltas  uint64 other_dates            tz_aware_dates  
0        A     NaT       3  2013-01-01 2013-01-01 00:00:00-05:00  
1        B  1 days       4  2013-01-02 2013-01-02 00:00:00-05:00  
2        C  1 days       5  2013-01-03 2013-01-03 00:00:00-05:00  
string                                object
int64                                  int64
uint8                                  uint8
float64                              float64
bool1                                   bool
bool2                                   bool
dates                         datetime64[ns]
category                            category
tdeltas                      timedelta64[ns]
uint64                                uint64
other_dates                   datetime64[ns]
tz_aware_dates    datetime64[ns, US/Eastern]
dtype: object

In [14]:
def subdtypes(dtype):
    """Return the subclass tree rooted at ``dtype`` as nested lists.

    A class with no direct subclasses is returned as-is. Otherwise the
    result is ``[dtype, [subtree, ...]]`` with one subtree per direct
    subclass, in the order given by ``dtype.__subclasses__()``.
    """
    children = dtype.__subclasses__()
    if children:
        # Recurse into every direct subclass to build its own subtree.
        return [dtype, [subdtypes(child) for child in children]]
    return dtype

# Walk the class hierarchy rooted at np.generic. The commented tree below is
# the result recorded for this notebook run.
subdtypes(np.generic)
# [numpy.generic,
#  [[numpy.number,
#    [[numpy.integer,
#      [[numpy.signedinteger,
#        [numpy.int8,
#         numpy.int16,
#         numpy.intc,
#         numpy.int32,
#         numpy.int64,
#         numpy.timedelta64]],
#       [numpy.unsignedinteger,
#        [numpy.uint8, numpy.uint16, numpy.uintc, numpy.uint32, numpy.uint64]]]],
#     [numpy.inexact,
#      [[numpy.floating,
#        [numpy.float16, numpy.float32, numpy.float64, numpy.longdouble]],
#       [numpy.complexfloating,
#        [numpy.complex64, numpy.complex128, numpy.clongdouble]]]]]],
#   [numpy.flexible,
#    [[numpy.character, [numpy.bytes_, numpy.str_]],
#     [numpy.void, [numpy.record]]]],
#   numpy.bool_,
#   numpy.datetime64,
#   numpy.object_]]

[numpy.generic,
 [[numpy.number,
   [[numpy.integer,
     [[numpy.signedinteger,
       [numpy.int8,
        numpy.int16,
        numpy.intc,
        numpy.int32,
        numpy.int64,
        numpy.timedelta64]],
      [numpy.unsignedinteger,
       [numpy.uint8, numpy.uint16, numpy.uintc, numpy.uint32, numpy.uint64]]]],
    [numpy.inexact,
     [[numpy.floating,
       [numpy.float16, numpy.float32, numpy.float64, numpy.longdouble]],
      [numpy.complexfloating,
       [numpy.complex64, numpy.complex128, numpy.clongdouble]]]]]],
  [numpy.flexible,
   [[numpy.character, [numpy.bytes_, numpy.str_]],
    [numpy.void, [numpy.record]]]],
  numpy.bool_,
  numpy.datetime64,
  numpy.object_]]