In [None]:
import pandas as pd
import numpy as np

In [None]:
ser = pd.Series(["uno", "dos", "tres"])
ser

0     uno
1     dos
2    tres
dtype: object

In [None]:
pd.Series(np.ones((3, )))

0    1.0
1    1.0
2    1.0
dtype: float64

In [None]:
# Custom index labels may mix types (two ints and a string here); the
# resulting index has dtype object.
ser = pd.Series(["uno", "dos", "tres"], index=[1, "dos", 3])

In [None]:
ser

1       uno
dos     dos
3      tres
dtype: object

In [None]:
# Positional lookup: position 2 is the third element, whatever its label is.
ser.iloc[2]

'tres'

In [None]:
# Label-based lookup: fetch the element whose index label is "dos".
ser.loc["dos"]

'dos'

In [None]:
ser.to_numpy()

array(['uno', 'dos', 'tres'], dtype=object)

In [None]:
ser.index

Index([1, 'dos', 3], dtype='object')

In [None]:
# Two-row frame built column by column; the rows carry the labels 1 and 2.
df = pd.DataFrame({"A": [1, 3], "B": [2, 4]}, index=[1, 2])

In [None]:
df

Unnamed: 0,A,B
1,1,2
2,3,4


In [None]:
# Label-based scalar access: add the cell at (row 1, column "A") to the cell
# at (row 2, column "B").
df.loc[1, "A"] + df.loc[2, "B"]

5

In [None]:
# Select the row labeled 2 and add up its values.
df.loc[2].sum()

7

In [None]:
# 5x3 matrix with ones on the main diagonal; rows are labeled t0..t4.
df = pd.DataFrame(
    np.eye(5, 3),
    index=["t" + str(paso) for paso in range(5)],
    columns=["presion", "temperatura", "volumen"],
)

In [None]:
df

Unnamed: 0,presion,temperatura,volumen
t0,1.0,0.0,0.0
t1,0.0,1.0,0.0
t2,0.0,0.0,1.0
t3,0.0,0.0,0.0
t4,0.0,0.0,0.0


In [None]:
# Positional slicing: every second row starting at position 1, and every
# column from position 1 onward.
df.iloc[1::2, 1:]

Unnamed: 0,temperatura,volumen
t1,1.0,0.0
t3,0.0,0.0


In [None]:
# Label-based selection using explicit lists of row labels and column labels.
df.loc[["t0", "t2"], ["presion", "volumen"]]

Unnamed: 0,presion,volumen
t0,1.0,0.0
t2,0.0,1.0


In [None]:
df.index

Index(['t0', 't1', 't2', 't3', 't4'], dtype='object')

In [None]:
df.columns

Index(['presion', 'temperatura', 'volumen'], dtype='object')

In [None]:
# Synthetic weather data (100 rows). The values come from a seeded generator
# so the frame is identical on every Restart & Run All; with the unseeded
# global np.random state every run produced different data.
rng = np.random.default_rng(42)
df = pd.DataFrame(
    data={
        "nivel_lluvia": rng.integers(0, 2, size=100),   # integer codes 0 or 1
        "temperatura": rng.uniform(10, 30, size=100),   # floats in [10, 30)
        "clima": rng.integers(0, 3, size=100),          # integer codes 0, 1 or 2
        }
)

```sql
SELECT * FROM df WHERE nivel_lluvia = 1;
```

In [None]:
# Boolean mask: keep only the rows where nivel_lluvia equals 1.
df[df["nivel_lluvia"] == 1]

Unnamed: 0,nivel_lluvia,temperatura,clima
0,1,29.80076,0
3,1,20.533043,0
4,1,10.411331,2
6,1,15.264286,2
8,1,10.129345,1
11,1,23.643612,2
12,1,13.147978,0
13,1,21.119548,0
14,1,27.093959,2
17,1,19.042918,0


```sql
SELECT * FROM df WHERE nivel_lluvia = 1 AND temperatura < 15;
```

In [None]:
# Rows where nivel_lluvia is 1 AND temperatura is below 15.
df[df["nivel_lluvia"].eq(1) & df["temperatura"].lt(15)]

Unnamed: 0,nivel_lluvia,temperatura,clima
4,1,10.411331,2
8,1,10.129345,1
12,1,13.147978,0
31,1,11.677505,0
36,1,12.022035,2
42,1,12.255503,2
47,1,10.248094,1
51,1,12.455283,2
72,1,14.780926,0
78,1,11.049974,0


```sql
SELECT * FROM df WHERE nivel_lluvia = 1 OR clima = 2;
```

In [None]:
# Rows where nivel_lluvia is 1 OR clima is 2.
df[df["nivel_lluvia"].eq(1) | df["clima"].eq(2)]

Unnamed: 0,nivel_lluvia,temperatura,clima
0,1,29.800760,0
1,0,12.848668,2
3,1,20.533043,0
4,1,10.411331,2
5,0,21.540955,2
...,...,...,...
94,1,17.329568,1
95,0,15.448332,2
96,1,27.841843,1
97,1,17.426319,0


```sql
SELECT temperatura FROM df WHERE nivel_lluvia = 1 OR clima = 2;
```

In [None]:
# Rows where nivel_lluvia is 1 OR clima is 2, restricted to the
# "temperatura" column (row mask first, column label second).
df.loc[df["nivel_lluvia"].eq(1) | df["clima"].eq(2), "temperatura"]

0     29.800760
1     12.848668
3     20.533043
4     10.411331
5     21.540955
        ...    
94    17.329568
95    15.448332
96    27.841843
97    17.426319
98    24.962903
Name: temperatura, Length: 69, dtype: float64

In [None]:
# Assigning a scalar creates the column and repeats the value on every row.
df["ciudad"] = "bogota"

In [None]:
# Translate the numeric rain codes into text labels through a dict lookup.
df["nivel_lluvia_str"] = df["nivel_lluvia"].map({0: "bajo", 1: "alto"})

In [None]:
df

Unnamed: 0,nivel_lluvia,temperatura,clima,ciudad,nivel_lluvia_str
0,1,29.800760,0,bogota,alto
1,0,12.848668,2,bogota,bajo
2,0,23.322241,0,bogota,bajo
3,1,20.533043,0,bogota,alto
4,1,10.411331,2,bogota,alto
...,...,...,...,...,...
95,0,15.448332,2,bogota,bajo
96,1,27.841843,1,bogota,alto
97,1,17.426319,0,bogota,alto
98,1,24.962903,2,bogota,alto


In [None]:
# Translate the numeric climate codes into text labels through a dict lookup.
df["clima_str"] = df["clima"].map({0: "calido", 1: "frio", 2: "templado"})

In [None]:
df

Unnamed: 0,nivel_lluvia,temperatura,clima,ciudad,nivel_lluvia_str,clima_str
0,1,29.800760,0,bogota,alto,calido
1,0,12.848668,2,bogota,bajo,templado
2,0,23.322241,0,bogota,bajo,calido
3,1,20.533043,0,bogota,alto,calido
4,1,10.411331,2,bogota,alto,templado
...,...,...,...,...,...,...
95,0,15.448332,2,bogota,bajo,templado
96,1,27.841843,1,bogota,alto,frio
97,1,17.426319,0,bogota,alto,calido
98,1,24.962903,2,bogota,alto,templado


In [None]:
df.dtypes

nivel_lluvia          int64
temperatura         float64
clima                 int64
ciudad               object
nivel_lluvia_str     object
clima_str            object
dtype: object

In [None]:
# Cast the integer column to float64, overwriting the existing column.
df["nivel_lluvia"] = df["nivel_lluvia"].astype(np.float64)

In [None]:
df.dtypes

nivel_lluvia        float64
temperatura         float64
clima                 int64
ciudad               object
nivel_lluvia_str     object
clima_str            object
dtype: object

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   nivel_lluvia      100 non-null    float64
 1   temperatura       100 non-null    float64
 2   clima             100 non-null    int64  
 3   ciudad            100 non-null    object 
 4   nivel_lluvia_str  100 non-null    object 
 5   clima_str         100 non-null    object 
dtypes: float64(2), int64(1), object(3)
memory usage: 4.8+ KB


In [None]:
df.describe()

Unnamed: 0,nivel_lluvia,temperatura,clima
count,100.0,100.0,100.0
mean,0.48,21.278413,1.08
std,0.502117,6.032668,0.849004
min,0.0,10.032106,0.0
25%,0.0,16.228591,0.0
50%,0.0,22.106996,1.0
75%,1.0,26.931528,2.0
max,1.0,29.83157,2.0


In [None]:
df

Unnamed: 0,nivel_lluvia,temperatura,clima,ciudad,nivel_lluvia_str,clima_str
0,1.0,29.800760,0,bogota,alto,calido
1,0.0,12.848668,2,bogota,bajo,templado
2,0.0,23.322241,0,bogota,bajo,calido
3,1.0,20.533043,0,bogota,alto,calido
4,1.0,10.411331,2,bogota,alto,templado
...,...,...,...,...,...,...
95,0.0,15.448332,2,bogota,bajo,templado
96,1.0,27.841843,1,bogota,alto,frio
97,1.0,17.426319,0,bogota,alto,calido
98,1.0,24.962903,2,bogota,alto,templado


In [None]:
# Element-wise labeling with an inline lambda: values strictly above 15
# become "alto", everything else becomes "bajo".
df["temperatura_str"] = df["temperatura"].apply(
    lambda x: "alto" if x > 15 else "bajo"
)

In [None]:
df

Unnamed: 0,nivel_lluvia,temperatura,clima,ciudad,nivel_lluvia_str,clima_str,temperatura_str
0,1.0,29.800760,0,bogota,alto,calido,alto
1,0.0,12.848668,2,bogota,bajo,templado,bajo
2,0.0,23.322241,0,bogota,bajo,calido,alto
3,1.0,20.533043,0,bogota,alto,calido,alto
4,1.0,10.411331,2,bogota,alto,templado,bajo
...,...,...,...,...,...,...,...
95,0.0,15.448332,2,bogota,bajo,templado,alto
96,1.0,27.841843,1,bogota,alto,frio,alto
97,1.0,17.426319,0,bogota,alto,calido,alto
98,1.0,24.962903,2,bogota,alto,templado,alto


In [None]:
def temperatura_alta(x):
    """Classify a temperature reading.

    Returns "alto" when ``x`` is strictly greater than 15 and "bajo" in
    every other case.
    """
    return "alto" if x > 15 else "bajo"

In [None]:
# Apply the named function temperatura_alta to each value of the column.
df["temperatura_str"] = df["temperatura"].apply(
    temperatura_alta
)

In [None]:
df

Unnamed: 0,nivel_lluvia,temperatura,clima,ciudad,nivel_lluvia_str,clima_str,temperatura_str
0,1.0,29.800760,0,bogota,alto,calido,alto
1,0.0,12.848668,2,bogota,bajo,templado,bajo
2,0.0,23.322241,0,bogota,bajo,calido,alto
3,1.0,20.533043,0,bogota,alto,calido,alto
4,1.0,10.411331,2,bogota,alto,templado,bajo
...,...,...,...,...,...,...,...
95,0.0,15.448332,2,bogota,bajo,templado,alto
96,1.0,27.841843,1,bogota,alto,frio,alto
97,1.0,17.426319,0,bogota,alto,calido,alto
98,1.0,24.962903,2,bogota,alto,templado,alto


In [None]:
def concatenar(row):
    """Build a combined label for one row.

    Reads the "nivel_lluvia_str" and "temperatura_str" entries of ``row``
    (anything indexable by those keys) and returns them joined by "-".
    """
    columnas = ("nivel_lluvia_str", "temperatura_str")
    return "-".join(row[columna] for columna in columnas)

In [None]:
concatenar(df.loc[0])

'alto-alto'

In [None]:
# axis=1 hands each row to concatenar, so the result has one label per row.
df["concat"] = df.apply(concatenar, axis=1)

In [None]:
df

Unnamed: 0,nivel_lluvia,temperatura,clima,ciudad,nivel_lluvia_str,clima_str,temperatura_str,concat
0,1.0,29.800760,0,bogota,alto,calido,alto,alto-alto
1,0.0,12.848668,2,bogota,bajo,templado,bajo,bajo-bajo
2,0.0,23.322241,0,bogota,bajo,calido,alto,bajo-alto
3,1.0,20.533043,0,bogota,alto,calido,alto,alto-alto
4,1.0,10.411331,2,bogota,alto,templado,bajo,alto-bajo
...,...,...,...,...,...,...,...,...
95,0.0,15.448332,2,bogota,bajo,templado,alto,bajo-alto
96,1.0,27.841843,1,bogota,alto,frio,alto,alto-alto
97,1.0,17.426319,0,bogota,alto,calido,alto,alto-alto
98,1.0,24.962903,2,bogota,alto,templado,alto,alto-alto


In [None]:
def mostrar(x):
    """Print ``x`` followed by a line of 50 "=" characters, then return 1."""
    separador = "=" * 50
    print(x, separador, sep="\n")
    return 1

In [None]:
mostrar

<function __main__.mostrar>

In [None]:
# Calls mostrar once per row (axis=1): every row is printed, and the 1 that
# mostrar returns for each call is collected into the resulting Series.
df.apply(mostrar, axis=1)

nivel_lluvia                1
temperatura           29.8008
clima                       0
ciudad                 bogota
nivel_lluvia_str         alto
clima_str              calido
temperatura_str          alto
concat              alto-alto
Name: 0, dtype: object
nivel_lluvia                0
temperatura           12.8487
clima                       2
ciudad                 bogota
nivel_lluvia_str         bajo
clima_str            templado
temperatura_str          bajo
concat              bajo-bajo
Name: 1, dtype: object
nivel_lluvia                0
temperatura           23.3222
clima                       0
ciudad                 bogota
nivel_lluvia_str         bajo
clima_str              calido
temperatura_str          alto
concat              bajo-alto
Name: 2, dtype: object
nivel_lluvia                1
temperatura            20.533
clima                       0
ciudad                 bogota
nivel_lluvia_str         alto
clima_str              calido
temperatura_str          alto
c

0     1
1     1
2     1
3     1
4     1
     ..
95    1
96    1
97    1
98    1
99    1
Length: 100, dtype: int64

In [None]:
df["nivel_lluvia"].sum()

48.0

In [None]:
df["nivel_lluvia"].mean()

0.48

In [None]:
df["nivel_lluvia"].std()

0.502116731568678

In [None]:
df["nivel_lluvia_str"].unique()

array(['alto', 'bajo'], dtype=object)

In [None]:
df["nivel_lluvia_str"].value_counts()

bajo    52
alto    48
Name: nivel_lluvia_str, dtype: int64

In [None]:
df["temperatura"].sort_values()

93    10.032106
8     10.129345
47    10.248094
4     10.411331
78    11.049974
        ...    
80    29.413306
37    29.450343
26    29.726912
0     29.800760
7     29.831570
Name: temperatura, Length: 100, dtype: float64

In [None]:
df

Unnamed: 0,nivel_lluvia,temperatura,clima,ciudad,nivel_lluvia_str,clima_str,temperatura_str,concat
0,1.0,29.800760,0,bogota,alto,calido,alto,alto-alto
1,0.0,12.848668,2,bogota,bajo,templado,bajo,bajo-bajo
2,0.0,23.322241,0,bogota,bajo,calido,alto,bajo-alto
3,1.0,20.533043,0,bogota,alto,calido,alto,alto-alto
4,1.0,10.411331,2,bogota,alto,templado,bajo,alto-bajo
...,...,...,...,...,...,...,...,...
95,0.0,15.448332,2,bogota,bajo,templado,alto,bajo-alto
96,1.0,27.841843,1,bogota,alto,frio,alto,alto-alto
97,1.0,17.426319,0,bogota,alto,calido,alto,alto-alto
98,1.0,24.962903,2,bogota,alto,templado,alto,alto-alto


In [None]:
# Per-climate summary: mean of temperatura and sum of nivel_lluvia.
# Aggregations are given by name; passing the NumPy callable np.mean to agg
# is deprecated since pandas 2.1 and emits a FutureWarning.
df.groupby(by="clima_str").agg({"temperatura": "mean", "nivel_lluvia": "sum"})

Unnamed: 0_level_0,temperatura,nivel_lluvia
clima_str,Unnamed: 1_level_1,Unnamed: 2_level_1
calido,21.886808,18.0
frio,21.325244,11.0
templado,20.758916,19.0


In [None]:
# Mean temperatura for every (clima_str, nivel_lluvia_str) combination.
# "mean" is passed by name; handing np.mean to agg is deprecated since
# pandas 2.1 and emits a FutureWarning.
df.groupby(by=["clima_str", "nivel_lluvia_str"]).agg({"temperatura": "mean"})

Unnamed: 0_level_0,Unnamed: 1_level_0,temperatura
clima_str,nivel_lluvia_str,Unnamed: 2_level_1
calido,alto,20.095717
calido,bajo,24.18964
frio,alto,20.99709
frio,bajo,21.53758
templado,alto,20.996529
templado,bajo,20.543933


In [None]:
df

Unnamed: 0,nivel_lluvia,temperatura,clima,ciudad,nivel_lluvia_str,clima_str,temperatura_str,concat
0,1.0,29.800760,0,bogota,alto,calido,alto,alto-alto
1,0.0,12.848668,2,bogota,bajo,templado,bajo,bajo-bajo
2,0.0,23.322241,0,bogota,bajo,calido,alto,bajo-alto
3,1.0,20.533043,0,bogota,alto,calido,alto,alto-alto
4,1.0,10.411331,2,bogota,alto,templado,bajo,alto-bajo
...,...,...,...,...,...,...,...,...
95,0.0,15.448332,2,bogota,bajo,templado,alto,bajo-alto
96,1.0,27.841843,1,bogota,alto,frio,alto,alto-alto
97,1.0,17.426319,0,bogota,alto,calido,alto,alto-alto
98,1.0,24.962903,2,bogota,alto,templado,alto,alto-alto


In [None]:
# Three extra measurement columns for 90 rows, drawn from a seeded generator
# so the values are identical on every Restart & Run All (the unseeded global
# np.random state gave different numbers on each run).
rng = np.random.default_rng(7)
df2 = pd.DataFrame(
    data=rng.uniform(size=(90, 3)),
    columns=["m1", "m2", "m3"]
    )

In [None]:
df2

Unnamed: 0,m1,m2,m3
0,0.727434,0.606492,0.424049
1,0.564258,0.546059,0.096674
2,0.495476,0.591310,0.642555
3,0.798788,0.691385,0.907367
4,0.766018,0.665509,0.953510
...,...,...,...
85,0.675980,0.227483,0.907668
86,0.925957,0.075325,0.174314
87,0.473502,0.689112,0.275608
88,0.877770,0.074078,0.610775


In [None]:
# Join df2 onto df by index (left join keeps every row of df), replace the
# resulting missing values with 0, then average temperatura per climate.
# "mean" is passed by name; handing np.mean to agg is deprecated since
# pandas 2.1 and emits a FutureWarning.
res = (
    df
    .join(df2, how="left")
    .fillna(0)
    .groupby("clima_str")
    .agg({"temperatura": "mean"})
)

In [None]:
res

Unnamed: 0_level_0,temperatura
clima_str,Unnamed: 1_level_1
calido,21.886808
frio,21.325244
templado,20.758916


In [None]:
# Left join on the index: rows of df with no counterpart in df2 get missing
# values in m1/m2/m3, which fillna(0) then replaces with 0.
df.join(df2, how="left").fillna(0)

Unnamed: 0,nivel_lluvia,temperatura,clima,ciudad,nivel_lluvia_str,clima_str,temperatura_str,concat,m1,m2,m3
0,1.0,29.800760,0,bogota,alto,calido,alto,alto-alto,0.727434,0.606492,0.424049
1,0.0,12.848668,2,bogota,bajo,templado,bajo,bajo-bajo,0.564258,0.546059,0.096674
2,0.0,23.322241,0,bogota,bajo,calido,alto,bajo-alto,0.495476,0.591310,0.642555
3,1.0,20.533043,0,bogota,alto,calido,alto,alto-alto,0.798788,0.691385,0.907367
4,1.0,10.411331,2,bogota,alto,templado,bajo,alto-bajo,0.766018,0.665509,0.953510
...,...,...,...,...,...,...,...,...,...,...,...
95,0.0,15.448332,2,bogota,bajo,templado,alto,bajo-alto,0.000000,0.000000,0.000000
96,1.0,27.841843,1,bogota,alto,frio,alto,alto-alto,0.000000,0.000000,0.000000
97,1.0,17.426319,0,bogota,alto,calido,alto,alto-alto,0.000000,0.000000,0.000000
98,1.0,24.962903,2,bogota,alto,templado,alto,alto-alto,0.000000,0.000000,0.000000


In [None]:
# Small lookup table: one row per clima code with an associated text value.
df3 = pd.DataFrame(
    [(0, "uno"), (1, "dos"), (2, "tres")],
    columns=["clima", "tmp"],
)

In [None]:
df3

Unnamed: 0,clima,tmp
0,0,uno
1,1,dos
2,2,tres


In [None]:
# Combine df and df3 by matching values in their shared "clima" column, so
# each row of df gains the corresponding "tmp" value.
df.merge(df3, on="clima")

Unnamed: 0,nivel_lluvia,temperatura,clima,ciudad,nivel_lluvia_str,clima_str,temperatura_str,concat,tmp
0,1.0,29.800760,0,bogota,alto,calido,alto,alto-alto,uno
1,0.0,23.322241,0,bogota,bajo,calido,alto,bajo-alto,uno
2,1.0,20.533043,0,bogota,alto,calido,alto,alto-alto,uno
3,0.0,21.210906,0,bogota,bajo,calido,alto,bajo-alto,uno
4,1.0,13.147978,0,bogota,alto,calido,bajo,alto-bajo,uno
...,...,...,...,...,...,...,...,...,...
95,0.0,19.193496,1,bogota,bajo,frio,alto,bajo-alto,dos
96,0.0,14.957561,1,bogota,bajo,frio,bajo,bajo-bajo,dos
97,1.0,17.329568,1,bogota,alto,frio,alto,alto-alto,dos
98,1.0,27.841843,1,bogota,alto,frio,alto,alto-alto,dos
